mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-13 12:57:12 -07:00
88985511c6
- Redesigned CORE_SI_SKELETON for maximum reasoning fidelity and minimum token usage. - Extracted Software Engineering and New Application workflows to dynamic skills. - Added 'Essential Workflows' section to maintain visibility for core user journeys. - Implemented precedence-based sorting for available skills (Workspace > User > Built-in). - Added behavioral tests in evals/skill_activation.eval.ts to verify proactive skill activation. - Fixed pre-existing build error in useGeminiStream.ts related to missing FinishReason values.
111 lines
3.4 KiB
TypeScript
111 lines
3.4 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe } from 'vitest';
|
|
import { evalTest } from './test-helper.js';
|
|
|
|
describe('Skill Activation Behavioral Evals', () => {
|
|
/**
|
|
* Tests that the model proactively activates the software-engineering skill
|
|
* when faced with a typical engineering task like bug fixing.
|
|
*/
|
|
evalTest('ALWAYS_PASSES', {
|
|
name: 'should activate software-engineering skill for bug fixes',
|
|
prompt:
|
|
'There is a bug in the greeting logic in src/index.ts. Please fix it.',
|
|
files: {
|
|
'src/index.ts':
|
|
'export const greet = (name: string) => `Hello, ${name}!`;',
|
|
'src/index.test.ts': `
|
|
import { greet } from './index';
|
|
import { expect, test } from 'vitest';
|
|
test('greet', () => { expect(greet('World')).toBe('Hello, World!'); });
|
|
`,
|
|
},
|
|
assert: async (rig) => {
|
|
await rig.expectToolCallSuccess(['activate_skill'], undefined, (args) => {
|
|
try {
|
|
const parsed = JSON.parse(args);
|
|
return parsed.name === 'software-engineering';
|
|
} catch {
|
|
return false;
|
|
}
|
|
});
|
|
},
|
|
});
|
|
|
|
/**
|
|
* Tests that the model proactively activates the new-application skill
|
|
* when asked to scaffold a new project.
|
|
*/
|
|
evalTest('ALWAYS_PASSES', {
|
|
name: 'should activate new-application skill for prototyping',
|
|
prompt: 'Build me a new Todo app using React and Vanilla CSS.',
|
|
assert: async (rig) => {
|
|
await rig.expectToolCallSuccess(['activate_skill'], undefined, (args) => {
|
|
try {
|
|
const parsed = JSON.parse(args);
|
|
return parsed.name === 'new-application';
|
|
} catch {
|
|
return false;
|
|
}
|
|
});
|
|
},
|
|
});
|
|
|
|
/**
|
|
* Tests that the model proactively activates the docs-writer skill
|
|
* when asked to update documentation files.
|
|
*/
|
|
evalTest('ALWAYS_PASSES', {
|
|
name: 'should activate docs-writer skill for documentation tasks',
|
|
prompt:
|
|
'Update the documentation in docs/index.md to include the new features.',
|
|
files: {
|
|
'docs/index.md': `# Documentation
|
|
|
|
Existing content.`,
|
|
},
|
|
assert: async (rig) => {
|
|
await rig.expectToolCallSuccess(['activate_skill'], undefined, (args) => {
|
|
try {
|
|
const parsed = JSON.parse(args);
|
|
return parsed.name === 'docs-writer';
|
|
} catch {
|
|
return false;
|
|
}
|
|
});
|
|
},
|
|
});
|
|
|
|
/**
|
|
* Tests that the model can handle multi-step tasks that might require
|
|
* activating multiple skills sequentially (though usually it just activates one).
|
|
*/
|
|
evalTest('USUALLY_PASSES', {
|
|
name: 'should activate software-engineering even when the prompt is slightly indirect',
|
|
prompt:
|
|
'The CI is failing on the main branch. Can you investigate and fix whatever is broken?',
|
|
files: {
|
|
'package.json': '{ "scripts": { "test": "vitest run" } }',
|
|
'src/logic.ts':
|
|
'export const compute = () => { throw new Error("Broken"); };',
|
|
'src/logic.test.ts':
|
|
'import { compute } from "./logic"; import { test } from "vitest"; test("compute", () => { compute(); });',
|
|
},
|
|
assert: async (rig) => {
|
|
await rig.expectToolCallSuccess(['activate_skill'], undefined, (args) => {
|
|
try {
|
|
const parsed = JSON.parse(args);
|
|
return parsed.name === 'software-engineering';
|
|
} catch {
|
|
return false;
|
|
}
|
|
});
|
|
},
|
|
});
|
|
});
|