diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts
index 7e9b3cd808..4d2886d8ef 100644
--- a/evals/subagents.eval.ts
+++ b/evals/subagents.eval.ts
@@ -31,7 +31,7 @@ describe('subagent eval test cases', () => {
    *
    * This tests the system prompt's subagent specific clauses.
    */
-  evalTest('USUALLY_PASSES', {
+  evalTest('ALWAYS_PASSES', {
     name: 'should delegate to user provided agent with relevant expertise',
     params: {
       settings: {
@@ -46,6 +46,9 @@ describe('subagent eval test cases', () => {
       'index.ts': INDEX_TS,
       'README.md': 'TODO: update the README.',
     },
+    acknowledgedAgents: {
+      'docs-agent': AGENT_DEFINITION,
+    },
     assert: async (rig, _result) => {
       await rig.expectToolCallSuccess(['docs-agent']);
     },
diff --git a/evals/test-helper.ts b/evals/test-helper.ts
index 37d79eb6a4..05350eb951 100644
--- a/evals/test-helper.ts
+++ b/evals/test-helper.ts
@@ -112,6 +112,10 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
         execSync('git commit --allow-empty -m "Initial commit"', execOptions);
       }
 
+      if (evalCase.acknowledgedAgents) {
+        await rig.acknowledgeAgents(evalCase.acknowledgedAgents);
+      }
+
       const result = await rig.run({
         args: evalCase.prompt,
         approvalMode: evalCase.approvalMode ?? 'yolo',
@@ -172,5 +176,6 @@ export interface EvalCase {
   timeout?: number;
   files?: Record<string, string>;
   approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
+  acknowledgedAgents?: Record<string, string>;
   assert: (rig: TestRig, result: string) => Promise<void>;
 }
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index f63c189014..d04432c55b 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -135,6 +135,7 @@ export * from './prompts/mcp-prompts.js';
 export * from './agents/types.js';
 export * from './agents/agentLoader.js';
 export * from './agents/local-executor.js';
+export * from './agents/acknowledgedAgents.js';
 
 // Export specific tool logic
 export * from './tools/read-file.js';
diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
index 1401304560..61d69dcbb3 100644
--- a/packages/test-utils/src/test-rig.ts
+++ b/packages/test-utils/src/test-rig.ts
@@ -11,8 +11,13 @@ import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { env } from 'node:process';
 import { setTimeout as sleep } from 'node:timers/promises';
-import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
+import {
+  DEFAULT_GEMINI_MODEL,
+  GEMINI_DIR,
+  AcknowledgedAgentsService,
+} from '@google/gemini-cli-core';
 import fs from 'node:fs';
+import crypto from 'node:crypto';
 import * as pty from '@lydell/node-pty';
 import stripAnsi from 'strip-ansi';
 import * as os from 'node:os';
@@ -356,6 +361,39 @@ export class TestRig {
     );
   }
 
+  /**
+   * Pre-acknowledges agents for this rig's test directory.
+   *
+   * Each definition is hashed with SHA-256 and passed to
+   * `AcknowledgedAgentsService.acknowledge` together with the resolved
+   * project root and the agent name. `GEMINI_CLI_HOME` is pointed at the
+   * rig's home directory for the duration of the calls and restored
+   * afterwards, even if acknowledging throws.
+   *
+   * @param agents Map of agent name to the agent definition content.
+   */
+  async acknowledgeAgents(agents: Record<string, string>): Promise<void> {
+    // Use realpath to ensure the path matches what the CLI sees (e.g. /var vs /private/var on macOS)
+    const projectRoot = fs.realpathSync(this.testDir!);
+    const originalHome = process.env['GEMINI_CLI_HOME'];
+    process.env['GEMINI_CLI_HOME'] = this.homeDir!;
+
+    try {
+      const service = new AcknowledgedAgentsService();
+      for (const [name, content] of Object.entries(agents)) {
+        const hash = crypto.createHash('sha256').update(content).digest('hex');
+        await service.acknowledge(projectRoot, name, hash);
+      }
+    } finally {
+      // Compare against undefined so a set-but-empty value is restored, not deleted.
+      if (originalHome !== undefined) {
+        process.env['GEMINI_CLI_HOME'] = originalHome;
+      } else {
+        delete process.env['GEMINI_CLI_HOME'];
+      }
+    }
+  }
+
   createFile(fileName: string, content: string) {
     const filePath = join(this.testDir!, fileName);
     writeFileSync(filePath, content);