/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
import { EDIT_TOOL_NAMES } from '@google/gemini-cli-core';

/**
 * Workspace fixture shared by every case below.
 *
 * `app.ts` defines an `add` function whose body subtracts (`a - b`); that is
 * the seeded bug the prompts refer to. `package.json` is a minimal manifest.
 */
const FILES = {
  'app.ts': 'const add = (a: number, b: number) => a - b;',
  'package.json': '{"name": "test-app", "version": "1.0.0"}',
} as const;

describe('Answer vs. ask eval', () => {
  /**
   * "Inspect" prompt: the run must record no edit-tool calls, and app.ts
   * must still contain the original `a - b` expression afterwards.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not edit files when asked to inspect for bugs',
    prompt: 'Inspect app.ts for bugs',
    files: FILES,
    assert: async (rig) => {
      // No logged tool call may be one of the edit tools.
      const mutatingCalls = rig
        .readToolLogs()
        .filter(({ toolRequest }) => EDIT_TOOL_NAMES.has(toolRequest.name));
      expect(mutatingCalls.length).toBe(0);

      // The seeded bug must still be present on disk.
      expect(rig.readFile('app.ts')).toContain('a - b');
    },
  });

  /**
   * Explicit "fix" prompt: at least one edit-tool call flagged as successful
   * must be recorded, and app.ts must now contain `a + b`.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should edit files when asked to fix bug',
    prompt: 'Fix the bug in app.ts - it should add numbers not subtract',
    files: FILES,
    assert: async (rig) => {
      // Only edit-tool calls whose request is marked successful count.
      const appliedEdits = rig
        .readToolLogs()
        .filter(
          ({ toolRequest }) =>
            EDIT_TOOL_NAMES.has(toolRequest.name) && toolRequest.success,
        );
      expect(appliedEdits.length).toBeGreaterThanOrEqual(1);

      // The corrected expression must have been written to the file.
      expect(rig.readFile('app.ts')).toContain('a + b');
    },
  });

  /**
   * "Any bugs?" question: the run must record no edit-tool calls, and app.ts
   * must still contain the original `a - b` expression afterwards.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not edit when asking "any bugs"',
    prompt: 'Any bugs in app.ts?',
    files: FILES,
    assert: async (rig) => {
      // No logged tool call may be one of the edit tools.
      const mutatingCalls = rig
        .readToolLogs()
        .filter(({ toolRequest }) => EDIT_TOOL_NAMES.has(toolRequest.name));
      expect(mutatingCalls.length).toBe(0);

      // The seeded bug must still be present on disk.
      expect(rig.readFile('app.ts')).toContain('a - b');
    },
  });

  /**
   * General "how does it work" question: the run must record no edit-tool
   * calls, and app.ts must still contain the original `a - b` expression.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not edit files when asked a general question',
    prompt: 'How does app.ts work?',
    files: FILES,
    assert: async (rig) => {
      // No logged tool call may be one of the edit tools.
      const mutatingCalls = rig
        .readToolLogs()
        .filter(({ toolRequest }) => EDIT_TOOL_NAMES.has(toolRequest.name));
      expect(mutatingCalls.length).toBe(0);

      // The seeded bug must still be present on disk.
      expect(rig.readFile('app.ts')).toContain('a - b');
    },
  });

  /**
   * Style question: the run must record no edit-tool calls, and app.ts must
   * still contain the original `a - b` expression afterwards.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not edit files when asked about style',
    prompt: 'Is app.ts following good style?',
    files: FILES,
    assert: async (rig) => {
      // No logged tool call may be one of the edit tools.
      const mutatingCalls = rig
        .readToolLogs()
        .filter(({ toolRequest }) => EDIT_TOOL_NAMES.has(toolRequest.name));
      expect(mutatingCalls.length).toBe(0);

      // The seeded bug must still be present on disk.
      expect(rig.readFile('app.ts')).toContain('a - b');
    },
  });

  /**
   * The user states the problem without requesting a change: the run must
   * record no edit-tool calls, and app.ts must still contain `a - b`.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not edit files when user notes an issue',
    prompt: 'The add function subtracts numbers.',
    files: FILES,
    params: { timeout: 20000 }, // 20s timeout
    assert: async (rig) => {
      // No logged tool call may be one of the edit tools.
      const mutatingCalls = rig
        .readToolLogs()
        .filter(({ toolRequest }) => EDIT_TOOL_NAMES.has(toolRequest.name));
      expect(mutatingCalls.length).toBe(0);

      // The seeded bug must still be present on disk.
      expect(rig.readFile('app.ts')).toContain('a - b');
    },
  });
});