/** * @license * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import { join } from 'node:path'; import { TestRig } from './test-helper.js'; interface PromptCommand { prompt: (testFile: string) => string; tool: string; command: string; expectedSuccessResult: string; expectedFailureResult: string; } const ECHO_PROMPT: PromptCommand = { command: 'echo', prompt: () => `Use the \`echo POLICY_TEST_ECHO_COMMAND\` shell command. On success, ` + `your final response must ONLY be "POLICY_TEST_ECHO_COMMAND". If the ` + `command fails output AR NAR and stop.`, tool: 'run_shell_command', expectedSuccessResult: 'POLICY_TEST_ECHO_COMMAND', expectedFailureResult: 'AR NAR', }; const READ_FILE_PROMPT: PromptCommand = { prompt: (testFile: string) => `Read the file ${testFile} and tell me what language it is, if the ` + `read_file tool fails output AR NAR and stop.`, tool: 'read_file', command: '', expectedSuccessResult: 'Latin', expectedFailureResult: 'AR NAR', }; async function waitForToolCallLog( rig: TestRig, tool: string, command: string, timeout: number = 15000, ) { const foundToolCall = await rig.waitForToolCall(tool, timeout, (args) => args.toLowerCase().includes(command.toLowerCase()), ); expect(foundToolCall).toBe(true); const toolLogs = rig .readToolLogs() .filter((toolLog) => toolLog.toolRequest.name === tool); const log = toolLogs.find( (toolLog) => !command || toolLog.toolRequest.args.toLowerCase().includes(command.toLowerCase()), ); // The policy engine should have logged the tool call expect(log).toBeTruthy(); return log; } async function verifyToolExecution( rig: TestRig, promptCommand: PromptCommand, result: string, expectAllowed: boolean, expectedDenialString?: string, ) { const log = await waitForToolCallLog( rig, promptCommand.tool, promptCommand.command, ); if (expectAllowed) { expect(log!.toolRequest.success).toBe(true); expect(result).not.toContain('Tool execution denied by policy'); expect(result).not.toContain(`Tool "${promptCommand.tool}" not found`); expect(result).toContain(promptCommand.expectedSuccessResult); } else { expect(log!.toolRequest.success).toBe(false); expect(result).toContain( expectedDenialString || 'Tool execution denied by policy', ); expect(result).toContain(promptCommand.expectedFailureResult); } } interface TestCase { name: string; responsesFile: string; promptCommand: PromptCommand; policyContent?: string; expectAllowed: boolean; expectedDenialString?: string; } describe('Policy Engine Headless Mode', () => { let rig: TestRig; let testFile: string; beforeEach(() => { rig = new TestRig(); }); afterEach(async () => { if (rig) { await rig.cleanup(); } }); const runTestCase = async (tc: TestCase) => { const fakeResponsesPath = join(import.meta.dirname, tc.responsesFile); rig.setup(tc.name, { fakeResponsesPath }); testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n'); const args = ['-p', tc.promptCommand.prompt(testFile)]; if (tc.policyContent) { const policyPath = rig.createFile('test-policy.toml', tc.policyContent); args.push('--policy', policyPath); } const result = await rig.run({ args, approvalMode: 'default', }); await verifyToolExecution( rig, tc.promptCommand, result, tc.expectAllowed, tc.expectedDenialString, ); }; const testCases = [ { name: 'should deny ASK_USER tools by default in headless mode', responsesFile: 'policy-headless-shell-denied.responses', promptCommand: ECHO_PROMPT, expectAllowed: false, expectedDenialString: 'Tool "run_shell_command" not found', }, { name: 'should allow ASK_USER tools in headless mode if explicitly allowed via policy file', responsesFile: 'policy-headless-shell-allowed.responses', promptCommand: ECHO_PROMPT, policyContent: ` [[rule]] toolName = "run_shell_command" decision = "allow" priority = 100 `, expectAllowed: true, }, { name: 'should allow read-only tools by default in headless mode', responsesFile: 'policy-headless-readonly.responses', promptCommand: READ_FILE_PROMPT, expectAllowed: true, }, { name: 'should allow specific shell commands in policy file', responsesFile: 'policy-headless-shell-allowed.responses', promptCommand: ECHO_PROMPT, policyContent: ` [[rule]] toolName = "run_shell_command" commandPrefix = "${ECHO_PROMPT.command}" decision = "allow" priority = 100 `, expectAllowed: true, }, { name: 'should deny other shell commands in policy file', responsesFile: 'policy-headless-shell-denied.responses', promptCommand: ECHO_PROMPT, policyContent: ` [[rule]] toolName = "run_shell_command" commandPrefix = "node" decision = "allow" priority = 100 `, expectAllowed: false, expectedDenialString: 'Tool execution denied by policy', }, ]; it.each(testCases)( '$name', async (tc) => { await runTestCase(tc); }, // Large timeout for regeneration process.env['REGENERATE_MODEL_GOLDENS'] === 'true' ? 120000 : undefined, ); });