mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
206 lines
5.4 KiB
TypeScript
206 lines
5.4 KiB
TypeScript
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
import { join } from 'node:path';
|
|
import { TestRig } from './test-helper.js';
|
|
|
|
/**
 * Describes one prompt scenario: the prompt sent to the CLI, the tool call
 * it is expected to trigger, and the marker strings that the final output
 * is checked against.
 */
interface PromptCommand {
  /** Name of the tool whose call is looked up in the tool logs. */
  tool: string;
  /**
   * Substring searched for (case-insensitively) in the logged tool
   * arguments; an empty string matches any call of `tool`.
   */
  command: string;
  /** Builds the prompt text; receives the path of the test file. */
  prompt: (testFile: string) => string;
  /** Text the output must contain when the tool call is allowed. */
  expectedSuccessResult: string;
  /** Text the output must contain when the tool call is denied. */
  expectedFailureResult: string;
}
|
|
|
|
/**
 * Scenario that asks the model to run `echo POLICY_TEST_ECHO_COMMAND`
 * through the `run_shell_command` tool and to answer with a fixed marker
 * on success or `AR NAR` on failure.
 */
const ECHO_PROMPT: PromptCommand = {
  tool: 'run_shell_command',
  command: 'echo',
  expectedSuccessResult: 'POLICY_TEST_ECHO_COMMAND',
  expectedFailureResult: 'AR NAR',
  // The fragments are joined with single spaces to form one prompt sentence.
  prompt: () =>
    [
      'Use the `echo POLICY_TEST_ECHO_COMMAND` shell command. On success,',
      'your final response must ONLY be "POLICY_TEST_ECHO_COMMAND". If the',
      'command fails output AR NAR and stop.',
    ].join(' '),
};
|
|
|
|
/**
 * Scenario that asks the model to read the test file with the `read_file`
 * tool and name its language, answering `AR NAR` if the tool fails.
 */
const READ_FILE_PROMPT: PromptCommand = {
  tool: 'read_file',
  // Empty on purpose: any logged `read_file` call counts as a match.
  command: '',
  expectedSuccessResult: 'Latin',
  expectedFailureResult: 'AR NAR',
  // The fragments are joined with a single space to form one prompt sentence.
  prompt: (filePath: string) =>
    [
      `Read the file ${filePath} and tell me what language it is, if the`,
      'read_file tool fails output AR NAR and stop.',
    ].join(' '),
};
|
|
|
|
/**
 * Waits until the rig reports a call to `tool` whose arguments contain
 * `command` (case-insensitive), then returns the matching tool-log entry.
 *
 * Fails the surrounding test if the call is not observed or no matching
 * log entry is found.
 *
 * @param rig Test rig driving the CLI under test.
 * @param tool Name of the tool to look for.
 * @param command Substring expected in the tool arguments; an empty string
 *   matches any call of `tool`.
 * @param timeout Value forwarded to `rig.waitForToolCall`.
 * @returns The first logged call of `tool` that matches `command`.
 */
async function waitForToolCallLog(
  rig: TestRig,
  tool: string,
  command: string,
  timeout: number = 15000,
) {
  const needle = command.toLowerCase();

  const sawToolCall = await rig.waitForToolCall(tool, timeout, (args) =>
    args.toLowerCase().includes(needle),
  );
  expect(sawToolCall).toBe(true);

  // First logged call of this tool whose arguments mention the command.
  // The argument check is skipped entirely when no command is given.
  const matchingLog = rig
    .readToolLogs()
    .find(
      ({ toolRequest }) =>
        toolRequest.name === tool &&
        (!command || toolRequest.args.toLowerCase().includes(needle)),
    );

  // The policy engine should have logged the tool call
  expect(matchingLog).toBeTruthy();
  return matchingLog;
}
|
|
|
|
/**
 * Asserts that the logged tool call and the CLI output both agree with the
 * expected policy outcome.
 *
 * @param rig Test rig that ran the CLI.
 * @param promptCommand Scenario describing the tool, command and markers.
 * @param result Output returned by the CLI run.
 * @param expectAllowed Whether the tool call should have succeeded.
 * @param expectedDenialString Text expected in the output when the call is
 *   denied; falls back to `Tool execution denied by policy` when omitted
 *   or empty.
 */
async function verifyToolExecution(
  rig: TestRig,
  promptCommand: PromptCommand,
  result: string,
  expectAllowed: boolean,
  expectedDenialString?: string,
) {
  const { tool, command, expectedSuccessResult, expectedFailureResult } =
    promptCommand;
  const log = await waitForToolCallLog(rig, tool, command);

  // The logged success flag must mirror the expected policy decision.
  expect(log!.toolRequest.success).toBe(expectAllowed);

  if (!expectAllowed) {
    const denialText = expectedDenialString || 'Tool execution denied by policy';
    expect(result).toContain(denialText);
    expect(result).toContain(expectedFailureResult);
    return;
  }

  expect(result).not.toContain('Tool execution denied by policy');
  expect(result).not.toContain(`Tool "${tool}" not found`);
  expect(result).toContain(expectedSuccessResult);
}
|
|
|
|
/** One table-driven scenario for the headless policy tests. */
interface TestCase {
  /** Test title; also passed to `rig.setup` as the test name. */
  name: string;
  /** Fake-responses file, resolved relative to this test file's directory. */
  responsesFile: string;
  /** Prompt scenario to run. */
  promptCommand: PromptCommand;
  /** Whether the tool call is expected to be allowed. */
  expectAllowed: boolean;
  /** TOML policy written to a file and passed via `--policy` when present. */
  policyContent?: string;
  /** Text expected in the output when the tool call is denied. */
  expectedDenialString?: string;
}
|
|
|
|
describe('Policy Engine Headless Mode', () => {
  let rig: TestRig;

  beforeEach(() => {
    rig = new TestRig();
  });

  afterEach(async () => {
    if (rig) {
      await rig.cleanup();
    }
  });

  /**
   * Builds a TOML policy with a single priority-100 rule that allows
   * `run_shell_command`, optionally restricted to one command prefix.
   *
   * @param commandPrefix When given, adds a `commandPrefix` entry to the rule.
   */
  const allowShellPolicy = (commandPrefix?: string): string =>
    [
      '',
      '[[rule]]',
      'toolName = "run_shell_command"',
      ...(commandPrefix === undefined
        ? []
        : [`commandPrefix = "${commandPrefix}"`]),
      'decision = "allow"',
      'priority = 100',
      '',
    ].join('\n');

  /**
   * Runs one scenario: sets up the rig with the fake responses, creates the
   * test file (and the policy file, if any), invokes the CLI with `-p`, and
   * verifies the outcome.
   */
  const runTestCase = async (tc: TestCase) => {
    const fakeResponsesPath = join(import.meta.dirname, tc.responsesFile);
    rig.setup(tc.name, { fakeResponsesPath });

    // Kept local: the file path is only needed to build this run's prompt.
    const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
    const args = ['-p', tc.promptCommand.prompt(testFile)];

    if (tc.policyContent) {
      const policyPath = rig.createFile('test-policy.toml', tc.policyContent);
      args.push('--policy', policyPath);
    }

    const result = await rig.run({
      args,
      approvalMode: 'default',
    });

    await verifyToolExecution(
      rig,
      tc.promptCommand,
      result,
      tc.expectAllowed,
      tc.expectedDenialString,
    );
  };

  // Annotated so each literal is checked against TestCase; a misspelled
  // optional field is then a compile error instead of being ignored.
  const testCases: TestCase[] = [
    {
      name: 'should deny ASK_USER tools by default in headless mode',
      responsesFile: 'policy-headless-shell-denied.responses',
      promptCommand: ECHO_PROMPT,
      expectAllowed: false,
      expectedDenialString: 'Tool "run_shell_command" not found',
    },
    {
      name: 'should allow ASK_USER tools in headless mode if explicitly allowed via policy file',
      responsesFile: 'policy-headless-shell-allowed.responses',
      promptCommand: ECHO_PROMPT,
      policyContent: allowShellPolicy(),
      expectAllowed: true,
    },
    {
      name: 'should allow read-only tools by default in headless mode',
      responsesFile: 'policy-headless-readonly.responses',
      promptCommand: READ_FILE_PROMPT,
      expectAllowed: true,
    },
    {
      name: 'should allow specific shell commands in policy file',
      responsesFile: 'policy-headless-shell-allowed.responses',
      promptCommand: ECHO_PROMPT,
      policyContent: allowShellPolicy(ECHO_PROMPT.command),
      expectAllowed: true,
    },
    {
      name: 'should deny other shell commands in policy file',
      responsesFile: 'policy-headless-shell-denied.responses',
      promptCommand: ECHO_PROMPT,
      // Only `node` is allowed, so the `echo` command must be denied.
      policyContent: allowShellPolicy('node'),
      expectAllowed: false,
      expectedDenialString: 'Tool execution denied by policy',
    },
  ];

  it.each(testCases)(
    '$name',
    async (tc) => {
      await runTestCase(tc);
    },
    // Large timeout for regeneration
    process.env['REGENERATE_MODEL_GOLDENS'] === 'true' ? 120000 : undefined,
  );
});
|