2025-06-16 08:27:29 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2025-12-15 11:11:08 -08:00
import { describe , it , expect , beforeEach , afterEach } from 'vitest' ;
2026-02-05 10:07:47 -08:00
import {
TestRig ,
printDebugInfo ,
assertModelHasOutput ,
checkModelOutputContent ,
} from './test-helper.js' ;
2025-10-06 12:15:21 -07:00
import { getShellConfiguration } from '../packages/core/src/utils/shell-utils.js' ;
// Resolve the shell once with a top-level statement; the helper functions in
// this file switch on `shell` to pick a command for that shell.
const { shell } = getShellConfiguration ( ) ;
/**
 * Picks the line-counting command for the active shell.
 *
 * @returns `command` is the text embedded in test prompts; `tool` is the bare
 *   executable/cmdlet name used when building `--allowed-tools` patterns.
 */
function getLineCountCommand(): { command: string; tool: string } {
  switch (shell) {
    case 'powershell':
      return { command: `Measure-Object -Line`, tool: 'Measure-Object' };
    case 'cmd':
      return { command: `find /c /v`, tool: 'find' };
    case 'bash':
    default:
      // Any shell that is not powershell/cmd falls back to the bash command.
      return { command: `wc -l`, tool: 'wc' };
  }
}
2025-06-16 08:27:29 -07:00
2025-10-16 17:25:30 -07:00
/**
 * Returns a command with deliberately invalid syntax for the active shell.
 *
 * The doubled operators (`| |`, `> >`) are intentional: callers describe this
 * command to the model as having invalid syntax. The text carries no leading
 * or trailing whitespace because callers later search for it as a substring
 * of the recorded tool arguments.
 */
function getInvalidCommand(): string {
  switch (shell) {
    case 'powershell':
      return `Get-ChildItem | | Select-Object`;
    case 'cmd':
      return `dir | | findstr foo`;
    case 'bash':
    default:
      return `echo "hello" > > file`;
  }
}
/**
 * Names the directory-listing command for the active shell; tests use it as
 * the one command placed on the allowlist.
 */
function getAllowedListCommand(): string {
  if (shell === 'powershell') {
    return 'Get-ChildItem';
  }
  if (shell === 'cmd') {
    return 'dir';
  }
  // 'bash' and every other shell value share the same answer.
  return 'ls';
}
/**
 * Builds a file-reading command for the active shell.
 *
 * @param testFile Path of the file the command should read.
 * @returns `command` is the full command line; `tool` is the executable name
 *   that callers look for (case-insensitively) in the recorded tool arguments.
 */
function getDisallowedFileReadCommand(testFile: string): {
  command: string;
  tool: string;
} {
  // No padding inside the quotes: any extra blank would become part of the path.
  const quotedPath = `"${testFile}"`;
  switch (shell) {
    case 'powershell':
      return {
        command: `powershell -Command "Get-Content ${quotedPath}"`,
        tool: 'powershell',
      };
    case 'cmd':
      return { command: `cmd /c type ${quotedPath}`, tool: 'cmd' };
    case 'bash':
    default:
      return {
        command: `node -e "console.log(require('fs').readFileSync('${testFile}', 'utf8'))"`,
        tool: 'node',
      };
  }
}
2025-10-23 16:55:01 -04:00
/**
 * Builds a two-segment chained command (`<echo> && <list>`) for the active
 * shell.
 *
 * @returns `allowPattern` names only the first segment's command; `command`
 *   is the full chained command line.
 */
function getChainedEchoCommand(): { allowPattern: string; command: string } {
  const secondCommand = getAllowedListCommand();
  switch (shell) {
    // Case labels must equal the shell identifiers exactly (no padding),
    // otherwise every shell silently falls through to the default branch.
    case 'powershell':
      return {
        allowPattern: 'Write-Output',
        command: `Write-Output "foo" && ${secondCommand}`,
      };
    case 'cmd':
    case 'bash':
    default:
      // cmd and bash use the same echo-based chain.
      return {
        allowPattern: 'echo',
        command: `echo "foo" && ${secondCommand}`,
      };
  }
}
2025-08-12 15:57:27 -07:00
describe(' run_shell_command ', () => {
2025-12-15 11:11:08 -08:00
// A fresh TestRig is created before every test and cleaned up afterwards.
let rig: TestRig;

beforeEach(() => {
  rig = new TestRig();
});

afterEach(async () => {
  await rig.cleanup();
});
2025-08-12 15:57:27 -07:00
// Passes the prompt as CLI args and expects a run_shell_command tool call
// whose echoed text shows up in the model output.
it('should be able to run a shell command', async () => {
  await rig.setup('should be able to run a shell command', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  const prompt = `Please run the command "echo hello-world" and show me the output`;

  const result = await rig.run({ args: prompt });

  const foundToolCall = await rig.waitForToolCall('run_shell_command');

  // Add debugging information
  if (!foundToolCall || !result.includes('hello-world')) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Contains hello-world': result.includes('hello-world'),
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  assertModelHasOutput(result);
  checkModelOutputContent(result, {
    expectedContent: ['hello-world', 'exit code 0'],
    testName: 'Shell command test',
  });
});
2025-07-05 08:27:22 -07:00
2025-08-12 15:57:27 -07:00
// Same flow as the args-based test, but the prompt is delivered on stdin.
it('should be able to run a shell command via stdin', async () => {
  await rig.setup('should be able to run a shell command via stdin', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  const prompt = `Please run the command "echo test-stdin" and show me what it outputs`;

  const result = await rig.run({ stdin: prompt });

  const foundToolCall = await rig.waitForToolCall('run_shell_command');

  // Add debugging information
  if (!foundToolCall || !result.includes('test-stdin')) {
    printDebugInfo(rig, result, {
      'Test type': 'Stdin test',
      'Found tool call': foundToolCall,
      'Contains test-stdin': result.includes('test-stdin'),
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  assertModelHasOutput(result);
  checkModelOutputContent(result, {
    expectedContent: 'test-stdin',
    testName: 'Shell command stdin test',
  });
});
2025-09-25 17:32:40 -07:00
2025-10-24 13:04:40 -07:00
// Allowlists only the line-count tool via --allowed-tools and expects the
// resulting run_shell_command call to be recorded as successful.
it.skip('should run allowed sub-command in non-interactive mode', async () => {
  await rig.setup('should run allowed sub-command in non-interactive mode');

  const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const { tool, command } = getLineCountCommand();
  const prompt = `use ${command} to tell me how many lines there are in ${testFile}`;

  // Provide the prompt via stdin to simulate non-interactive mode
  const result = await rig.run({
    args: [`--allowed-tools=run_shell_command(${tool})`],
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

  if (!foundToolCall) {
    const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
      name: toolRequest.name,
      success: toolRequest.success,
      args: toolRequest.args,
    }));

    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Allowed tools flag': `run_shell_command(${tool})`,
      Prompt: prompt,
      'Tool logs': toolLogs,
      Result: result,
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // `find` + optional chaining: a missing log entry fails the assertion
  // instead of throwing a TypeError on `undefined.toolRequest`.
  const toolCall = rig
    .readToolLogs()
    .find((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolCall?.toolRequest.success).toBe(true);
});
2025-11-09 14:36:13 -08:00
// Allowlists run_shell_command without a parenthesised sub-command and
// expects the tool call to be recorded as successful.
it.skip('should succeed with no parens in non-interactive mode', async () => {
  await rig.setup('should succeed with no parens in non-interactive mode');

  const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const { command } = getLineCountCommand();
  const prompt = `use ${command} to tell me how many lines there are in ${testFile}`;

  const result = await rig.run({
    args: '--allowed-tools=run_shell_command',
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

  if (!foundToolCall) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // A missing log entry fails the assertion rather than throwing.
  const toolCall = rig
    .readToolLogs()
    .find((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolCall?.toolRequest.success).toBe(true);
});
2026-01-21 10:43:48 -05:00
// Runs the line-count prompt with approvalMode 'yolo' and expects the
// run_shell_command call to be recorded as successful.
it('should succeed in yolo mode', async () => {
  // process.platform reports exactly 'win32' on Windows; any padding in the
  // literal would make this comparison always false.
  const isWindows = process.platform === 'win32';
  await rig.setup('should succeed in yolo mode', {
    settings: {
      tools: { core: ['run_shell_command'] },
      // The interactive shell is turned off only on Windows.
      shell: isWindows ? { enableInteractiveShell: false } : undefined,
    },
  });

  const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const { command } = getLineCountCommand();
  const prompt = `use ${command} to tell me how many lines there are in ${testFile}`;

  const result = await rig.run({
    args: prompt,
    approvalMode: 'yolo',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

  if (!foundToolCall) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // A missing log entry fails the assertion rather than throwing.
  const toolCall = rig
    .readToolLogs()
    .find((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolCall?.toolRequest.success).toBe(true);
});
2025-10-24 13:04:40 -07:00
// Uses the `ShellTool(...)` spelling in --allowed-tools and expects the call
// to be logged under the run_shell_command name and to succeed.
it.skip('should work with ShellTool alias', async () => {
  await rig.setup('should work with ShellTool alias');

  const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const { tool, command } = getLineCountCommand();
  const prompt = `use ${command} to tell me how many lines there are in ${testFile}`;

  const result = await rig.run({
    args: `--allowed-tools=ShellTool(${tool})`,
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

  if (!foundToolCall) {
    const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
      name: toolRequest.name,
      success: toolRequest.success,
      args: toolRequest.args,
    }));

    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Allowed tools flag': `ShellTool(${tool})`,
      Prompt: prompt,
      'Tool logs': toolLogs,
      Result: result,
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // A missing log entry fails the assertion rather than throwing.
  const toolCall = rig
    .readToolLogs()
    .find((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolCall?.toolRequest.success).toBe(true);
});
2025-10-14 11:36:49 -07:00
// TODO(#11062): Un-skip this once we can make it reliable by using hard coded
// model responses.
//
// Passes two separate --allowed-tools flags and expects one successful
// run_shell_command call per allowlisted command.
it.skip('should combine multiple --allowed-tools flags', async () => {
  await rig.setup('should combine multiple --allowed-tools flags');

  const { tool, command } = getLineCountCommand();
  const prompt =
    `use both ${command} and ls to count the number of lines in files in this ` +
    `directory. Do not pipe these commands into each other, run them separately.`;

  const result = await rig.run({
    args: [
      `--allowed-tools=run_shell_command(${tool})`,
      '--allowed-tools=run_shell_command(ls)',
    ],
    stdin: prompt,
    approvalMode: 'default',
  });

  // for...of walks the command names; for...in would yield the array
  // indices "0" and "1" and never look for the real commands.
  for (const expected of ['ls', tool]) {
    const foundToolCall = await rig.waitForToolCall(
      'run_shell_command',
      15000,
      // Both sides are lower-cased so mixed-case tool names can still match.
      (args) =>
        args.toLowerCase().includes(`"command": "${expected.toLowerCase()}`),
    );

    if (!foundToolCall) {
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
      });
    }

    expect(
      foundToolCall,
      `Expected to find a run_shell_command tool call to "${expected}",` +
        // Serialize explicitly; joining objects would print "[object Object]".
        ` got ${JSON.stringify(rig.readToolLogs(), null, 2)}`,
    ).toBeTruthy();
  }

  const toolLogs = rig
    .readToolLogs()
    .filter((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(
    toolLogs.length,
    JSON.stringify(toolLogs, null, 2),
  ).toBeGreaterThanOrEqual(2);

  for (const toolLog of toolLogs) {
    expect(
      toolLog.toolRequest.success,
      `Expected tool call ${JSON.stringify(toolLog)} to succeed`,
    ).toBe(true);
  }
});
2025-10-23 09:50:50 -04:00
// Allowlists only the listing command, asks the model to run a different
// file-reading command, and expects that invocation to be logged as failed.
it('should reject commands not on the allowlist', async () => {
  await rig.setup('should reject commands not on the allowlist', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
  const allowedCommand = getAllowedListCommand();
  const disallowed = getDisallowedFileReadCommand(testFile);
  const prompt =
    `I am testing the allowed tools configuration. ` +
    `Attempt to run "${disallowed.command}" to read the contents of ${testFile}. ` +
    `If the command fails because it is not permitted, respond with the single word FAIL. ` +
    `If it succeeds, respond with SUCCESS.`;

  const result = await rig.run({
    args: `--allowed-tools=run_shell_command(${allowedCommand})`,
    stdin: prompt,
    approvalMode: 'default',
  });

  if (!result.toLowerCase().includes('fail')) {
    printDebugInfo(rig, result, {
      Result: result,
      AllowedCommand: allowedCommand,
      DisallowedCommand: disallowed.command,
    });
  }
  expect(result).toContain('FAIL');

  const foundToolCall = await rig.waitForToolCall(
    'run_shell_command',
    15000,
    (args) => args.toLowerCase().includes(disallowed.tool.toLowerCase()),
  );

  if (!foundToolCall) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      ToolLogs: rig.readToolLogs(),
    });
  }
  expect(foundToolCall).toBe(true);

  const toolLogs = rig
    .readToolLogs()
    .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
  const failureLog = toolLogs.find((toolLog) =>
    toolLog.toolRequest.args
      .toLowerCase()
      .includes(disallowed.tool.toLowerCase()),
  );

  if (!failureLog || failureLog.toolRequest.success) {
    printDebugInfo(rig, result, {
      ToolLogs: toolLogs,
      DisallowedTool: disallowed.tool,
    });
  }

  expect(
    failureLog,
    'Expected failing run_shell_command invocation',
  ).toBeTruthy();
  expect(failureLog?.toolRequest.success).toBe(false);
});
2025-10-24 14:25:54 -04:00
// TODO(#11966): Deflake this test and re-enable once the underlying race is resolved.
//
// Sends a `!{...}` shell injection whose first segment is allowlisted and
// asserts that no chained run_shell_command call is recorded as successful.
it.skip('should reject chained commands when only the first segment is allowlisted in non-interactive mode', async () => {
  await rig.setup(
    'should reject chained commands when only the first segment is allowlisted',
  );

  const chained = getChainedEchoCommand();
  const shellInjection = `!{${chained.command}}`;

  await rig.run({
    args: `--allowed-tools=ShellTool(${chained.allowPattern})`,
    stdin: `${shellInjection}\n`,
    approvalMode: 'default',
  });

  // CLI should refuse to execute the chained command without scheduling run_shell_command.
  const toolLogs = rig
    .readToolLogs()
    .filter((log) => log.toolRequest.name === 'run_shell_command');

  // Success is false because tool is in the scheduled state.
  for (const log of toolLogs) {
    expect(log.toolRequest.success).toBe(false);
    expect(log.toolRequest.args).toContain('&&');
  }
});
2025-10-10 10:48:24 -07:00
// Combines a sub-command-specific allowlist entry with a bare
// run_shell_command entry and expects an unrelated echo command to succeed.
it('should allow all with "ShellTool" and other specific tools', async () => {
  await rig.setup(
    'should allow all with "ShellTool" and other specific tools',
    {
      settings: { tools: { core: ['run_shell_command'] } },
    },
  );

  const { tool } = getLineCountCommand();
  const prompt = `Please run the command "echo test-allow-all" and show me the output`;

  const result = await rig.run({
    args: [
      `--allowed-tools=run_shell_command(${tool})`,
      '--allowed-tools=run_shell_command',
    ],
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

  if (!foundToolCall || !result.includes('test-allow-all')) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      Result: result,
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // A missing log entry fails the assertion rather than throwing.
  const toolCall = rig
    .readToolLogs()
    .find((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolCall?.toolRequest.success).toBe(true);

  assertModelHasOutput(result);
  checkModelOutputContent(result, {
    expectedContent: 'test-allow-all',
    testName: 'Shell command stdin allow all',
  });
});
2025-09-25 17:32:40 -07:00
// Sets a uniquely-valued environment variable on this process and expects
// the model's answer (obtained through a shell command) to contain that value.
it('should propagate environment variables to the child process', async () => {
  await rig.setup('should propagate environment variables', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  const varName = 'GEMINI_CLI_TEST_VAR';
  const varValue = `test-value-${Math.random().toString(36).substring(7)}`;
  // Remember any pre-existing value so the finally block can restore it
  // instead of unconditionally deleting the variable.
  const previousValue = process.env[varName];
  process.env[varName] = varValue;

  try {
    const prompt = `Use echo to learn the value of the environment variable named ${varName} and tell me what it is.`;
    const result = await rig.run({ args: prompt });

    const foundToolCall = await rig.waitForToolCall('run_shell_command');

    if (!foundToolCall || !result.includes(varValue)) {
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
        'Contains varValue': result.includes(varValue),
      });
    }

    expect(
      foundToolCall,
      'Expected to find a run_shell_command tool call',
    ).toBeTruthy();

    assertModelHasOutput(result);
    checkModelOutputContent(result, {
      expectedContent: varValue,
      testName: 'Env var propagation test',
    });
    expect(result).toContain(varValue);
  } finally {
    if (previousValue === undefined) {
      delete process.env[varName];
    } else {
      process.env[varName] = previousValue;
    }
  }
});
2025-10-21 08:01:13 -07:00
// Creates a uniquely named file and expects the model's directory listing
// (obtained through a shell command) to mention it.
it.skip('should run a platform-specific file listing command', async () => {
  await rig.setup('should run platform-specific file listing');

  const fileName = `test-file-${Math.random().toString(36).substring(7)}.txt`;
  rig.createFile(fileName, 'test content');

  const prompt = `Run a shell command to list the files in the current directory and tell me what they are.`;
  const result = await rig.run({ args: prompt });

  const foundToolCall = await rig.waitForToolCall('run_shell_command');

  // Debugging info
  if (!foundToolCall || !result.includes(fileName)) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Contains fileName': result.includes(fileName),
    });
  }

  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  assertModelHasOutput(result);
  checkModelOutputContent(result, {
    expectedContent: fileName,
    testName: 'Platform-specific listing test',
  });
  expect(result).toContain(fileName);
});
2025-10-16 17:25:30 -07:00
// Asks the model to run a syntactically invalid command and expects the
// matching run_shell_command invocation to be logged as unsuccessful.
it('rejects invalid shell expressions', async () => {
  await rig.setup('rejects invalid shell expressions', {
    settings: {
      tools: {
        core: ['run_shell_command'],
        allowed: ['run_shell_command(echo)'], // Specifically allow echo
      },
    },
  });

  const invalidCommand = getInvalidCommand();
  const result = await rig.run({
    args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
    approvalMode: 'default', // Use default mode so safety fallback triggers confirmation
  });
  expect(result).toContain('FAIL');

  // JSON-escape the command (dropping the surrounding quotes) before
  // searching for it in the recorded tool arguments.
  const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
  const foundToolCall = await rig.waitForToolCall(
    'run_shell_command',
    15000,
    (args) =>
      args.toLowerCase().includes(escapedInvalidCommand.toLowerCase()),
  );

  if (!foundToolCall) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      EscapedCommand: escapedInvalidCommand,
      ToolLogs: rig.readToolLogs(),
    });
  }
  expect(foundToolCall).toBe(true);

  const toolLogs = rig
    .readToolLogs()
    .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
  const failureLog = toolLogs.find((toolLog) =>
    toolLog.toolRequest.args
      .toLowerCase()
      .includes(escapedInvalidCommand.toLowerCase()),
  );

  if (!failureLog || failureLog.toolRequest.success) {
    printDebugInfo(rig, result, {
      ToolLogs: toolLogs,
      EscapedCommand: escapedInvalidCommand,
    });
  }

  expect(
    failureLog,
    'Expected failing run_shell_command invocation for invalid syntax',
  ).toBeTruthy();
  expect(failureLog?.toolRequest.success).toBe(false);
});
2025-06-16 08:27:29 -07:00
} ) ;