2025-06-16 08:27:29 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2025-12-15 11:11:08 -08:00
import { describe , it , expect , beforeEach , afterEach } from 'vitest' ;
2026-02-05 10:07:47 -08:00
import {
TestRig ,
printDebugInfo ,
assertModelHasOutput ,
checkModelOutputContent ,
} from './test-helper.js' ;
2025-10-06 12:15:21 -07:00
import { getShellConfiguration } from '../packages/core/src/utils/shell-utils.js' ;
// Shell identifier reported by getShellConfiguration(); the helper functions in this
// file branch on it ('powershell', 'cmd', 'bash').
const shell = getShellConfiguration().shell;
/**
 * Picks a line-counting command for the active shell.
 *
 * @returns `command` is the text the tests interpolate into their prompts; `tool` is the
 *   bare command name the tests place inside `--allowed-tools=...(<tool>)` patterns.
 */
function getLineCountCommand(): { command: string; tool: string } {
  // NOTE(review): the command literals carry a leading and trailing space — confirm
  // whether that padding is intentional before trimming it.
  if (shell === 'powershell') {
    return { command: ` Measure-Object -Line `, tool: 'Measure-Object' };
  }
  if (shell === 'cmd') {
    return { command: ` find /c /v `, tool: 'find' };
  }
  // 'bash' and any other value fall through to wc.
  return { command: ` wc -l `, tool: 'wc' };
}
2025-06-16 08:27:29 -07:00
2025-10-16 17:25:30 -07:00
/**
 * Returns a command line with deliberately invalid syntax for the active shell
 * (a doubled pipe or doubled redirection operator separated by a space).
 *
 * The "rejects invalid shell expressions" test substring-matches this exact text against
 * the recorded tool-call arguments, so the literals must not carry leading or trailing
 * whitespace that the model would not reproduce when issuing the command.
 *
 * @returns The invalid command text, without surrounding padding.
 */
function getInvalidCommand(): string {
  switch (shell) {
    case 'powershell':
      return 'Get-ChildItem | | Select-Object';
    case 'cmd':
      return 'dir | | findstr foo';
    case 'bash':
    default:
      return 'echo "hello" > > file';
  }
}
/**
 * Returns the directory-listing command for the active shell. The tests use it as the
 * command that is placed on the allowlist.
 */
function getAllowedListCommand(): string {
  if (shell === 'powershell') {
    return 'Get-ChildItem';
  }
  if (shell === 'cmd') {
    return 'dir';
  }
  // 'bash' and any other value.
  return 'ls';
}
/**
 * Builds a shell-specific command that reads `testFile` through a program that is not the
 * allowlisted listing command.
 *
 * The path is wrapped in double quotes with no padding: whitespace inside the quotes (or
 * inside the `readFileSync('...')` argument) would become part of the path itself.
 *
 * @param testFile Path of the file the command should read.
 * @returns `command` is the full command line to put in the prompt; `tool` is the program
 *   name the test searches for in the recorded tool-call arguments.
 */
function getDisallowedFileReadCommand(testFile: string): {
  command: string;
  tool: string;
} {
  const quotedPath = `"${testFile}"`;
  switch (shell) {
    case 'powershell':
      return {
        command: `powershell -Command "Get-Content ${quotedPath}"`,
        tool: 'powershell',
      };
    case 'cmd':
      return { command: `cmd /c type ${quotedPath}`, tool: 'cmd' };
    case 'bash':
    default:
      return {
        command: `node -e "console.log(require('fs').readFileSync('${testFile}', 'utf8'))"`,
        tool: 'node',
      };
  }
}
2025-10-23 16:55:01 -04:00
/**
 * Builds a two-segment `&&` chain whose first segment is an echo-style command and whose
 * second segment is the shell's listing command.
 *
 * @returns `allowPattern` is the name of the first segment's command (the one a test
 *   allowlists); `command` is the whole chained command line.
 */
function getChainedEchoCommand(): { allowPattern: string; command: string } {
  const trailingCommand = getAllowedListCommand();
  if (shell === 'powershell') {
    return {
      allowPattern: 'Write-Output',
      command: ` Write-Output "foo" && ${trailingCommand} `,
    };
  }
  // 'cmd', 'bash' and any other value all use the same echo-based chain.
  return {
    allowPattern: 'echo',
    command: ` echo "foo" && ${trailingCommand} `,
  };
}
2025-08-12 15:57:27 -07:00
describe ( 'run_shell_command' , ( ) = > {
2025-12-15 11:11:08 -08:00
// A new TestRig is constructed before every test and cleaned up after it.
let rig: TestRig;

beforeEach(() => {
  rig = new TestRig();
});

afterEach(async () => {
  return await rig.cleanup();
});
2025-08-12 15:57:27 -07:00
it('should be able to run a shell command', async () => {
  // settings.tools.core lists only run_shell_command for this run.
  await rig.setup('should be able to run a shell command', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  const output = await rig.run({
    args: ` Please run the command "echo hello-world" and show me the output `,
  });
  const sawShellCall = await rig.waitForToolCall('run_shell_command');
  const mentionsMarker = output.includes('hello-world');

  // Print diagnostics before asserting so a failing run leaves context behind.
  if (!(sawShellCall && mentionsMarker)) {
    printDebugInfo(rig, output, {
      'Found tool call': sawShellCall,
      'Contains hello-world': mentionsMarker,
    });
  }

  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  assertModelHasOutput(output);
  checkModelOutputContent(output, {
    expectedContent: ['hello-world', 'exit code 0'],
    testName: 'Shell command test',
  });
});
2025-07-05 08:27:22 -07:00
2025-08-12 15:57:27 -07:00
it('should be able to run a shell command via stdin', async () => {
  await rig.setup('should be able to run a shell command via stdin', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  // The prompt is passed on stdin here instead of as a CLI argument.
  const output = await rig.run({
    stdin: ` Please run the command "echo test-stdin" and show me what it outputs `,
  });
  const sawShellCall = await rig.waitForToolCall('run_shell_command');
  const mentionsMarker = output.includes('test-stdin');

  // Print diagnostics before asserting so a failing run leaves context behind.
  if (!(sawShellCall && mentionsMarker)) {
    printDebugInfo(rig, output, {
      'Test type': 'Stdin test',
      'Found tool call': sawShellCall,
      'Contains test-stdin': mentionsMarker,
    });
  }

  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  assertModelHasOutput(output);
  checkModelOutputContent(output, {
    expectedContent: 'test-stdin',
    testName: 'Shell command stdin test',
  });
});
2025-09-25 17:32:40 -07:00
2025-10-24 13:04:40 -07:00
it.skip('should run allowed sub-command in non-interactive mode', async () => {
  await rig.setup('should run allowed sub-command in non-interactive mode');
  const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const { tool, command } = getLineCountCommand();
  const prompt = ` use ${command} to tell me how many lines there are in ${testFile} `;

  // Built once so the flag and the debug output agree. It must contain no padding: the
  // value is a single argv element and the text in parentheses is the allowlist pattern.
  const allowedTools = `run_shell_command(${tool})`;

  // Provide the prompt via stdin to simulate non-interactive mode
  const result = await rig.run({
    args: [`--allowed-tools=${allowedTools}`],
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
  if (!foundToolCall) {
    const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
      name: toolRequest.name,
      success: toolRequest.success,
      args: toolRequest.args,
    }));
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Allowed tools flag': allowedTools,
      Prompt: prompt,
      'Tool logs': toolLogs,
      Result: result,
    });
  }
  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // Assert the log entry exists before reading it, so a missing entry fails with a
  // readable message instead of a TypeError.
  const toolCall = rig
    .readToolLogs()
    .find((log) => log.toolRequest.name === 'run_shell_command');
  expect(toolCall, 'Expected a run_shell_command entry in the tool logs').toBeDefined();
  expect(toolCall?.toolRequest.success).toBe(true);
});
2025-11-09 14:36:13 -08:00
it.skip('should succeed with no parens in non-interactive mode', async () => {
  await rig.setup('should succeed with no parens in non-interactive mode');
  const linesFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const lineCounter = getLineCountCommand().command;

  // The allowlist names the tool only, with no parenthesised sub-command.
  const output = await rig.run({
    args: '--allowed-tools=run_shell_command',
    stdin: ` use ${lineCounter} to tell me how many lines there are in ${linesFile} `,
    approvalMode: 'default',
  });

  const sawShellCall = await rig.waitForToolCall('run_shell_command', 15000);
  if (!sawShellCall) {
    printDebugInfo(rig, output, { 'Found tool call': sawShellCall });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // Only the first logged run_shell_command request is checked.
  const [firstShellCall] = rig
    .readToolLogs()
    .filter(({ toolRequest }) => toolRequest.name === 'run_shell_command');
  expect(firstShellCall.toolRequest.success).toBe(true);
});
2026-01-21 10:43:48 -05:00
it('should succeed in yolo mode', async () => {
  // On win32 the shell settings set enableInteractiveShell to false; on other platforms
  // the `shell` setting is passed as undefined.
  const shellSettings =
    process.platform === 'win32' ? { enableInteractiveShell: false } : undefined;
  await rig.setup('should succeed in yolo mode', {
    settings: {
      tools: { core: ['run_shell_command'] },
      shell: shellSettings,
    },
  });
  const linesFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const lineCounter = getLineCountCommand().command;

  const output = await rig.run({
    args: ` use ${lineCounter} to tell me how many lines there are in ${linesFile} `,
    approvalMode: 'yolo',
  });

  const sawShellCall = await rig.waitForToolCall('run_shell_command', 15000);
  if (!sawShellCall) {
    printDebugInfo(rig, output, { 'Found tool call': sawShellCall });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // Only the first logged run_shell_command request is checked.
  const [firstShellCall] = rig
    .readToolLogs()
    .filter(({ toolRequest }) => toolRequest.name === 'run_shell_command');
  expect(firstShellCall.toolRequest.success).toBe(true);
});
2025-10-24 13:04:40 -07:00
it.skip('should work with ShellTool alias', async () => {
  await rig.setup('should work with ShellTool alias');
  const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const { tool, command } = getLineCountCommand();
  const prompt = ` use ${command} to tell me how many lines there are in ${testFile} `;

  // Built once so the flag and the debug output agree. It must contain no padding: the
  // value is a single argv element and the text in parentheses is the allowlist pattern.
  const allowedTools = `ShellTool(${tool})`;

  const result = await rig.run({
    args: `--allowed-tools=${allowedTools}`,
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
  if (!foundToolCall) {
    const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
      name: toolRequest.name,
      success: toolRequest.success,
      args: toolRequest.args,
    }));
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Allowed tools flag': allowedTools,
      Prompt: prompt,
      'Tool logs': toolLogs,
      Result: result,
    });
  }
  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // Assert the log entry exists before reading it, so a missing entry fails with a
  // readable message instead of a TypeError.
  const toolCall = rig
    .readToolLogs()
    .find((log) => log.toolRequest.name === 'run_shell_command');
  expect(toolCall, 'Expected a run_shell_command entry in the tool logs').toBeDefined();
  expect(toolCall?.toolRequest.success).toBe(true);
});
2025-10-14 11:36:49 -07:00
// TODO(#11062): Un-skip this once we can make it reliable by using hard coded
// model responses.
it.skip('should combine multiple --allowed-tools flags', async () => {
  await rig.setup('should combine multiple --allowed-tools flags');
  const { tool, command } = getLineCountCommand();
  const prompt =
    ` use both ${command} and ls to count the number of lines in files in this ` +
    ` directory. Do not pipe these commands into each other, run them separately. `;

  const result = await rig.run({
    args: [
      // No padding: each flag is one argv element and the parenthesised text is the pattern.
      `--allowed-tools=run_shell_command(${tool})`,
      '--allowed-tools=run_shell_command(ls)',
    ],
    stdin: prompt,
    approvalMode: 'default',
  });

  // Log entries are objects; serialise them explicitly, because joining or interpolating
  // them directly would only print "[object Object]".
  const describeLogs = (logs: readonly unknown[]): string =>
    logs.map((log) => JSON.stringify(log)).join('\n');

  // for...of iterates the command names; for...in would iterate the indices "0" and "1".
  for (const expected of ['ls', tool]) {
    // The args are lower-cased before matching, so the needle has to be lower-cased too
    // (the tool name can be mixed-case, e.g. 'Measure-Object').
    const needle = `"command": "${expected.toLowerCase()}`;
    const foundToolCall = await rig.waitForToolCall(
      'run_shell_command',
      15000,
      (args) => args.toLowerCase().includes(needle),
    );
    if (!foundToolCall) {
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
      });
    }
    expect(
      foundToolCall,
      `Expected to find a run_shell_command tool call to "${expected}", ` +
        `got ${describeLogs(rig.readToolLogs())}`,
    ).toBeTruthy();
  }

  const toolLogs = rig
    .readToolLogs()
    .filter((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolLogs.length, describeLogs(toolLogs)).toBeGreaterThanOrEqual(2);
  for (const toolLog of toolLogs) {
    expect(
      toolLog.toolRequest.success,
      `Expected tool call ${JSON.stringify(toolLog)} to succeed`,
    ).toBe(true);
  }
});
2025-10-23 09:50:50 -04:00
it('should reject commands not on the allowlist', async () => {
  await rig.setup('should reject commands not on the allowlist', {
    settings: { tools: { core: ['run_shell_command'] } },
  });
  const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
  const allowedCommand = getAllowedListCommand();
  const disallowed = getDisallowedFileReadCommand(testFile);
  const prompt =
    ` I am testing the allowed tools configuration. ` +
    ` Attempt to run " ${disallowed.command} " to read the contents of ${testFile} . ` +
    ` If the command fails because it is not permitted , respond with the single word FAIL . ` +
    ` If it succeeds , respond with SUCCESS . `;

  const result = await rig.run({
    // `${allowedCommand}` has to be a real placeholder with no stray spaces; otherwise the
    // flag is literal text and never names the allowed command.
    args: `--allowed-tools=run_shell_command(${allowedCommand})`,
    stdin: prompt,
    approvalMode: 'default',
  });

  if (!result.toLowerCase().includes('fail')) {
    printDebugInfo(rig, result, {
      Result: result,
      AllowedCommand: allowedCommand,
      DisallowedCommand: disallowed.command,
    });
  }
  expect(result).toContain('FAIL');

  // The disallowed program must have been requested...
  const disallowedTool = disallowed.tool.toLowerCase();
  const foundToolCall = await rig.waitForToolCall(
    'run_shell_command',
    15000,
    (args) => args.toLowerCase().includes(disallowedTool),
  );
  if (!foundToolCall) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      ToolLogs: rig.readToolLogs(),
    });
  }
  expect(foundToolCall).toBe(true);

  // ...and that request must be logged as unsuccessful.
  const toolLogs = rig
    .readToolLogs()
    .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
  const failureLog = toolLogs.find((toolLog) =>
    toolLog.toolRequest.args.toLowerCase().includes(disallowedTool),
  );
  if (!failureLog || failureLog.toolRequest.success) {
    printDebugInfo(rig, result, {
      ToolLogs: toolLogs,
      DisallowedTool: disallowed.tool,
    });
  }
  expect(
    failureLog,
    'Expected failing run_shell_command invocation',
  ).toBeTruthy();
  expect(failureLog?.toolRequest.success).toBe(false);
});
2025-10-24 14:25:54 -04:00
// TODO(#11966): Deflake this test and re-enable once the underlying race is resolved.
it.skip('should reject chained commands when only the first segment is allowlisted in non-interactive mode', async () => {
  await rig.setup(
    'should reject chained commands when only the first segment is allowlisted',
  );
  const chained = getChainedEchoCommand();

  // The chained command is wrapped in `!{...}`. The placeholders must be real `${...}`
  // interpolations; written as `$ {` they would be sent as literal text.
  const shellInjection = `!{${chained.command}}`;

  await rig.run({
    args: `--allowed-tools=ShellTool(${chained.allowPattern})`,
    // Terminate the stdin input with an actual newline character.
    stdin: `${shellInjection}\n`,
    approvalMode: 'default',
  });

  // CLI should refuse to execute the chained command without scheduling run_shell_command.
  const toolLogs = rig
    .readToolLogs()
    .filter((log) => log.toolRequest.name === 'run_shell_command');

  // Success is false because tool is in the scheduled state.
  for (const log of toolLogs) {
    expect(log.toolRequest.success).toBe(false);
    expect(log.toolRequest.args).toContain('&&');
  }
});
2025-10-10 10:48:24 -07:00
it('should allow all with "ShellTool" and other specific tools', async () => {
  await rig.setup(
    'should allow all with "ShellTool" and other specific tools',
    {
      settings: { tools: { core: ['run_shell_command'] } },
    },
  );

  const { tool } = getLineCountCommand();
  const prompt = ` Please run the command "echo test-allow-all" and show me the output `;

  const result = await rig.run({
    args: [
      // A restricted entry plus the bare tool name. `${tool}` must be a real placeholder
      // with no stray spaces, or the first flag is just literal text.
      `--allowed-tools=run_shell_command(${tool})`,
      '--allowed-tools=run_shell_command',
    ],
    stdin: prompt,
    approvalMode: 'default',
  });

  const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
  if (!foundToolCall || !result.includes('test-allow-all')) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      Result: result,
    });
  }
  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // Assert the log entry exists before reading it, so a missing entry fails with a
  // readable message instead of a TypeError.
  const toolCall = rig
    .readToolLogs()
    .find((log) => log.toolRequest.name === 'run_shell_command');
  expect(toolCall, 'Expected a run_shell_command entry in the tool logs').toBeDefined();
  expect(toolCall?.toolRequest.success).toBe(true);

  assertModelHasOutput(result);
  checkModelOutputContent(result, {
    expectedContent: 'test-allow-all',
    testName: 'Shell command stdin allow all',
  });
});
2025-09-25 17:32:40 -07:00
it('should propagate environment variables to the child process', async () => {
  await rig.setup('should propagate environment variables', {
    settings: { tools: { core: ['run_shell_command'] } },
  });

  const varName = 'GEMINI_CLI_TEST_VAR';
  // A random suffix makes the value unguessable, so it can only appear in the output if it
  // was actually read from the environment. The placeholder must be a real `${...}`.
  const varValue = `test-value-${Math.random().toString(36).substring(7)}`;
  process.env[varName] = varValue;

  try {
    // The prompt has to name the variable, so `${varName}` must interpolate.
    const prompt = `Use echo to learn the value of the environment variable named ${varName} and tell me what it is.`;
    const result = await rig.run({ args: prompt });

    const foundToolCall = await rig.waitForToolCall('run_shell_command');
    if (!foundToolCall || !result.includes(varValue)) {
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
        'Contains varValue': result.includes(varValue),
      });
    }
    expect(
      foundToolCall,
      'Expected to find a run_shell_command tool call',
    ).toBeTruthy();

    assertModelHasOutput(result);
    checkModelOutputContent(result, {
      expectedContent: varValue,
      testName: 'Env var propagation test',
    });
    expect(result).toContain(varValue);
  } finally {
    // Always remove the variable again so it does not leak into other tests.
    delete process.env[varName];
  }
});
2025-10-21 08:01:13 -07:00
it.skip('should run a platform-specific file listing command', async () => {
  await rig.setup('should run platform-specific file listing');

  // Randomised file name so a match in the output can only come from a real listing.
  // The placeholder must be a real `${...}` interpolation.
  const fileName = `test-file-${Math.random().toString(36).substring(7)}.txt`;
  rig.createFile(fileName, 'test content');

  const prompt = ` Run a shell command to list the files in the current directory and tell me what they are . `;
  const result = await rig.run({ args: prompt });

  const foundToolCall = await rig.waitForToolCall('run_shell_command');

  // Debugging info
  if (!foundToolCall || !result.includes(fileName)) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Contains fileName': result.includes(fileName),
    });
  }
  expect(
    foundToolCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  assertModelHasOutput(result);
  checkModelOutputContent(result, {
    expectedContent: fileName,
    testName: 'Platform-specific listing test',
  });
  expect(result).toContain(fileName);
});
2025-10-16 17:25:30 -07:00
it('rejects invalid shell expressions', async () => {
  await rig.setup('rejects invalid shell expressions', {
    settings: {
      tools: {
        core: ['run_shell_command'],
        allowed: ['run_shell_command(echo)'], // Specifically allow echo
      },
    },
  });
  const invalidCommand = getInvalidCommand();

  const result = await rig.run({
    // The command is shown inside escaped backticks (\`). The escapes must sit directly
    // against the backticks, otherwise the template literal ends early.
    args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
    approvalMode: 'default', // Use default mode so safety fallback triggers confirmation
  });
  expect(result).toContain('FAIL');

  // JSON-escape the command (minus the surrounding quotes) before searching for it in the
  // recorded args, and compare case-insensitively.
  const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
  const needle = escapedInvalidCommand.toLowerCase();

  const foundToolCall = await rig.waitForToolCall(
    'run_shell_command',
    15000,
    (args) => args.toLowerCase().includes(needle),
  );
  if (!foundToolCall) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      EscapedCommand: escapedInvalidCommand,
      ToolLogs: rig.readToolLogs(),
    });
  }
  expect(foundToolCall).toBe(true);

  const toolLogs = rig
    .readToolLogs()
    .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
  const failureLog = toolLogs.find((toolLog) =>
    toolLog.toolRequest.args.toLowerCase().includes(needle),
  );
  if (!failureLog || failureLog.toolRequest.success) {
    printDebugInfo(rig, result, {
      ToolLogs: toolLogs,
      EscapedCommand: escapedInvalidCommand,
    });
  }
  expect(
    failureLog,
    'Expected failing run_shell_command invocation for invalid syntax',
  ).toBeTruthy();
  expect(failureLog?.toolRequest.success).toBe(false);
});
2025-06-16 08:27:29 -07:00
} ) ;