2025-06-16 08:27:29 -07:00
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
2025-08-12 15:57:27 -07:00
import { describe , it , expect } from 'vitest' ;
2025-08-01 14:33:33 -07:00
import { TestRig , printDebugInfo , validateModelOutput } from './test-helper.js' ;
2025-10-06 12:15:21 -07:00
import { getShellConfiguration } from '../packages/core/src/utils/shell-utils.js' ;
// Resolve the active shell once when the module is evaluated; the command
// helpers in this file branch on this value.
const shell = getShellConfiguration().shell;
/**
 * Chooses the line-counting command for the detected shell.
 *
 * @returns `command`, the text interpolated into test prompts, and `tool`, the
 * bare executable name the tests place inside `--allowed-tools` patterns.
 */
function getLineCountCommand(): { command: string; tool: string } {
  // PowerShell and cmd both use `find`; bash and any other value use `wc`.
  const usesFind = shell === 'powershell' || shell === 'cmd';
  return usesFind
    ? { command: ` find /c /v `, tool: 'find' }
    : { command: ` wc -l `, tool: 'wc' };
}
2025-06-16 08:27:29 -07:00
2025-10-16 17:25:30 -07:00
/**
 * Produces a command line for the detected shell that the invalid-syntax test
 * presents to the model as having invalid syntax.
 *
 * @returns The command text to embed in the prompt.
 */
function getInvalidCommand(): string {
  if (shell === 'powershell') {
    return ` Get-ChildItem | | Select-Object `;
  }
  if (shell === 'cmd') {
    return ` dir | | findstr foo `;
  }
  // bash and every other shell value share this form.
  return ` echo "hello" > > file `;
}
/**
 * Names the directory-listing command for the detected shell. The tests use it
 * as the command that is placed on the allowlist.
 *
 * @returns `Get-ChildItem` for PowerShell, `dir` for cmd, otherwise `ls`.
 */
function getAllowedListCommand(): string {
  return shell === 'powershell'
    ? 'Get-ChildItem'
    : shell === 'cmd'
      ? 'dir'
      : 'ls';
}
/**
 * Builds a file-reading command for the detected shell. The allowlist test
 * asks the model to run it while only a listing command is permitted.
 *
 * @param testFile Path of the file the command should read.
 * @returns `command`, the full command line with the quoted path, and `tool`,
 * the bare executable name used to locate the call in the tool logs.
 */
function getDisallowedFileReadCommand(testFile: string): {
  command: string;
  tool: string;
} {
  let tool: string;
  if (shell === 'powershell') {
    tool = 'Get-Content';
  } else if (shell === 'cmd') {
    tool = 'type';
  } else {
    tool = 'cat';
  }

  const quotedPath = ` " ${testFile} " `;
  return { command: ` ${tool} ${quotedPath} `, tool };
}
2025-10-23 16:55:01 -04:00
/**
 * Builds a two-part `&&` chain: an echo-style command followed by the
 * directory-listing command from `getAllowedListCommand()`.
 *
 * @returns `allowPattern`, the name of the first command (the only part the
 * chained-command test allowlists), and `command`, the full chained line.
 */
function getChainedEchoCommand(): { allowPattern: string; command: string } {
  const secondCommand = getAllowedListCommand();
  // Only PowerShell differs; cmd, bash and any other shell all use `echo`.
  const allowPattern = shell === 'powershell' ? 'Write-Output' : 'echo';
  return {
    allowPattern,
    command: ` ${allowPattern} "foo" && ${secondCommand} `,
  };
}
2025-08-12 15:57:27 -07:00
describe ( 'run_shell_command' , ( ) = > {
// Passes the prompt straight to the rig and expects the model to call
// run_shell_command for an echo of a marker string.
it('should be able to run a shell command', async () => {
  const testRig = new TestRig();
  await testRig.setup('should be able to run a shell command');

  const output = await testRig.run(
    ` Please run the command "echo hello-world" and show me the output `,
  );
  const sawShellCall = await testRig.waitForToolCall('run_shell_command');
  const echoedMarker = output.includes('hello-world');

  // Print diagnostics only when the tool call or the marker is missing.
  if (!(sawShellCall && echoedMarker)) {
    printDebugInfo(testRig, output, {
      'Found tool call': sawShellCall,
      'Contains hello-world': echoedMarker,
    });
  }

  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // validateModelOutput throws when there is no output and warns when the
  // expected content is missing. The model often reports the exit code
  // instead of the echoed text, so both are listed.
  validateModelOutput(
    output,
    ['hello-world', 'exit code 0'],
    'Shell command test',
  );
});
2025-07-05 08:27:22 -07:00
2025-08-12 15:57:27 -07:00
// Same echo scenario as the direct-prompt case, but the prompt is supplied
// through the rig's `stdin` option.
it('should be able to run a shell command via stdin', async () => {
  const testRig = new TestRig();
  await testRig.setup('should be able to run a shell command via stdin');

  const output = await testRig.run({
    stdin: ` Please run the command "echo test-stdin" and show me what it outputs `,
  });
  const sawShellCall = await testRig.waitForToolCall('run_shell_command');
  const echoedMarker = output.includes('test-stdin');

  // Print diagnostics only when the tool call or the marker is missing.
  if (!(sawShellCall && echoedMarker)) {
    printDebugInfo(testRig, output, {
      'Test type': 'Stdin test',
      'Found tool call': sawShellCall,
      'Contains test-stdin': echoedMarker,
    });
  }

  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  // validateModelOutput throws when there is no output and warns when the
  // expected content is missing.
  validateModelOutput(output, 'test-stdin', 'Shell command stdin test');
});
2025-09-25 17:32:40 -07:00
2025-10-24 13:04:40 -07:00
// Skipped. Allowlists only the line-count tool via
// `run_shell_command(<tool>)` and expects the first logged shell call to
// succeed.
it.skip('should run allowed sub-command in non-interactive mode', async () => {
  const testRig = new TestRig();
  await testRig.setup('should run allowed sub-command in non-interactive mode');

  const countedFile = testRig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const lineCounter = getLineCountCommand();
  const request = ` use ${lineCounter.command} to tell me how many lines there are in ${countedFile} `;

  // Provide the prompt via stdin to simulate non-interactive mode.
  const output = await testRig.run(
    { stdin: request, yolo: false },
    ` --allowed-tools=run_shell_command( ${lineCounter.tool} ) `,
  );

  const sawShellCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
  );
  if (!sawShellCall) {
    // Reduce each log entry to the fields that matter for diagnosis.
    const summarizedLogs = testRig.readToolLogs().map((entry) => {
      const { name, success, args } = entry.toolRequest;
      return { name, success, args };
    });
    printDebugInfo(testRig, output, {
      'Found tool call': sawShellCall,
      'Allowed tools flag': ` run_shell_command( ${lineCounter.tool} ) `,
      Prompt: request,
      'Tool logs': summarizedLogs,
      Result: output,
    });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  const shellCalls = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const [firstShellCall] = shellCalls;
  expect(firstShellCall.toolRequest.success).toBe(true);
});
// Allowlists run_shell_command without a parenthesised sub-command pattern
// and expects the first logged shell call to succeed.
it('should succeed with no parens in non-interactive mode', async () => {
  const testRig = new TestRig();
  await testRig.setup('should succeed with no parens in non-interactive mode');

  const countedFile = testRig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const lineCountCommand = getLineCountCommand().command;
  const request = ` use ${lineCountCommand} to tell me how many lines there are in ${countedFile} `;

  const output = await testRig.run(
    { stdin: request, yolo: false },
    '--allowed-tools=run_shell_command',
  );

  const sawShellCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
  );
  if (!sawShellCall) {
    printDebugInfo(testRig, output, { 'Found tool call': sawShellCall });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  const shellCalls = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const [firstShellCall] = shellCalls;
  expect(firstShellCall.toolRequest.success).toBe(true);
});
// Runs with `yolo: true` and no allowlist flag; expects the first logged
// shell call to succeed.
it('should succeed with --yolo mode', async () => {
  const testRig = new TestRig();
  await testRig.setup('should succeed with --yolo mode');

  const countedFile = testRig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const lineCountCommand = getLineCountCommand().command;
  const request = ` use ${lineCountCommand} to tell me how many lines there are in ${countedFile} `;

  const output = await testRig.run({ prompt: request, yolo: true });

  const sawShellCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
  );
  if (!sawShellCall) {
    printDebugInfo(testRig, output, { 'Found tool call': sawShellCall });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  const shellCalls = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const [firstShellCall] = shellCalls;
  expect(firstShellCall.toolRequest.success).toBe(true);
});
2025-10-24 13:04:40 -07:00
// Skipped. Same as the allowed sub-command case, but the allowlist entry is
// written with the `ShellTool(...)` name instead of `run_shell_command(...)`.
it.skip('should work with ShellTool alias', async () => {
  const testRig = new TestRig();
  await testRig.setup('should work with ShellTool alias');

  const countedFile = testRig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
  const lineCounter = getLineCountCommand();
  const request = ` use ${lineCounter.command} to tell me how many lines there are in ${countedFile} `;

  const output = await testRig.run(
    { stdin: request, yolo: false },
    ` --allowed-tools=ShellTool( ${lineCounter.tool} ) `,
  );

  const sawShellCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
  );
  if (!sawShellCall) {
    // Reduce each log entry to the fields that matter for diagnosis.
    const summarizedLogs = testRig.readToolLogs().map((entry) => {
      const { name, success, args } = entry.toolRequest;
      return { name, success, args };
    });
    printDebugInfo(testRig, output, {
      'Found tool call': sawShellCall,
      'Allowed tools flag': ` ShellTool( ${lineCounter.tool} ) `,
      Prompt: request,
      'Tool logs': summarizedLogs,
      Result: output,
    });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  const shellCalls = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const [firstShellCall] = shellCalls;
  expect(firstShellCall.toolRequest.success).toBe(true);
});
2025-10-14 11:36:49 -07:00
// TODO(#11062): Un-skip this once we can make it reliable by using hard coded
// model responses.
// Passes two separate --allowed-tools flags (the line-count tool and `ls`) and
// expects a successful run_shell_command call for each of them.
it.skip('should combine multiple --allowed-tools flags', async () => {
  const rig = new TestRig();
  await rig.setup('should combine multiple --allowed-tools flags');
  const { tool, command } = getLineCountCommand();
  const prompt =
    ` use both ${command} and ls to count the number of lines in files in this ` +
    ` directory. Do not pipe these commands into each other, run them separately. `;

  const result = await rig.run(
    {
      stdin: prompt,
      yolo: false,
    },
    ` --allowed-tools=run_shell_command( ${tool} ) `,
    '--allowed-tools=run_shell_command(ls)',
  );

  // Tool-log entries are objects, so serialize them explicitly; joining or
  // interpolating them directly would render "[object Object]".
  const formatLogs = (logs: readonly unknown[]): string =>
    logs.map((entry) => JSON.stringify(entry)).join('\n');

  // `for...of` yields the command names; `for...in` would yield the array
  // indices "0" and "1" and never look for the real commands.
  for (const expected of ['ls', tool]) {
    const foundToolCall = await rig.waitForToolCall(
      'run_shell_command',
      15000,
      (args) => args.toLowerCase().includes(` "command": " ${expected} `),
    );
    if (!foundToolCall) {
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
      });
    }
    expect(
      foundToolCall,
      ` Expected to find a run_shell_command tool call to " ${expected} ", ` +
        ` got ${formatLogs(rig.readToolLogs())} `,
    ).toBeTruthy();
  }

  const toolLogs = rig
    .readToolLogs()
    .filter((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
  expect(toolLogs.length, formatLogs(toolLogs)).toBeGreaterThanOrEqual(2);
  for (const toolLog of toolLogs) {
    expect(
      toolLog.toolRequest.success,
      ` Expected tool call ${JSON.stringify(toolLog)} to succeed `,
    ).toBe(true);
  }
});
2025-10-23 09:50:50 -04:00
// Allowlists only the directory-listing command, asks the model to run a
// file-read command instead, and expects that tool call to be logged as
// unsuccessful.
it('should reject commands not on the allowlist', async () => {
  const testRig = new TestRig();
  await testRig.setup('should reject commands not on the allowlist');

  const targetFile = testRig.createFile(
    'test.txt',
    'Disallowed command check\n',
  );
  const permittedCommand = getAllowedListCommand();
  const blocked = getDisallowedFileReadCommand(targetFile);

  // Case-insensitive check for the disallowed tool name in a call's args.
  const blockedToolLower = blocked.tool.toLowerCase();
  const mentionsBlockedTool = (args: string): boolean =>
    args.toLowerCase().includes(blockedToolLower);

  const request = [
    ` I am testing the allowed tools configuration. `,
    ` Attempt to run " ${blocked.command} " to read the contents of ${targetFile} . `,
    ` If the command fails because it is not permitted, respond with the single word FAIL. `,
    ` If it succeeds, respond with SUCCESS. `,
  ].join('');

  const output = await testRig.run(
    { stdin: request, yolo: false },
    ` --allowed-tools=run_shell_command( ${permittedCommand} ) `,
  );

  const reportedFailure = output.toLowerCase().includes('fail');
  if (!reportedFailure) {
    printDebugInfo(testRig, output, {
      Result: output,
      AllowedCommand: permittedCommand,
      DisallowedCommand: blocked.command,
    });
  }
  expect(output).toContain('FAIL');

  const sawBlockedCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
    (args) => mentionsBlockedTool(args),
  );
  if (!sawBlockedCall) {
    printDebugInfo(testRig, output, {
      'Found tool call': sawBlockedCall,
      ToolLogs: testRig.readToolLogs(),
    });
  }
  expect(sawBlockedCall).toBe(true);

  const shellLogs = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const rejectedLog = shellLogs.find((entry) =>
    mentionsBlockedTool(entry.toolRequest.args),
  );
  if (!rejectedLog || rejectedLog.toolRequest.success) {
    printDebugInfo(testRig, output, {
      ToolLogs: shellLogs,
      DisallowedTool: blocked.tool,
    });
  }
  expect(
    rejectedLog,
    'Expected failing run_shell_command invocation',
  ).toBeTruthy();
  expect(rejectedLog!.toolRequest.success).toBe(false);
});
2025-10-24 14:25:54 -04:00
// TODO(#11966): Deflake this test and re-enable once the underlying race is resolved.
// Sends a `!{...}` shell injection over stdin in which only the first command
// of an `&&` chain matches the allowlist pattern, then checks that no logged
// run_shell_command entry succeeded.
it.skip('should reject chained commands when only the first segment is allowlisted in non-interactive mode', async () => {
  const rig = new TestRig();
  await rig.setup(
    'should reject chained commands when only the first segment is allowlisted',
  );

  const chained = getChainedEchoCommand();
  const shellInjection = ` !{ ${chained.command} } `;

  await rig.run(
    {
      // `\n` is a real newline escape; the earlier `\ n` was an escaped space
      // followed by the letter "n", so no newline was ever sent.
      stdin: ` ${shellInjection} \n `,
      yolo: false,
    },
    ` --allowed-tools=ShellTool( ${chained.allowPattern} ) `,
  );

  // CLI should refuse to execute the chained command without scheduling run_shell_command.
  const toolLogs = rig
    .readToolLogs()
    .filter((log) => log.toolRequest.name === 'run_shell_command');

  // Success is false because tool is in the scheduled state.
  // Note: when no run_shell_command entries were logged, this loop does not
  // execute and the test passes without any assertion.
  for (const log of toolLogs) {
    expect(log.toolRequest.success).toBe(false);
    expect(log.toolRequest.args).toContain('&&');
  }
});
2025-10-10 10:48:24 -07:00
// Passes both a restricted `run_shell_command(<tool>)` flag and the bare
// `run_shell_command` flag, then expects an echo command to succeed.
it('should allow all with "ShellTool" and other specific tools', async () => {
  const testRig = new TestRig();
  await testRig.setup(
    'should allow all with "ShellTool" and other specific tools',
  );

  const lineCountTool = getLineCountCommand().tool;
  const request = ` Please run the command "echo test-allow-all" and show me the output `;

  const output = await testRig.run(
    { stdin: request, yolo: false },
    ` --allowed-tools=run_shell_command( ${lineCountTool} ) `,
    '--allowed-tools=run_shell_command',
  );

  const sawShellCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
  );
  const echoedMarker = output.includes('test-allow-all');
  if (!(sawShellCall && echoedMarker)) {
    printDebugInfo(testRig, output, {
      'Found tool call': sawShellCall,
      Result: output,
    });
  }
  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();

  const shellCalls = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const [firstShellCall] = shellCalls;
  expect(firstShellCall.toolRequest.success).toBe(true);

  // validateModelOutput throws when there is no output and warns when the
  // expected content is missing.
  validateModelOutput(
    output,
    'test-allow-all',
    'Shell command stdin allow all',
  );
});
2025-09-25 17:32:40 -07:00
// Sets a randomly-valued variable on this process's environment, asks the
// model to echo it, and expects the value to appear in the output.
it('should propagate environment variables to the child process', async () => {
  const testRig = new TestRig();
  await testRig.setup('should propagate environment variables');

  const envKey = 'GEMINI_CLI_TEST_VAR';
  const randomSuffix = Math.random().toString(36).substring(7);
  const envValue = ` test-value- ${randomSuffix} `;
  process.env[envKey] = envValue;

  try {
    const output = await testRig.run(
      ` Use echo to learn the value of the environment variable named ${envKey} and tell me what it is. `,
    );
    const sawShellCall = await testRig.waitForToolCall('run_shell_command');
    const echoedValue = output.includes(envValue);

    if (!(sawShellCall && echoedValue)) {
      printDebugInfo(testRig, output, {
        'Found tool call': sawShellCall,
        'Contains varValue': echoedValue,
      });
    }

    expect(
      sawShellCall,
      'Expected to find a run_shell_command tool call',
    ).toBeTruthy();
    validateModelOutput(output, envValue, 'Env var propagation test');
    expect(output).toContain(envValue);
  } finally {
    // Always remove the variable so it cannot leak into other tests.
    delete process.env[envKey];
  }
});
2025-10-21 08:01:13 -07:00
// Skipped. Creates a uniquely named file, asks the model to list the current
// directory with a shell command, and expects the file name in the output.
it.skip('should run a platform-specific file listing command', async () => {
  const testRig = new TestRig();
  await testRig.setup('should run platform-specific file listing');

  const randomSuffix = Math.random().toString(36).substring(7);
  const listedName = ` test-file- ${randomSuffix} .txt `;
  testRig.createFile(listedName, 'test content');

  const output = await testRig.run(
    ` Run a shell command to list the files in the current directory and tell me what they are. `,
  );
  const sawShellCall = await testRig.waitForToolCall('run_shell_command');
  const listedFile = output.includes(listedName);

  // Debugging info
  if (!(sawShellCall && listedFile)) {
    printDebugInfo(testRig, output, {
      'Found tool call': sawShellCall,
      'Contains fileName': listedFile,
    });
  }

  expect(
    sawShellCall,
    'Expected to find a run_shell_command tool call',
  ).toBeTruthy();
  validateModelOutput(output, listedName, 'Platform-specific listing test');
  expect(output).toContain(listedName);
});
2025-10-16 17:25:30 -07:00
// Asks the model to run a command presented as syntactically invalid and
// expects the matching run_shell_command call to be logged as unsuccessful.
it('rejects invalid shell expressions', async () => {
  const testRig = new TestRig();
  await testRig.setup('rejects invalid shell expressions');

  const brokenCommand = getInvalidCommand();
  const output = await testRig.run(
    ` I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \` ${brokenCommand} \` . If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS. `,
  );
  expect(output).toContain('FAIL');

  // JSON-escape the command and drop the surrounding quotes before searching
  // the logged args (presumably the args are stored as JSON text; confirm).
  const escapedCommand = JSON.stringify(brokenCommand).slice(1, -1);
  const needle = escapedCommand.toLowerCase();
  const mentionsBrokenCommand = (args: string): boolean =>
    args.toLowerCase().includes(needle);

  const sawBrokenCall = await testRig.waitForToolCall(
    'run_shell_command',
    15000,
    (args) => mentionsBrokenCommand(args),
  );
  if (!sawBrokenCall) {
    printDebugInfo(testRig, output, {
      'Found tool call': sawBrokenCall,
      EscapedCommand: escapedCommand,
      ToolLogs: testRig.readToolLogs(),
    });
  }
  expect(sawBrokenCall).toBe(true);

  const shellLogs = testRig
    .readToolLogs()
    .filter((entry) => entry.toolRequest.name === 'run_shell_command');
  const rejectedLog = shellLogs.find((entry) =>
    mentionsBrokenCommand(entry.toolRequest.args),
  );
  if (!rejectedLog || rejectedLog.toolRequest.success) {
    printDebugInfo(testRig, output, {
      ToolLogs: shellLogs,
      EscapedCommand: escapedCommand,
    });
  }
  expect(
    rejectedLog,
    'Expected failing run_shell_command invocation for invalid syntax',
  ).toBeTruthy();
  expect(rejectedLog!.toolRequest.success).toBe(false);
});
2025-06-16 08:27:29 -07:00
} ) ;