Inline tree-sitter wasm and add runtime fallback (#11157)

This commit is contained in:
cornmander
2025-10-16 17:25:30 -07:00
committed by GitHub
parent 22f725eb08
commit dcf362bcf9
18 changed files with 966 additions and 282 deletions

View File

@@ -21,6 +21,46 @@ function getLineCountCommand(): { command: string; tool: string } {
}
}
function getInvalidCommand(): string {
switch (shell) {
case 'powershell':
return `Get-ChildItem | | Select-Object`;
case 'cmd':
return `dir | | findstr foo`;
case 'bash':
default:
return `echo "hello" > > file`;
}
}
function getAllowedListCommand(): string {
switch (shell) {
case 'powershell':
return 'Get-ChildItem';
case 'cmd':
return 'dir';
case 'bash':
default:
return 'ls';
}
}
function getDisallowedFileReadCommand(testFile: string): {
command: string;
tool: string;
} {
const quotedPath = `"${testFile}"`;
switch (shell) {
case 'powershell':
return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' };
case 'cmd':
return { command: `type ${quotedPath}`, tool: 'type' };
case 'bash':
default:
return { command: `cat ${quotedPath}`, tool: 'cat' };
}
}
describe('run_shell_command', () => {
it('should be able to run a shell command', async () => {
const rig = new TestRig();
@@ -102,8 +142,17 @@ describe('run_shell_command', () => {
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
if (!foundToolCall) {
const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
name: toolRequest.name,
success: toolRequest.success,
args: toolRequest.args,
}));
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Allowed tools flag': `run_shell_command(${tool})`,
Prompt: prompt,
'Tool logs': toolLogs,
Result: result,
});
}
@@ -210,8 +259,17 @@ describe('run_shell_command', () => {
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
if (!foundToolCall) {
const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
name: toolRequest.name,
success: toolRequest.success,
args: toolRequest.args,
}));
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Allowed tools flag': `ShellTool(${tool})`,
Prompt: prompt,
'Tool logs': toolLogs,
Result: result,
});
}
@@ -280,6 +338,73 @@ describe('run_shell_command', () => {
}
});
it('should reject commands not on the allowlist', async () => {
const rig = new TestRig();
await rig.setup('should reject commands not on the allowlist');
const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
const allowedCommand = getAllowedListCommand();
const disallowed = getDisallowedFileReadCommand(testFile);
const prompt =
`I am testing the allowed tools configuration. ` +
`Attempt to run "${disallowed.command}" to read the contents of ${testFile}. ` +
`If the command fails because it is not permitted, respond with the single word FAIL. ` +
`If it succeeds, respond with SUCCESS.`;
const result = await rig.run(
{
stdin: prompt,
yolo: false,
},
`--allowed-tools=run_shell_command(${allowedCommand})`,
);
if (!result.toLowerCase().includes('fail')) {
printDebugInfo(rig, result, {
Result: result,
AllowedCommand: allowedCommand,
DisallowedCommand: disallowed.command,
});
}
expect(result).toContain('FAIL');
const foundToolCall = await rig.waitForToolCall(
'run_shell_command',
15000,
(args) => args.toLowerCase().includes(disallowed.tool.toLowerCase()),
);
if (!foundToolCall) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
ToolLogs: rig.readToolLogs(),
});
}
expect(foundToolCall).toBe(true);
const toolLogs = rig
.readToolLogs()
.filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
const failureLog = toolLogs.find((toolLog) =>
toolLog.toolRequest.args
.toLowerCase()
.includes(disallowed.tool.toLowerCase()),
);
if (!failureLog || failureLog.toolRequest.success) {
printDebugInfo(rig, result, {
ToolLogs: toolLogs,
DisallowedTool: disallowed.tool,
});
}
expect(
failureLog,
'Expected failing run_shell_command invocation',
).toBeTruthy();
expect(failureLog!.toolRequest.success).toBe(false);
});
it('should allow all with "ShellTool" and other specific tools', async () => {
const rig = new TestRig();
await rig.setup(
@@ -386,4 +511,53 @@ describe('run_shell_command', () => {
validateModelOutput(result, fileName, 'Platform-specific listing test');
expect(result).toContain(fileName);
});
it('rejects invalid shell expressions', async () => {
const rig = new TestRig();
await rig.setup('rejects invalid shell expressions');
const invalidCommand = getInvalidCommand();
const result = await rig.run(
`I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
);
expect(result).toContain('FAIL');
const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
const foundToolCall = await rig.waitForToolCall(
'run_shell_command',
15000,
(args) =>
args.toLowerCase().includes(escapedInvalidCommand.toLowerCase()),
);
if (!foundToolCall) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
EscapedCommand: escapedInvalidCommand,
ToolLogs: rig.readToolLogs(),
});
}
expect(foundToolCall).toBe(true);
const toolLogs = rig
.readToolLogs()
.filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
const failureLog = toolLogs.find((toolLog) =>
toolLog.toolRequest.args
.toLowerCase()
.includes(escapedInvalidCommand.toLowerCase()),
);
if (!failureLog || failureLog.toolRequest.success) {
printDebugInfo(rig, result, {
ToolLogs: toolLogs,
EscapedCommand: escapedInvalidCommand,
});
}
expect(
failureLog,
'Expected failing run_shell_command invocation for invalid syntax',
).toBeTruthy();
expect(failureLog!.toolRequest.success).toBe(false);
});
});