mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 22:21:22 -07:00
Inline tree-sitter wasm and add runtime fallback (#11157)
This commit is contained in:
@@ -8,7 +8,8 @@ import { describe, it, expect } from 'vitest';
|
||||
import { TestRig } from './test-helper.js';
|
||||
|
||||
describe('Flicker Detector', () => {
|
||||
it('should not detect a flicker under the max height budget', async () => {
|
||||
// TODO: https://github.com/google-gemini/gemini-cli/issues/11170
|
||||
it.skip('should not detect a flicker under the max height budget', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('flicker-detector-test');
|
||||
|
||||
|
||||
@@ -21,6 +21,46 @@ function getLineCountCommand(): { command: string; tool: string } {
|
||||
}
|
||||
}
|
||||
|
||||
function getInvalidCommand(): string {
|
||||
switch (shell) {
|
||||
case 'powershell':
|
||||
return `Get-ChildItem | | Select-Object`;
|
||||
case 'cmd':
|
||||
return `dir | | findstr foo`;
|
||||
case 'bash':
|
||||
default:
|
||||
return `echo "hello" > > file`;
|
||||
}
|
||||
}
|
||||
|
||||
function getAllowedListCommand(): string {
|
||||
switch (shell) {
|
||||
case 'powershell':
|
||||
return 'Get-ChildItem';
|
||||
case 'cmd':
|
||||
return 'dir';
|
||||
case 'bash':
|
||||
default:
|
||||
return 'ls';
|
||||
}
|
||||
}
|
||||
|
||||
function getDisallowedFileReadCommand(testFile: string): {
|
||||
command: string;
|
||||
tool: string;
|
||||
} {
|
||||
const quotedPath = `"${testFile}"`;
|
||||
switch (shell) {
|
||||
case 'powershell':
|
||||
return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' };
|
||||
case 'cmd':
|
||||
return { command: `type ${quotedPath}`, tool: 'type' };
|
||||
case 'bash':
|
||||
default:
|
||||
return { command: `cat ${quotedPath}`, tool: 'cat' };
|
||||
}
|
||||
}
|
||||
|
||||
describe('run_shell_command', () => {
|
||||
it('should be able to run a shell command', async () => {
|
||||
const rig = new TestRig();
|
||||
@@ -102,8 +142,17 @@ describe('run_shell_command', () => {
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
|
||||
if (!foundToolCall) {
|
||||
const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
|
||||
name: toolRequest.name,
|
||||
success: toolRequest.success,
|
||||
args: toolRequest.args,
|
||||
}));
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Allowed tools flag': `run_shell_command(${tool})`,
|
||||
Prompt: prompt,
|
||||
'Tool logs': toolLogs,
|
||||
Result: result,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -210,8 +259,17 @@ describe('run_shell_command', () => {
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
|
||||
if (!foundToolCall) {
|
||||
const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
|
||||
name: toolRequest.name,
|
||||
success: toolRequest.success,
|
||||
args: toolRequest.args,
|
||||
}));
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Allowed tools flag': `ShellTool(${tool})`,
|
||||
Prompt: prompt,
|
||||
'Tool logs': toolLogs,
|
||||
Result: result,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -280,6 +338,73 @@ describe('run_shell_command', () => {
|
||||
}
|
||||
});
|
||||
|
||||
it('should reject commands not on the allowlist', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should reject commands not on the allowlist');
|
||||
|
||||
const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
|
||||
const allowedCommand = getAllowedListCommand();
|
||||
const disallowed = getDisallowedFileReadCommand(testFile);
|
||||
const prompt =
|
||||
`I am testing the allowed tools configuration. ` +
|
||||
`Attempt to run "${disallowed.command}" to read the contents of ${testFile}. ` +
|
||||
`If the command fails because it is not permitted, respond with the single word FAIL. ` +
|
||||
`If it succeeds, respond with SUCCESS.`;
|
||||
|
||||
const result = await rig.run(
|
||||
{
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
},
|
||||
`--allowed-tools=run_shell_command(${allowedCommand})`,
|
||||
);
|
||||
|
||||
if (!result.toLowerCase().includes('fail')) {
|
||||
printDebugInfo(rig, result, {
|
||||
Result: result,
|
||||
AllowedCommand: allowedCommand,
|
||||
DisallowedCommand: disallowed.command,
|
||||
});
|
||||
}
|
||||
expect(result).toContain('FAIL');
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall(
|
||||
'run_shell_command',
|
||||
15000,
|
||||
(args) => args.toLowerCase().includes(disallowed.tool.toLowerCase()),
|
||||
);
|
||||
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
ToolLogs: rig.readToolLogs(),
|
||||
});
|
||||
}
|
||||
expect(foundToolCall).toBe(true);
|
||||
|
||||
const toolLogs = rig
|
||||
.readToolLogs()
|
||||
.filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
|
||||
const failureLog = toolLogs.find((toolLog) =>
|
||||
toolLog.toolRequest.args
|
||||
.toLowerCase()
|
||||
.includes(disallowed.tool.toLowerCase()),
|
||||
);
|
||||
|
||||
if (!failureLog || failureLog.toolRequest.success) {
|
||||
printDebugInfo(rig, result, {
|
||||
ToolLogs: toolLogs,
|
||||
DisallowedTool: disallowed.tool,
|
||||
});
|
||||
}
|
||||
|
||||
expect(
|
||||
failureLog,
|
||||
'Expected failing run_shell_command invocation',
|
||||
).toBeTruthy();
|
||||
expect(failureLog!.toolRequest.success).toBe(false);
|
||||
});
|
||||
|
||||
it('should allow all with "ShellTool" and other specific tools', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup(
|
||||
@@ -386,4 +511,53 @@ describe('run_shell_command', () => {
|
||||
validateModelOutput(result, fileName, 'Platform-specific listing test');
|
||||
expect(result).toContain(fileName);
|
||||
});
|
||||
|
||||
it('rejects invalid shell expressions', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('rejects invalid shell expressions');
|
||||
const invalidCommand = getInvalidCommand();
|
||||
const result = await rig.run(
|
||||
`I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
|
||||
);
|
||||
expect(result).toContain('FAIL');
|
||||
|
||||
const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
|
||||
const foundToolCall = await rig.waitForToolCall(
|
||||
'run_shell_command',
|
||||
15000,
|
||||
(args) =>
|
||||
args.toLowerCase().includes(escapedInvalidCommand.toLowerCase()),
|
||||
);
|
||||
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
EscapedCommand: escapedInvalidCommand,
|
||||
ToolLogs: rig.readToolLogs(),
|
||||
});
|
||||
}
|
||||
expect(foundToolCall).toBe(true);
|
||||
|
||||
const toolLogs = rig
|
||||
.readToolLogs()
|
||||
.filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
|
||||
const failureLog = toolLogs.find((toolLog) =>
|
||||
toolLog.toolRequest.args
|
||||
.toLowerCase()
|
||||
.includes(escapedInvalidCommand.toLowerCase()),
|
||||
);
|
||||
|
||||
if (!failureLog || failureLog.toolRequest.success) {
|
||||
printDebugInfo(rig, result, {
|
||||
ToolLogs: toolLogs,
|
||||
EscapedCommand: escapedInvalidCommand,
|
||||
});
|
||||
}
|
||||
|
||||
expect(
|
||||
failureLog,
|
||||
'Expected failing run_shell_command invocation for invalid syntax',
|
||||
).toBeTruthy();
|
||||
expect(failureLog!.toolRequest.success).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user