Inline tree-sitter wasm and add runtime fallback (#11157)

2026-04-25 20:44:46 -07:00 · 2025-10-16 17:25:30 -07:00
parent 22f725eb08
commit dcf362bcf9
18 changed files with 966 additions and 282 deletions
@@ -21,6 +21,46 @@ function getLineCountCommand(): { command: string; tool: string } {
  }
 }

+function getInvalidCommand(): string {
+  switch (shell) {
+    case 'powershell':
+      return `Get-ChildItem | | Select-Object`;
+    case 'cmd':
+      return `dir | | findstr foo`;
+    case 'bash':
+    default:
+      return `echo "hello" > > file`;
+  }
+}
+
+function getAllowedListCommand(): string {
+  switch (shell) {
+    case 'powershell':
+      return 'Get-ChildItem';
+    case 'cmd':
+      return 'dir';
+    case 'bash':
+    default:
+      return 'ls';
+  }
+}
+
+function getDisallowedFileReadCommand(testFile: string): {
+  command: string;
+  tool: string;
+} {
+  const quotedPath = `"${testFile}"`;
+  switch (shell) {
+    case 'powershell':
+      return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' };
+    case 'cmd':
+      return { command: `type ${quotedPath}`, tool: 'type' };
+    case 'bash':
+    default:
+      return { command: `cat ${quotedPath}`, tool: 'cat' };
+  }
+}
+
 describe('run_shell_command', () => {
  it('should be able to run a shell command', async () => {
    const rig = new TestRig();
@@ -102,8 +142,17 @@ describe('run_shell_command', () => {
    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

    if (!foundToolCall) {
+      const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
+        name: toolRequest.name,
+        success: toolRequest.success,
+        args: toolRequest.args,
+      }));
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
+        'Allowed tools flag': `run_shell_command(${tool})`,
+        Prompt: prompt,
+        'Tool logs': toolLogs,
+        Result: result,
      });
    }

@@ -210,8 +259,17 @@ describe('run_shell_command', () => {
    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

    if (!foundToolCall) {
+      const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
+        name: toolRequest.name,
+        success: toolRequest.success,
+        args: toolRequest.args,
+      }));
      printDebugInfo(rig, result, {
        'Found tool call': foundToolCall,
+        'Allowed tools flag': `ShellTool(${tool})`,
+        Prompt: prompt,
+        'Tool logs': toolLogs,
+        Result: result,
      });
    }

@@ -280,6 +338,73 @@ describe('run_shell_command', () => {
    }
  });

+  it('should reject commands not on the allowlist', async () => {
+    const rig = new TestRig();
+    await rig.setup('should reject commands not on the allowlist');
+
+    const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
+    const allowedCommand = getAllowedListCommand();
+    const disallowed = getDisallowedFileReadCommand(testFile);
+    const prompt =
+      `I am testing the allowed tools configuration. ` +
+      `Attempt to run "${disallowed.command}" to read the contents of ${testFile}. ` +
+      `If the command fails because it is not permitted, respond with the single word FAIL. ` +
+      `If it succeeds, respond with SUCCESS.`;
+
+    const result = await rig.run(
+      {
+        stdin: prompt,
+        yolo: false,
+      },
+      `--allowed-tools=run_shell_command(${allowedCommand})`,
+    );
+
+    if (!result.toLowerCase().includes('fail')) {
+      printDebugInfo(rig, result, {
+        Result: result,
+        AllowedCommand: allowedCommand,
+        DisallowedCommand: disallowed.command,
+      });
+    }
+    expect(result).toContain('FAIL');
+
+    const foundToolCall = await rig.waitForToolCall(
+      'run_shell_command',
+      15000,
+      (args) => args.toLowerCase().includes(disallowed.tool.toLowerCase()),
+    );
+
+    if (!foundToolCall) {
+      printDebugInfo(rig, result, {
+        'Found tool call': foundToolCall,
+        ToolLogs: rig.readToolLogs(),
+      });
+    }
+    expect(foundToolCall).toBe(true);
+
+    const toolLogs = rig
+      .readToolLogs()
+      .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
+    const failureLog = toolLogs.find((toolLog) =>
+      toolLog.toolRequest.args
+        .toLowerCase()
+        .includes(disallowed.tool.toLowerCase()),
+    );
+
+    if (!failureLog || failureLog.toolRequest.success) {
+      printDebugInfo(rig, result, {
+        ToolLogs: toolLogs,
+        DisallowedTool: disallowed.tool,
+      });
+    }
+
+    expect(
+      failureLog,
+      'Expected failing run_shell_command invocation',
+    ).toBeTruthy();
+    expect(failureLog!.toolRequest.success).toBe(false);
+  });
+
  it('should allow all with "ShellTool" and other specific tools', async () => {
    const rig = new TestRig();
    await rig.setup(
@@ -386,4 +511,53 @@ describe('run_shell_command', () => {
    validateModelOutput(result, fileName, 'Platform-specific listing test');
    expect(result).toContain(fileName);
  });
+
+  it('rejects invalid shell expressions', async () => {
+    const rig = new TestRig();
+    await rig.setup('rejects invalid shell expressions');
+    const invalidCommand = getInvalidCommand();
+    const result = await rig.run(
+      `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
+    );
+    expect(result).toContain('FAIL');
+
+    const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
+    const foundToolCall = await rig.waitForToolCall(
+      'run_shell_command',
+      15000,
+      (args) =>
+        args.toLowerCase().includes(escapedInvalidCommand.toLowerCase()),
+    );
+
+    if (!foundToolCall) {
+      printDebugInfo(rig, result, {
+        'Found tool call': foundToolCall,
+        EscapedCommand: escapedInvalidCommand,
+        ToolLogs: rig.readToolLogs(),
+      });
+    }
+    expect(foundToolCall).toBe(true);
+
+    const toolLogs = rig
+      .readToolLogs()
+      .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
+    const failureLog = toolLogs.find((toolLog) =>
+      toolLog.toolRequest.args
+        .toLowerCase()
+        .includes(escapedInvalidCommand.toLowerCase()),
+    );
+
+    if (!failureLog || failureLog.toolRequest.success) {
+      printDebugInfo(rig, result, {
+        ToolLogs: toolLogs,
+        EscapedCommand: escapedInvalidCommand,
+      });
+    }
+
+    expect(
+      failureLog,
+      'Expected failing run_shell_command invocation for invalid syntax',
+    ).toBeTruthy();
+    expect(failureLog!.toolRequest.success).toBe(false);
+  });
 });