feat(core): use shell for file operations under sandboxing

When `security.toolSandboxing` is enabled, the CLI now excludes the lower-fidelity
tools (`grep_search`, `replace`, `write_file`, `read_file`) from the main agent.
Instead, it relies on `run_shell_command` (e.g. `sed`, `grep`, `cat`, `echo >`)
to perform these actions.

To maintain UX and telemetry parity, `run_shell_command` now infers common file
operations. When detected:
- The UI title is updated to a high-fidelity display (e.g. "Shell (Read File)",
  "Shell (Replace)").
- File editing/writing commands (like `sed -i` or `echo >`) generate a predicted
  diff view for the user during confirmation.
- The execution emits the standard `FileOperationEvent` telemetry using the
  canonical tool names, ensuring metrics consistency.
This commit is contained in:
Christian Gunderman
2026-04-13 19:23:50 -07:00
parent 9cf410478c
commit 44d8db20c8
9 changed files with 523 additions and 7 deletions
+108
View File
@@ -0,0 +1,108 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import {
TestRig,
} from './test-helper.js';
describe('shell-parity', () => {
let rig: TestRig;
beforeEach(() => {
rig = new TestRig();
});
afterEach(async () => await rig.cleanup());
it('should use run_shell_command for replace when sandboxing is enabled', async () => {
await rig.setup('should use run_shell_command for replace when sandboxing is enabled', {
settings: {
security: { toolSandboxing: true },
},
});
rig.createFile('test.ts', 'const foo = "bar";');
// We expect the model to use run_shell_command because edit/replace/write_file are filtered out.
const result = await rig.run({
args: `Replace "bar" with "baz" in test.ts`,
});
// Verify forbidden tools were NOT used
const forbiddenTools = ['grep_search', 'replace', 'write_file', 'edit', 'read_file'];
const toolLogs = rig.readToolLogs();
const usedForbidden = toolLogs.filter((t) =>
forbiddenTools.includes(t.toolRequest.name),
);
expect(usedForbidden).toHaveLength(0);
// Verify run_shell_command was used
const shellCall = await rig.waitForToolCall('run_shell_command');
expect(shellCall).toBeTruthy();
// Verify the command looks like a replace operation
const command = shellCall!.args.command as string;
// It should contain some form of replacement (sed, perl, or powershell -replace)
expect(command).toMatch(/sed|replace|Set-Content|perl/i);
// Verify file content changed
const content = rig.readFile('test.ts');
expect(content).toContain('baz');
expect(content).not.toContain('"bar"');
});
it('should use run_shell_command for search when sandboxing is enabled', async () => {
await rig.setup('should use run_shell_command for search when sandboxing is enabled', {
settings: {
security: { toolSandboxing: true },
},
});
rig.createFile('search-me.txt', 'target-string');
await rig.run({
args: `Search for "target-string" in search-me.txt`,
});
// Verify grep_search was NOT used
const toolLogs = rig.readToolLogs();
const usedGrep = toolLogs.filter((t) => t.toolRequest.name === 'grep_search');
expect(usedGrep).toHaveLength(0);
// Verify run_shell_command was used
const shellCall = await rig.waitForToolCall('run_shell_command');
expect(shellCall).toBeTruthy();
const command = shellCall!.args.command as string;
expect(command).toMatch(/grep|rg|ripgrep|Select-String|findstr/i);
});
it('should use run_shell_command for read when sandboxing is enabled', async () => {
await rig.setup('should use run_shell_command for read when sandboxing is enabled', {
settings: {
security: { toolSandboxing: true },
},
});
rig.createFile('read-me.txt', 'hello world');
const result = await rig.run({
args: `Read the file read-me.txt and tell me what it says`,
});
// Verify read_file was NOT used
const toolLogs = rig.readToolLogs();
const usedRead = toolLogs.filter((t) => t.toolRequest.name === 'read_file');
expect(usedRead).toHaveLength(0);
// Verify run_shell_command was used
const shellCall = await rig.waitForToolCall('run_shell_command');
expect(shellCall).toBeTruthy();
const command = shellCall!.args.command as string;
expect(command).toMatch(/cat|head|tail|less|more|Get-Content|type/i);
expect(result).toContain('hello world');
});
});
+1 -1
View File
@@ -42,7 +42,7 @@ export function mapToDisplay(
if (call.status === CoreToolCallStatus.Error) {
description = JSON.stringify(call.request.args);
} else {
description = call.invocation.getDescription();
description = typeof call.invocation.getDisplayTitle === 'function' ? call.invocation.getDisplayTitle() : call.invocation.getDescription();
renderOutputAsMarkdown = call.tool.isOutputMarkdown;
}
+3 -1
View File
@@ -1024,7 +1024,9 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
if (tool) {
displayName = tool.displayName ?? toolName;
const invocation = tool.build(args);
description = invocation.getDescription();
// Prefer getDisplayTitle if it differs, otherwise fallback to getDescription.
// This ensures the timeline ("Shell (Replace)") matches the confirmation dialog.
description = typeof invocation.getDisplayTitle === 'function' ? invocation.getDisplayTitle() : invocation.getDescription();
}
} catch {
// Ignore errors during formatting for activity emission
+6 -2
View File
@@ -1030,6 +1030,11 @@ export class GeminiChat {
? resultDisplayRaw
: undefined;
let description: string | undefined = undefined;
if ('invocation' in call && call.invocation) {
description = typeof call.invocation.getDisplayTitle === 'function' ? call.invocation.getDisplayTitle() : call.invocation.getDescription();
}
return {
id: call.request.callId,
name: call.request.originalRequestName ?? call.request.name,
@@ -1038,8 +1043,7 @@ export class GeminiChat {
status: call.status,
timestamp: new Date().toISOString(),
resultDisplay,
description:
'invocation' in call ? call.invocation?.getDescription() : undefined,
description,
};
});
+155 -3
View File
@@ -12,6 +12,7 @@ import crypto from 'node:crypto';
import { debugLogger } from '../index.js';
import type { SandboxPermissions } from '../services/sandboxManager.js';
import { ToolErrorType } from './tool-error.js';
import * as Diff from 'diff';
import {
BaseDeclarativeTool,
BaseToolInvocation,
@@ -22,6 +23,7 @@ import {
type BackgroundExecutionData,
type ToolCallConfirmationDetails,
type ToolExecuteConfirmationDetails,
type ToolEditConfirmationDetails,
type PolicyUpdateOptions,
type ToolLiveOutput,
type ExecuteOptions,
@@ -41,8 +43,19 @@ import {
stripShellWrapper,
parseCommandDetails,
hasRedirection,
inferFileOperation,
FileOperationType,
} from '../utils/shell-utils.js';
import { SHELL_TOOL_NAME } from './tool-names.js';
import { logFileOperation } from '../telemetry/loggers.js';
import { FileOperationEvent } from '../telemetry/types.js';
import { FileOperation } from '../telemetry/metrics.js';
import {
SHELL_TOOL_NAME,
EDIT_TOOL_NAME,
WRITE_FILE_TOOL_NAME,
GREP_TOOL_NAME,
READ_FILE_TOOL_NAME,
} from './tool-names.js';
import { PARAM_ADDITIONAL_PERMISSIONS } from './definitions/base-declarations.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import { getShellDefinition } from './definitions/coreTools.js';
@@ -127,6 +140,19 @@ export class ShellToolInvocation extends BaseToolInvocation<
}
override getDisplayTitle(): string {
const inferred = inferFileOperation(this.params.command);
if (inferred) {
switch (inferred.type) {
case FileOperationType.SEARCH:
return `Shell (Search)`;
case FileOperationType.EDIT:
return `Shell (Replace)`;
case FileOperationType.WRITE:
return `Shell (Write File)`;
case FileOperationType.READ:
return `Shell (Read File)`;
}
}
return this.params.command;
}
@@ -162,10 +188,96 @@ export class ShellToolInvocation extends BaseToolInvocation<
return super.shouldConfirmExecute(abortSignal);
}
private simulateSed(
content: string,
sedExpression: string,
): string | undefined {
const match = sedExpression.match(/^s\/(.+)\/(.+)\/([g]*)?$/);
if (!match) return undefined;
const [_, oldStr, newStr, flags] = match;
try {
const regex = new RegExp(oldStr, flags?.includes('g') ? 'g' : '');
return content.replace(regex, newStr);
} catch {
return undefined;
}
}
private simulatePsReplace(
content: string,
oldString: string,
newString: string,
): string {
try {
// PowerShell -replace is regex-based and case-insensitive by default.
const regex = new RegExp(oldString, 'gi');
return content.replace(regex, newString);
} catch {
return content;
}
}
protected override async getConfirmationDetails(
_abortSignal: AbortSignal,
abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
const command = stripShellWrapper(this.params.command);
const inferred = inferFileOperation(this.params.command);
if (
inferred &&
(inferred.type === FileOperationType.EDIT ||
inferred.type === FileOperationType.WRITE)
) {
const filePath = path.resolve(
this.context.config.getTargetDir(),
inferred.filePath,
);
let originalContent: string | null = null;
try {
originalContent = await fsPromises.readFile(filePath, 'utf-8');
} catch {
// file might not exist yet if it's a WRITE
}
let newContent: string | undefined;
if (
inferred.type === FileOperationType.EDIT &&
originalContent !== null
) {
if (inferred.metadata?.['sedExpression']) {
newContent = this.simulateSed(
originalContent,
inferred.metadata['sedExpression'] as string,
);
} else if (
inferred.metadata?.['oldString'] &&
inferred.metadata?.['newString']
) {
newContent = this.simulatePsReplace(
originalContent,
inferred.metadata['oldString'] as string,
inferred.metadata['newString'] as string,
);
}
}
if (newContent !== undefined && originalContent !== null) {
const fileDiff = Diff.createPatch(filePath, originalContent, newContent);
const editDetails: ToolEditConfirmationDetails = {
type: 'edit',
title: this.getDisplayTitle(),
fileName: path.basename(filePath),
filePath: inferred.filePath,
fileDiff,
originalContent,
newContent,
onConfirm: async (_outcome: ToolConfirmationOutcome) => {
// Policy updates are now handled centrally by the scheduler
},
};
return editDetails;
}
}
const parsed = parseCommandDetails(command);
let rootCommandDisplay = '';
@@ -216,7 +328,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
const confirmationDetails: ToolExecuteConfirmationDetails = {
type: 'exec',
title: 'Confirm Shell Command',
title: this.getDisplayTitle(),
command: this.params.command,
rootCommand: rootCommandDisplay,
rootCommands,
@@ -377,6 +489,46 @@ export class ShellToolInvocation extends BaseToolInvocation<
const result = await resultPromise;
// Telemetry parity
const inferred = inferFileOperation(this.params.command);
if (inferred && result.exitCode === 0) {
let operation: FileOperation | undefined;
let canonicalToolName = SHELL_TOOL_NAME;
switch (inferred.type) {
case FileOperationType.SEARCH:
operation = FileOperation.READ;
canonicalToolName = GREP_TOOL_NAME;
break;
case FileOperationType.READ:
operation = FileOperation.READ;
canonicalToolName = READ_FILE_TOOL_NAME;
break;
case FileOperationType.EDIT:
operation = FileOperation.UPDATE;
canonicalToolName = EDIT_TOOL_NAME;
break;
case FileOperationType.WRITE:
operation = FileOperation.UPDATE;
canonicalToolName = WRITE_FILE_TOOL_NAME;
break;
}
if (operation) {
const extension = path.extname(inferred.filePath);
logFileOperation(
this.context.config,
new FileOperationEvent(
canonicalToolName,
operation,
0, // Lines changed not easily available
'', // mimetype
extension,
'', // programming language
),
);
}
}
const backgroundPIDs: number[] = [];
if (os.platform() !== 'win32') {
let tempFileExists = false;
@@ -22,6 +22,11 @@ import { ToolRegistry, DiscoveredTool } from './tool-registry.js';
import {
DISCOVERED_TOOL_PREFIX,
UPDATE_TOPIC_TOOL_NAME,
GREP_TOOL_NAME,
EDIT_TOOL_NAME,
WRITE_FILE_TOOL_NAME,
READ_FILE_TOOL_NAME,
SHELL_TOOL_NAME,
} from './tool-names.js';
import { DiscoveredMCPTool } from './mcp-tool.js';
import {
@@ -733,6 +738,33 @@ describe('ToolRegistry', () => {
expect(declarations).toHaveLength(1);
expect(declarations[0].name).toBe(`mcp_${serverName}_${toolName}`);
});
it('should exclude lower fidelity tools when sandboxing is enabled for the main agent', () => {
vi.spyOn(config, 'getSandboxEnabled').mockReturnValue(true);
(toolRegistry as any).isMainRegistry = true;
// Register the tools that should be excluded
const grepTool = new MockTool({ name: GREP_TOOL_NAME });
const editTool = new MockTool({ name: EDIT_TOOL_NAME });
const writeTool = new MockTool({ name: WRITE_FILE_TOOL_NAME });
const readTool = new MockTool({ name: READ_FILE_TOOL_NAME });
const shellTool = new MockTool({ name: SHELL_TOOL_NAME });
toolRegistry.registerTool(grepTool);
toolRegistry.registerTool(editTool);
toolRegistry.registerTool(writeTool);
toolRegistry.registerTool(readTool);
toolRegistry.registerTool(shellTool);
const declarations = toolRegistry.getFunctionDeclarations();
const names = declarations.map((d) => d.name);
expect(names).not.toContain(GREP_TOOL_NAME);
expect(names).not.toContain(EDIT_TOOL_NAME);
expect(names).not.toContain(WRITE_FILE_TOOL_NAME);
expect(names).not.toContain(READ_FILE_TOOL_NAME);
expect(names).toContain(SHELL_TOOL_NAME);
});
});
describe('plan mode', () => {
+13
View File
@@ -30,6 +30,8 @@ import {
getToolAliases,
WRITE_FILE_TOOL_NAME,
EDIT_TOOL_NAME,
GREP_TOOL_NAME,
READ_FILE_TOOL_NAME,
UPDATE_TOPIC_TOOL_NAME,
} from './tool-names.js';
@@ -635,6 +637,17 @@ export class ToolRegistry {
return;
}
if (
this.isMainRegistry &&
this.config.getSandboxEnabled() &&
(toolName === GREP_TOOL_NAME ||
toolName === EDIT_TOOL_NAME ||
toolName === WRITE_FILE_TOOL_NAME ||
toolName === READ_FILE_TOOL_NAME)
) {
return;
}
seenNames.add(toolName);
let schema = tool.getSchema(modelId);
@@ -23,6 +23,8 @@ import {
stripShellWrapper,
hasRedirection,
resolveExecutable,
inferFileOperation,
FileOperationType,
} from './shell-utils.js';
import path from 'node:path';
@@ -603,3 +605,41 @@ describe('resolveExecutable', () => {
expect(await resolveExecutable('unknown')).toBeUndefined();
});
});
describe('inferFileOperation', () => {
it('should infer sed -i as an EDIT operation', () => {
mockPlatform.mockReturnValue('linux');
const result = inferFileOperation("sed -i 's/foo/bar/g' test.ts");
expect(result).toBeDefined();
expect(result?.type).toBe(FileOperationType.EDIT);
expect(result?.filePath).toBe('test.ts');
expect(result?.metadata?.['sedExpression']).toBe('s/foo/bar/g');
});
it('should infer echo redirection as a WRITE operation', () => {
mockPlatform.mockReturnValue('linux');
const result = inferFileOperation("echo 'hello' > hello.txt");
expect(result).toBeDefined();
expect(result?.type).toBe(FileOperationType.WRITE);
expect(result?.filePath).toBe('hello.txt');
});
it('should infer grep as a SEARCH operation', () => {
mockPlatform.mockReturnValue('linux');
const result = inferFileOperation("grep 'pattern' file.txt");
expect(result).toBeDefined();
expect(result?.type).toBe(FileOperationType.SEARCH);
expect(result?.filePath).toBe('file.txt');
expect(result?.metadata?.['pattern']).toBe('pattern');
});
it('should infer PowerShell -replace as an EDIT operation', () => {
mockPlatform.mockReturnValue('win32');
const result = inferFileOperation("(Get-Content file.txt) -replace 'a', 'b' | Set-Content file.txt");
expect(result).toBeDefined();
expect(result?.type).toBe(FileOperationType.EDIT);
expect(result?.filePath).toBe('file.txt');
expect(result?.metadata?.['oldString']).toBe('a');
expect(result?.metadata?.['newString']).toBe('b');
});
});
+165
View File
@@ -623,6 +623,20 @@ export function getShellConfiguration(): ShellConfiguration {
*/
export const isWindows = () => os.platform() === 'win32';
export enum FileOperationType {
SEARCH = 'SEARCH',
EDIT = 'EDIT',
WRITE = 'WRITE',
READ = 'READ',
}
export interface InferredFileOperation {
type: FileOperationType;
filePath: string;
/** Inferred details (e.g., the sed expression or replacement string) */
metadata?: Record<string, unknown>;
}
/**
* Escapes a string so that it can be safely used as a single argument
* in a shell command, preventing command injection.
@@ -751,6 +765,157 @@ export function getCommandRoots(command: string): string[] {
.filter(Boolean);
}
/**
* Attempts to infer if a shell command is performing a common file operation
* like search, edit, or write.
*/
export function inferFileOperation(
command: string,
): InferredFileOperation | undefined {
const stripped = stripShellWrapper(command);
const shellType = getShellConfiguration().shell;
if (shellType === 'bash') {
// sed -i 's/foo/bar/g' file
// Handle both -i and --in-place, and optional space after -i
const sedMatch = stripped.match(
/^sed\s+(?:-i|--in-place)(?:\s*['"]?\.?['"]?)?\s+['"]?([^'"]+)['"]?\s+['"]?([^'"]+)['"]?\s*$/,
);
if (sedMatch) {
return {
type: FileOperationType.EDIT,
filePath: sedMatch[2].trim(),
metadata: { sedExpression: sedMatch[1] },
};
}
// echo "content" > file
// Simple redirection check
const redirectionMatch = stripped.match(
/(?:^|&&|\|\||;)\s*(?:echo|printf)\s+.*?\s*>\s*(\S+)\s*$/,
);
if (redirectionMatch) {
return {
type: FileOperationType.WRITE,
filePath: redirectionMatch[1].trim(),
};
}
// cat > file <<EOF ...
const heredocMatch = stripped.match(
/(?:^|&&|\|\||;)\s*cat\s*>\s*(\S+)\s*<<\s*(\S+)/,
);
if (heredocMatch) {
return {
type: FileOperationType.WRITE,
filePath: heredocMatch[1].trim(),
};
}
// cp src dest
const cpMatch = stripped.match(
/^(?:cp)\s+(?:-[^ ]+\s+)*['"]?([^'"]+)['"]?\s+['"]?([^'"]+)['"]?\s*$/,
);
if (cpMatch) {
return {
type: FileOperationType.WRITE,
filePath: cpMatch[2].trim(),
};
}
// mv src dest
const mvMatch = stripped.match(
/^(?:mv)\s+(?:-[^ ]+\s+)*['"]?([^'"]+)['"]?\s+['"]?([^'"]+)['"]?\s*$/,
);
if (mvMatch) {
return {
type: FileOperationType.WRITE,
filePath: mvMatch[2].trim(),
};
}
// ... | tee file
const teeMatch = stripped.match(/\|\s*tee\s+(?:-a\s+)?['"]?([^'"]+)['"]?\s*$/);
if (teeMatch) {
return {
type: FileOperationType.WRITE,
filePath: teeMatch[1].trim(),
};
}
// grep "pattern" file
const grepMatch = stripped.match(
/^(?:grep|rg|ripgrep)\s+(?:-[^ ]+\s+)*['"]?([^'"]+)['"]?\s+['"]?([^'"]+)['"]?\s*$/,
);
if (grepMatch) {
return {
type: FileOperationType.SEARCH,
filePath: grepMatch[2].trim(),
metadata: { pattern: grepMatch[1] },
};
}
// cat file, head file, tail file
const readMatch = stripped.match(
/^(?:cat|head|tail|less|more)\s+(?:-[^ ]+\s+)*['"]?([^'"]+)['"]?\s*$/,
);
if (readMatch) {
return {
type: FileOperationType.READ,
filePath: readMatch[1].trim(),
};
}
} else if (shellType === 'powershell') {
// (Get-Content file) -replace 'a', 'b' | Set-Content file
const psReplaceMatch = stripped.match(
/\(Get-Content\s+['"]?([^'"]+)['"]?\)\s+-replace\s+['"]?([^'"]+)['"]?,\s*['"]?([^'"]+)['"]?\s*\|\s*Set-Content\s+['"]?([^'"]+)['"]?/i,
);
if (psReplaceMatch) {
return {
type: FileOperationType.EDIT,
filePath: psReplaceMatch[1].trim(),
metadata: { oldString: psReplaceMatch[2], newString: psReplaceMatch[3] },
};
}
// Set-Content -Path file -Value "..."
const psSetContentMatch = stripped.match(
/Set-Content\s+-(?:Path|LiteralPath)\s+['"]?([^'"]+)['"]?/i,
);
if (psSetContentMatch) {
return {
type: FileOperationType.WRITE,
filePath: psSetContentMatch[1].trim(),
};
}
// Select-String -Path file -Pattern "..."
const psSearchMatch = stripped.match(
/Select-String\s+-(?:Path|LiteralPath)\s+['"]?([^'"]+)['"]?\s+-Pattern\s+['"]?([^'"]+)['"]?/i,
);
if (psSearchMatch) {
return {
type: FileOperationType.SEARCH,
filePath: psSearchMatch[1].trim(),
metadata: { pattern: psSearchMatch[2] },
};
}
// Get-Content file, type file, cat file
const psReadMatch = stripped.match(
/^(?:Get-Content|type|cat)\s+(?:-(?:Path|LiteralPath|Tail|Head|TotalCount|Wait)\s+[^ ]+\s+)*['"]?([^'"]+)['"]?\s*$/i,
);
if (psReadMatch) {
return {
type: FileOperationType.READ,
filePath: psReadMatch[1].trim(),
};
}
}
return undefined;
}
export function stripShellWrapper(command: string): string {
const pattern =
/^\s*(?:(?:(?:\S+\/)?(?:sh|bash|zsh))\s+-c|cmd\.exe\s+\/c|powershell(?:\.exe)?\s+(?:-NoProfile\s+)?-Command|pwsh(?:\.exe)?\s+(?:-NoProfile\s+)?-Command)\s+/i;