From 212edf31ed7835c240f73c9f65d04cc05d2786fc Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Tue, 14 Apr 2026 13:24:21 -0400 Subject: [PATCH 1/3] chore(mcp): check MCP error code over brittle string match (#25381) --- packages/core/src/tools/mcp-client.test.ts | 22 ++++++++++++++++++++++ packages/core/src/tools/mcp-client.ts | 16 +++++++++------- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts index 4a14b671a0..50b17aa735 100644 --- a/packages/core/src/tools/mcp-client.test.ts +++ b/packages/core/src/tools/mcp-client.test.ts @@ -20,6 +20,8 @@ import { MCPOAuthTokenStorage } from '../mcp/oauth-token-storage.js'; import { OAuthUtils } from '../mcp/oauth-utils.js'; import type { PromptRegistry } from '../prompts/prompt-registry.js'; import { + ErrorCode, + McpError, PromptListChangedNotificationSchema, ResourceListChangedNotificationSchema, ToolListChangedNotificationSchema, @@ -35,6 +37,7 @@ import { isEnabled, McpClient, populateMcpServerCommand, + discoverPrompts, type McpContext, } from './mcp-client.js'; import type { ToolRegistry } from './tool-registry.js'; @@ -320,6 +323,25 @@ describe('mcp-client', () => { ); }); + it('should return empty array for discoverPrompts on MethodNotFound error without diagnostic', async () => { + const mockedClient = { + getServerCapabilities: vi.fn().mockReturnValue({ prompts: {} }), + listPrompts: vi + .fn() + .mockRejectedValue( + new McpError(ErrorCode.MethodNotFound, 'Method not supported'), + ), + }; + const result = await discoverPrompts( + 'test-server', + mockedClient as unknown as ClientLib.Client, + MOCK_CONTEXT, + ); + expect(result).toEqual([]); + // MethodNotFound errors should be silently ignored regardless of message text + expect(MOCK_CONTEXT.emitMcpDiagnostic).not.toHaveBeenCalled(); + }); + it('should not discover tools if server does not support them', async () => { const mockedClient = { connect: vi.fn(), diff --git a/packages/core/src/tools/mcp-client.ts b/packages/core/src/tools/mcp-client.ts index a7852050fc..0441063f81 100644 --- a/packages/core/src/tools/mcp-client.ts +++ b/packages/core/src/tools/mcp-client.ts @@ -27,6 +27,8 @@ import { ReadResourceResultSchema, ResourceListChangedNotificationSchema, ToolListChangedNotificationSchema, + ErrorCode, + McpError, PromptListChangedNotificationSchema, ProgressNotificationSchema, type GetPromptResult, @@ -1250,6 +1252,10 @@ export async function connectAndDiscover( } } +function isMcpMethodNotFoundError(error: unknown): boolean { + return error instanceof McpError && error.code === ErrorCode.MethodNotFound; +} + /** * Discovers and sanitizes tools from a connected MCP client. * It retrieves function declarations from the client, filters out disabled tools, @@ -1329,10 +1335,7 @@ export async function discoverTools( } return discoveredTools; } catch (error) { - if ( - error instanceof Error && - !error.message?.includes('Method not found') - ) { + if (!isMcpMethodNotFoundError(error)) { cliConfig.emitMcpDiagnostic( 'error', `Error discovering tools from ${mcpServerName}: ${getErrorMessage( @@ -1456,8 +1459,7 @@ export async function discoverPrompts( ), })); } catch (error) { - // It's okay if the method is not found, which is a common case. - if (error instanceof Error && error.message?.includes('Method not found')) { + if (isMcpMethodNotFoundError(error)) { return []; } cliConfig.emitMcpDiagnostic( @@ -1505,7 +1507,7 @@ async function listResources( cursor = response.nextCursor ?? undefined; } while (cursor); } catch (error) { - if (error instanceof Error && error.message?.includes('Method not found')) { + if (isMcpMethodNotFoundError(error)) { return []; } cliConfig.emitMcpDiagnostic( From 059d9175ebc661165388f8f8755d30e5a781a9fb Mon Sep 17 00:00:00 2001 From: Emily Hedlund Date: Tue, 14 Apr 2026 10:33:07 -0700 Subject: [PATCH 2/3] test(core): improve sandbox integration test coverage and fix OS-specific failures (#25307) Co-authored-by: David Pierce --- .../src/sandbox/linux/LinuxSandboxManager.ts | 30 +- .../sandbox/linux/bwrapArgsBuilder.test.ts | 26 +- .../src/sandbox/linux/bwrapArgsBuilder.ts | 13 +- .../sandbox/macos/MacOsSandboxManager.test.ts | 24 +- .../sandbox/windows/WindowsSandboxManager.ts | 81 +- .../sandboxManager.integration.test.ts | 1072 ++++++++++------- 6 files changed, 768 insertions(+), 478 deletions(-) diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts index 0dc3c76f74..a431085c33 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts @@ -37,6 +37,7 @@ import { createSandboxDenialCache, type SandboxDenialCache, } from '../utils/sandboxDenialUtils.js'; +import { isErrnoException } from '../utils/fsUtils.js'; import { handleReadWriteCommands } from '../utils/sandboxReadWriteUtils.js'; import { buildBwrapArgs } from './bwrapArgsBuilder.js'; @@ -116,9 +117,12 @@ function touch(filePath: string, isDirectory: boolean) { assertValidPathString(filePath); try { // If it exists (even as a broken symlink), do nothing - if (fs.lstatSync(filePath)) return; - } catch { - // Ignore ENOENT + fs.lstatSync(filePath); + return; + } catch (e: unknown) { + if (isErrnoException(e) && e.code !== 'ENOENT') { + throw e; + } } if (isDirectory) { @@ -136,9 +140,22 @@ function touch(filePath: string, isDirectory: boolean) { export class LinuxSandboxManager implements SandboxManager { private static maskFilePath: string | undefined; private readonly denialCache: SandboxDenialCache = createSandboxDenialCache(); + private governanceFilesInitialized = false; constructor(private readonly options: GlobalSandboxOptions) {} + private ensureGovernanceFilesExist(workspace: string): void { + if (this.governanceFilesInitialized) return; + + // These must exist on the host before running the sandbox to ensure they are protected. + for (const file of GOVERNANCE_FILES) { + const filePath = join(workspace, file.path); + touch(filePath, file.isDirectory); + } + + this.governanceFilesInitialized = true; + } + isKnownSafeCommand(args: string[]): boolean { return isKnownSafeCommand(args); } @@ -258,17 +275,14 @@ export class LinuxSandboxManager implements SandboxManager { mergedAdditional, ); - for (const file of GOVERNANCE_FILES) { - const filePath = join(this.options.workspace, file.path); - touch(filePath, file.isDirectory); - } + this.ensureGovernanceFilesExist(resolvedPaths.workspace.resolved); const bwrapArgs = await buildBwrapArgs({ resolvedPaths, workspaceWrite, networkAccess: mergedAdditional.network ?? false, maskFilePath: this.getMaskFilePath(), - isWriteCommand: req.command === '__write', + isReadOnlyCommand: req.command === '__read', }); const bpfPath = getSeccompBpfPath(); diff --git a/packages/core/src/sandbox/linux/bwrapArgsBuilder.test.ts b/packages/core/src/sandbox/linux/bwrapArgsBuilder.test.ts index b9584062bc..6cff168d21 100644 --- a/packages/core/src/sandbox/linux/bwrapArgsBuilder.test.ts +++ b/packages/core/src/sandbox/linux/bwrapArgsBuilder.test.ts @@ -92,7 +92,7 @@ describe.skipIf(os.platform() === 'win32')('buildBwrapArgs', () => { workspaceWrite: false, networkAccess: false, maskFilePath: '/tmp/mask', - isWriteCommand: false, + isReadOnlyCommand: false, }; it('should correctly format the base arguments', async () => { @@ -188,7 +188,7 @@ describe.skipIf(os.platform() === 'win32')('buildBwrapArgs', () => { expect(args[args.indexOf('/opt/tools') - 1]).toBe('--bind-try'); }); - it('should bind the parent directory of a non-existent path', async () => { + it('should bind the parent directory of a non-existent path with --bind-try when isReadOnlyCommand is false', async () => { vi.mocked(fs.existsSync).mockImplementation((p) => { if (p === '/home/user/workspace/new-file.txt') return false; return true; @@ -196,10 +196,10 @@ describe.skipIf(os.platform() === 'win32')('buildBwrapArgs', () => { const args = await buildBwrapArgs({ ...defaultOptions, + isReadOnlyCommand: false, resolvedPaths: createResolvedPaths({ policyAllowed: ['/home/user/workspace/new-file.txt'], }), - isWriteCommand: true, }); const parentDir = '/home/user/workspace'; @@ -208,6 +208,26 @@ describe.skipIf(os.platform() === 'win32')('buildBwrapArgs', () => { expect(args[bindIndex - 2]).toBe('--bind-try'); }); + it('should bind the parent directory of a non-existent path with --ro-bind-try when isReadOnlyCommand is true', async () => { + vi.mocked(fs.existsSync).mockImplementation((p) => { + if (p === '/home/user/workspace/new-file.txt') return false; + return true; + }); + + const args = await buildBwrapArgs({ + ...defaultOptions, + isReadOnlyCommand: true, + resolvedPaths: createResolvedPaths({ + policyAllowed: ['/home/user/workspace/new-file.txt'], + }), + }); + + const parentDir = '/home/user/workspace'; + const bindIndex = args.lastIndexOf(parentDir); + expect(bindIndex).not.toBe(-1); + expect(args[bindIndex - 2]).toBe('--ro-bind-try'); + }); + it('should parameterize forbidden paths and explicitly deny them', async () => { vi.mocked(fs.statSync).mockImplementation((p) => { if (p.toString().includes('cache')) { diff --git a/packages/core/src/sandbox/linux/bwrapArgsBuilder.ts b/packages/core/src/sandbox/linux/bwrapArgsBuilder.ts index d7fec044e3..591bba8a0e 100644 --- a/packages/core/src/sandbox/linux/bwrapArgsBuilder.ts +++ b/packages/core/src/sandbox/linux/bwrapArgsBuilder.ts @@ -23,7 +23,7 @@ export interface BwrapArgsOptions { workspaceWrite: boolean; networkAccess: boolean; maskFilePath: string; - isWriteCommand: boolean; + isReadOnlyCommand: boolean; } /** @@ -37,7 +37,7 @@ export async function buildBwrapArgs( workspaceWrite, networkAccess, maskFilePath, - isWriteCommand, + isReadOnlyCommand, } = options; const { workspace } = resolvedPaths; @@ -79,10 +79,13 @@ export async function buildBwrapArgs( bwrapArgs.push('--bind-try', allowedPath, allowedPath); } else { // If the path doesn't exist, we still want to allow access to its parent - // to enable creating it. Since allowedPath is already resolved by resolveSandboxPaths, - // its parent is also correctly resolved. + // to enable creating it. const parent = dirname(allowedPath); - bwrapArgs.push(isWriteCommand ? '--bind-try' : bindFlag, parent, parent); + bwrapArgs.push( + isReadOnlyCommand ? '--ro-bind-try' : '--bind-try', + parent, + parent, + ); } } diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts index 3e1862998e..963743a78d 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts @@ -5,7 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { MacOsSandboxManager } from './MacOsSandboxManager.js'; -import type { ExecutionPolicy } from '../../services/sandboxManager.js'; +import { type ExecutionPolicy } from '../../services/sandboxManager.js'; import * as seatbeltArgsBuilder from './seatbeltArgsBuilder.js'; import fs from 'node:fs'; import os from 'node:os'; @@ -191,28 +191,6 @@ describe('MacOsSandboxManager', () => { }); }); - describe('governance files', () => { - it('should ensure governance files exist', async () => { - await manager.prepareCommand({ - command: 'echo', - args: [], - cwd: mockWorkspace, - env: {}, - policy: mockPolicy, - }); - - // The seatbelt builder internally handles governance files, so we simply verify - // it is invoked correctly with the right workspace. - expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( - expect.objectContaining({ - resolvedPaths: expect.objectContaining({ - workspace: { resolved: mockWorkspace, original: mockWorkspace }, - }), - }), - ); - }); - }); - describe('allowedPaths', () => { it('should parameterize allowed paths and normalize them', async () => { await manager.prepareCommand({ diff --git a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts index 0f60f5906f..42fa196749 100644 --- a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts @@ -5,7 +5,7 @@ */ import fs from 'node:fs'; -import path from 'node:path'; +import path, { join } from 'node:path'; import os from 'node:os'; import { fileURLToPath } from 'node:url'; import { @@ -33,6 +33,7 @@ import { } from './commandSafety.js'; import { verifySandboxOverrides } from '../utils/commandUtils.js'; import { parseWindowsSandboxDenials } from './windowsSandboxDenialUtils.js'; +import { isErrnoException } from '../utils/fsUtils.js'; import { isSubpath, resolveToRealPath, @@ -53,10 +54,13 @@ const __dirname = path.dirname(__filename); */ export class WindowsSandboxManager implements SandboxManager { static readonly HELPER_EXE = 'GeminiSandbox.exe'; + private readonly helperPath: string; - private initialized = false; private readonly denialCache: SandboxDenialCache = createSandboxDenialCache(); + private static helperCompiled = false; + private governanceFilesInitialized = false; + constructor(private readonly options: GlobalSandboxOptions) { this.helperPath = path.resolve(__dirname, WindowsSandboxManager.HELPER_EXE); } @@ -86,33 +90,20 @@ export class WindowsSandboxManager implements SandboxManager { return this.options; } - /** - * Ensures a file or directory exists. - */ - private touch(filePath: string, isDirectory: boolean): void { - assertValidPathString(filePath); - try { - // If it exists (even as a broken symlink), do nothing - if (fs.lstatSync(filePath)) return; - } catch { - // Ignore ENOENT + private ensureGovernanceFilesExist(workspace: string): void { + if (this.governanceFilesInitialized) return; + + // These must exist on the host before running the sandbox to ensure they are protected. + for (const file of GOVERNANCE_FILES) { + const filePath = join(workspace, file.path); + touch(filePath, file.isDirectory); } - if (isDirectory) { - fs.mkdirSync(filePath, { recursive: true }); - } else { - const dir = path.dirname(filePath); - if (!fs.existsSync(dir)) { - fs.mkdirSync(dir, { recursive: true }); - } - fs.closeSync(fs.openSync(filePath, 'a')); - } + this.governanceFilesInitialized = true; } - private async ensureInitialized(): Promise { - if (this.initialized) return; - if (os.platform() !== 'win32') { - this.initialized = true; + private async ensureHelperCompiled(): Promise { + if (WindowsSandboxManager.helperCompiled || os.platform() !== 'win32') { return; } @@ -207,14 +198,14 @@ export class WindowsSandboxManager implements SandboxManager { ); } - this.initialized = true; + WindowsSandboxManager.helperCompiled = true; } /** * Prepares a command for sandboxed execution on Windows. */ async prepareCommand(req: SandboxRequest): Promise { - await this.ensureInitialized(); + await this.ensureHelperCompiled(); const sanitizationConfig = getSecureSanitizationConfig( req.policy?.sanitizationConfig, @@ -276,6 +267,8 @@ export class WindowsSandboxManager implements SandboxManager { mergedAdditional, ); + this.ensureGovernanceFilesExist(resolvedPaths.workspace.resolved); + // 1. Collect all forbidden paths. // We start with explicitly forbidden paths from the options and request. const forbiddenManifest = new Set( @@ -402,14 +395,6 @@ export class WindowsSandboxManager implements SandboxManager { // No-op for read access on Windows. } - // 4. Protected governance files - // These must exist on the host before running the sandbox to prevent - // the sandboxed process from creating them with Low integrity. - for (const file of GOVERNANCE_FILES) { - const filePath = path.join(resolvedPaths.workspace.resolved, file.path); - this.touch(filePath, file.isDirectory); - } - // 5. Generate Manifests const tempDir = await fs.promises.mkdtemp( path.join(os.tmpdir(), 'gemini-cli-sandbox-'), @@ -471,3 +456,29 @@ export class WindowsSandboxManager implements SandboxManager { ); } } + +/** + * Ensures a file or directory exists. + */ +function touch(filePath: string, isDirectory: boolean): void { + assertValidPathString(filePath); + try { + // If it exists (even as a broken symlink), do nothing + fs.lstatSync(filePath); + return; + } catch (e: unknown) { + if (isErrnoException(e) && e.code !== 'ENOENT') { + throw e; + } + } + + if (isDirectory) { + fs.mkdirSync(filePath, { recursive: true }); + } else { + const dir = path.dirname(filePath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + fs.closeSync(fs.openSync(filePath, 'a')); + } +} diff --git a/packages/core/src/services/sandboxManager.integration.test.ts b/packages/core/src/services/sandboxManager.integration.test.ts index 65adeaacbb..3481c53ca7 100644 --- a/packages/core/src/services/sandboxManager.integration.test.ts +++ b/packages/core/src/services/sandboxManager.integration.test.ts @@ -3,13 +3,22 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { + describe, + it, + expect, + beforeAll, + beforeEach, + afterEach, + afterAll, +} from 'vitest'; import { createSandboxManager } from './sandboxManagerFactory.js'; import { ShellExecutionService } from './shellExecutionService.js'; import { getSecureSanitizationConfig } from './environmentSanitization.js'; import { type SandboxManager, type SandboxedCommand, + GOVERNANCE_FILES, } from './sandboxManager.js'; import { execFile } from 'node:child_process'; import { promisify } from 'node:util'; @@ -139,10 +148,10 @@ Status: ${result.status} (expected ${expected})${ } describe('SandboxManager Integration', () => { - const tempDirectories: string[] = []; + let tempDirectories: string[] = []; /** - * Creates a temporary directory. + * Creates a temporary directory and tracks it for automatic cleanup after each test. * - macOS: Created in process.cwd() to avoid the seatbelt profile's global os.tmpdir() whitelist. * - Win/Linux: Created in os.tmpdir() because enforcing sandbox restrictions inside a large directory can be very slow. */ @@ -159,12 +168,15 @@ describe('SandboxManager Integration', () => { let workspace: string; let manager: SandboxManager; - beforeAll(() => { + beforeEach(() => { + tempDirectories = []; + // Create a fresh, isolated workspace for every test to prevent state + // leakage from causing intermittent or order-dependent test failures. workspace = createTempDir('workspace-'); manager = createSandboxManager({ enabled: true }, { workspace }); }); - afterAll(() => { + afterEach(() => { for (const dir of tempDirectories) { try { fs.rmSync(dir, { recursive: true, force: true }); @@ -172,147 +184,330 @@ describe('SandboxManager Integration', () => { // Best-effort cleanup } } + tempDirectories = []; }); - describe('Basic Execution', () => { - it('executes commands within the workspace', async () => { - const { command, args } = Platform.echo('sandbox test'); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, + describe('Execution & Environment', () => { + describe('Basic Execution', () => { + it('allows workspace execution', async () => { + const { command, args } = Platform.echo('sandbox test'); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(result.stdout.trim()).toBe('sandbox test'); }); - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'success'); - expect(result.stdout.trim()).toBe('sandbox test'); + // The Windows sandbox wrapper (GeminiSandbox.exe) uses standard pipes + // for I/O interception, which breaks ConPTY pseudo-terminal inheritance. + it.skipIf(Platform.isWindows)( + 'supports interactive terminals', + async () => { + const handle = await ShellExecutionService.execute( + Platform.isPty(), + workspace, + () => {}, + new AbortController().signal, + true, + { + sanitizationConfig: getSecureSanitizationConfig(), + sandboxManager: manager, + }, + ); + + const result = await handle.result; + expect(result.exitCode).toBe(0); + expect(result.output).toContain('True'); + }, + ); }); - // The Windows sandbox wrapper (GeminiSandbox.exe) uses standard pipes - // for I/O interception, which breaks ConPTY pseudo-terminal inheritance. - it.skipIf(Platform.isWindows)( - 'supports interactive pseudo-terminals (node-pty)', - async () => { - const handle = await ShellExecutionService.execute( - Platform.isPty(), - workspace, - () => {}, - new AbortController().signal, - true, + describe('Virtual Commands', () => { + it('handles virtual read commands', async () => { + const testFile = path.join(workspace, 'read-virtual.txt'); + fs.writeFileSync(testFile, 'virtual read success'); + + const sandboxed = await manager.prepareCommand({ + command: '__read', + args: [testFile], + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(result.stdout.trim()).toBe('virtual read success'); + }); + + it('handles virtual write commands', async () => { + const testFile = path.join(workspace, 'write-virtual.txt'); + + const sandboxed = await manager.prepareCommand({ + command: '__write', + args: [testFile], + cwd: workspace, + env: process.env, + }); + + // Executing __write directly via runCommand hangs because 'cat' waits for stdin. + // Instead, we verify the command was translated correctly. + if (Platform.isWindows) { + // On Windows, the native helper handles '__write' + expect(sandboxed.args.includes('__write')).toBe(true); + } else { + // On macOS/Linux, it is translated to a shell command with 'tee -- "$@" > /dev/null' + expect(sandboxed.args.join(' ')).toContain('tee --'); + } + }); + }); + + describe('Environment Sanitization', () => { + it('scrubs sensitive environment variables', async () => { + const checkEnvCmd = { + command: process.execPath, + args: [ + '-e', + 'console.log(process.env.TEST_SECRET_TOKEN || "MISSING")', + ], + }; + + const sandboxed = await manager.prepareCommand({ + ...checkEnvCmd, + cwd: workspace, + env: { ...process.env, TEST_SECRET_TOKEN: 'super-secret-value' }, + policy: { + sanitizationConfig: { + enableEnvironmentVariableRedaction: true, + blockedEnvironmentVariables: ['TEST_SECRET_TOKEN'], + }, + }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + // By default, environment sanitization drops non-allowlisted vars or vars that look like secrets. + // Assuming TEST_SECRET_TOKEN is scrubbed: + expect(result.stdout.trim()).toBe('MISSING'); + }); + }); + }); + + describe('Sandbox Policies & Modes', () => { + describe('Plan Mode Transitions', () => { + it('allows writing plans in plan mode', async () => { + // In Plan Mode, modeConfig sets readonly: true, allowOverrides: true + const planManager = createSandboxManager( + { enabled: true }, + { workspace, modeConfig: { readonly: true, allowOverrides: true } }, + ); + + const plansDir = path.join(workspace, '.gemini/tmp/session-123/plans'); + fs.mkdirSync(plansDir, { recursive: true }); + const planFile = path.join(plansDir, 'feature-plan.md'); + + // The WriteFile tool requests explicit write access for the plan file path + const { command, args } = Platform.touch(planFile); + + const sandboxed = await planManager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [plansDir] }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(fs.existsSync(planFile)).toBe(true); + }); + + it('allows workspace writes after exiting plan mode', async () => { + // Upon exiting Plan Mode, the sandbox transitions to autoEdit/accepting_edits + // which sets readonly: false, allowOverrides: true + const editManager = createSandboxManager( + { enabled: true }, + { workspace, modeConfig: { readonly: false, allowOverrides: true } }, + ); + + const taskFile = path.join(workspace, 'src/tasks/task.ts'); + const taskDir = path.dirname(taskFile); + fs.mkdirSync(taskDir, { recursive: true }); + + // Simulate a generic edit anywhere in the workspace + const { command, args } = Platform.touch(taskFile); + + const sandboxed = await editManager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [taskDir] }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(fs.existsSync(taskFile)).toBe(true); + }); + }); + + describe('Workspace Write Policies', () => { + it('enforces read-only mode', async () => { + const testFile = path.join(workspace, 'readonly-test.txt'); + const { command, args } = Platform.touch(testFile); + + const readonlyManager = createSandboxManager( + { enabled: true }, { - sanitizationConfig: getSecureSanitizationConfig(), - sandboxManager: manager, + workspace, + modeConfig: { readonly: true, allowOverrides: true }, }, ); - const result = await handle.result; - expect(result.exitCode).toBe(0); - expect(result.output).toContain('True'); - }, - ); + const sandboxed = await readonlyManager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }); + + it('allows writes for approved tools', async () => { + const testFile = path.join(workspace, 'approved-test.txt'); + const command = Platform.isWindows ? 'cmd.exe' : 'sh'; + const args = Platform.isWindows + ? ['/c', `echo test > ${testFile}`] + : ['-c', `echo test > "${testFile}"`]; + + // The shell wrapper is stripped by getCommandRoots, so the root command evaluated is 'echo' + const approvedTool = 'echo'; + + const approvedManager = createSandboxManager( + { enabled: true }, + { + workspace, + modeConfig: { + readonly: true, + allowOverrides: true, + approvedTools: [approvedTool], + }, + }, + ); + + const sandboxed = await approvedManager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(fs.existsSync(testFile)).toBe(true); + }); + + it('allows writes in YOLO mode', async () => { + const testFile = path.join(workspace, 'yolo-test.txt'); + const { command, args } = Platform.touch(testFile); + + const yoloManager = createSandboxManager( + { enabled: true }, + { + workspace, + modeConfig: { readonly: true, yolo: true, allowOverrides: true }, + }, + ); + + const sandboxed = await yoloManager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(fs.existsSync(testFile)).toBe(true); + }); + }); }); - describe('File System Access', () => { - it('blocks access outside the workspace', async () => { - const blockedPath = Platform.getExternalBlockedPath(); - const { command, args } = Platform.touch(blockedPath); + describe('File System Security', () => { + describe('File System Access', () => { + it('prevents out-of-bounds access', async () => { + const blockedPath = Platform.getExternalBlockedPath(); + const { command, args } = Platform.touch(blockedPath); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); }); - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'failure'); - }); + it('supports dynamic permission expansion', async () => { + const tempDir = createTempDir('expansion-'); + const testFile = path.join(tempDir, 'test.txt'); + const { command, args } = Platform.touch(testFile); - it('allows dynamic expansion of permissions after a failure', async () => { - const tempDir = createTempDir('expansion-'); - const testFile = path.join(tempDir, 'test.txt'); - const { command, args } = Platform.touch(testFile); + // First attempt: fails due to sandbox restrictions + const sandboxed1 = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + const result1 = await runCommand(sandboxed1); + assertResult(result1, sandboxed1, 'failure'); + expect(fs.existsSync(testFile)).toBe(false); - // First attempt: fails due to sandbox restrictions - const sandboxed1 = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - }); - const result1 = await runCommand(sandboxed1); - assertResult(result1, sandboxed1, 'failure'); - expect(fs.existsSync(testFile)).toBe(false); - - // Second attempt: succeeds with additional permissions - const sandboxed2 = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - policy: { allowedPaths: [tempDir] }, - }); - const result2 = await runCommand(sandboxed2); - assertResult(result2, sandboxed2, 'success'); - expect(fs.existsSync(testFile)).toBe(true); - }); - - it('grants access to explicitly allowed paths', async () => { - const allowedDir = createTempDir('allowed-'); - const testFile = path.join(allowedDir, 'test.txt'); - - const { command, args } = Platform.touch(testFile); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - policy: { allowedPaths: [allowedDir] }, + // Second attempt: succeeds with additional permissions + const sandboxed2 = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [tempDir] }, + }); + const result2 = await runCommand(sandboxed2); + assertResult(result2, sandboxed2, 'success'); + expect(fs.existsSync(testFile)).toBe(true); }); - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'success'); - expect(fs.existsSync(testFile)).toBe(true); - }); + it('allows access to authorized paths', async () => { + const allowedDir = createTempDir('allowed-'); + const testFile = path.join(allowedDir, 'test.txt'); - it('blocks write access to forbidden paths within the workspace', async () => { - const tempWorkspace = createTempDir('workspace-'); - const forbiddenDir = path.join(tempWorkspace, 'forbidden'); - const testFile = path.join(forbiddenDir, 'test.txt'); - fs.mkdirSync(forbiddenDir); + const { command, args } = Platform.touch(testFile); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [allowedDir] }, + }); - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [forbiddenDir], - }, - ); - const { command, args } = Platform.touch(testFile); - - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(fs.existsSync(testFile)).toBe(true); }); - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'failure'); - }); - - // Windows icacls does not reliably block read-up access for Low Integrity - // processes, so we skip read-specific assertions on Windows. The internal - // tool architecture prevents read bypasses via the C# wrapper and __read. - it.skipIf(Platform.isWindows)( - 'blocks read access to forbidden paths within the workspace', - async () => { + it('protects forbidden paths from writes', async () => { const tempWorkspace = createTempDir('workspace-'); const forbiddenDir = path.join(tempWorkspace, 'forbidden'); const testFile = path.join(forbiddenDir, 'test.txt'); fs.mkdirSync(forbiddenDir); - fs.writeFileSync(testFile, 'secret data'); const osManager = createSandboxManager( { enabled: true }, @@ -321,8 +516,7 @@ describe('SandboxManager Integration', () => { forbiddenPaths: async () => [forbiddenDir], }, ); - - const { command, args } = Platform.cat(testFile); + const { command, args } = Platform.touch(testFile); const sandboxed = await osManager.prepareCommand({ command, @@ -333,333 +527,403 @@ describe('SandboxManager Integration', () => { const result = await runCommand(sandboxed); assertResult(result, sandboxed, 'failure'); - }, - ); + }); - it('blocks access to files inside forbidden directories recursively', async () => { - const tempWorkspace = createTempDir('workspace-'); - const forbiddenDir = path.join(tempWorkspace, 'forbidden'); - const nestedDir = path.join(forbiddenDir, 'nested'); - const nestedFile = path.join(nestedDir, 'test.txt'); + // Windows icacls does not reliably block read-up access for Low Integrity + // processes, so we skip read-specific assertions on Windows. The internal + // tool architecture prevents read bypasses via the C# wrapper and __read. + it.skipIf(Platform.isWindows)( + 'protects forbidden paths from reads', + async () => { + const tempWorkspace = createTempDir('workspace-'); + const forbiddenDir = path.join(tempWorkspace, 'forbidden'); + const testFile = path.join(forbiddenDir, 'test.txt'); + fs.mkdirSync(forbiddenDir); + fs.writeFileSync(testFile, 'secret data'); - // Create the base forbidden directory first so the manager can restrict access to it. - fs.mkdirSync(forbiddenDir); + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [forbiddenDir], + }, + ); - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [forbiddenDir], + const { command, args } = Platform.cat(testFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); }, ); - // Execute a dummy command so the manager initializes its restrictions. - const dummyCommand = await osManager.prepareCommand({ - ...Platform.echo('init'), - cwd: tempWorkspace, - env: process.env, - }); - await runCommand(dummyCommand); + it('protects forbidden directories recursively', async () => { + const tempWorkspace = createTempDir('workspace-'); + const forbiddenDir = path.join(tempWorkspace, 'forbidden'); + const nestedDir = path.join(forbiddenDir, 'nested'); + const nestedFile = path.join(nestedDir, 'test.txt'); - // Now create the nested items. They will inherit the sandbox restrictions from their parent. - fs.mkdirSync(nestedDir, { recursive: true }); - fs.writeFileSync(nestedFile, 'secret'); + // Create the base forbidden directory first so the manager can restrict access to it. + fs.mkdirSync(forbiddenDir); - const { command, args } = Platform.touch(nestedFile); + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [forbiddenDir], + }, + ); - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, + // Execute a dummy command so the manager initializes its restrictions. + const dummyCommand = await osManager.prepareCommand({ + ...Platform.echo('init'), + cwd: tempWorkspace, + env: process.env, + }); + await runCommand(dummyCommand); + + // Now create the nested items. They will inherit the sandbox restrictions from their parent. + fs.mkdirSync(nestedDir, { recursive: true }); + fs.writeFileSync(nestedFile, 'secret'); + + const { command, args } = Platform.touch(nestedFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); }); - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'failure'); + it('prioritizes denials over allowances', async () => { + const tempWorkspace = createTempDir('workspace-'); + const conflictDir = path.join(tempWorkspace, 'conflict'); + const testFile = path.join(conflictDir, 'test.txt'); + fs.mkdirSync(conflictDir); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [conflictDir], + }, + ); + const { command, args } = Platform.touch(testFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { + allowedPaths: [conflictDir], + }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }); + + it('handles missing paths gracefully', async () => { + const tempWorkspace = createTempDir('workspace-'); + const nonExistentPath = path.join(tempWorkspace, 'does-not-exist'); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [nonExistentPath], + }, + ); + const { command, args } = Platform.echo('survived'); + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { + allowedPaths: [nonExistentPath], + }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(result.stdout.trim()).toBe('survived'); + }); + + it('prevents creation of forbidden files', async () => { + const tempWorkspace = createTempDir('workspace-'); + const nonExistentFile = path.join(tempWorkspace, 'never-created.txt'); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [nonExistentFile], + }, + ); + + // We use touch to attempt creation of the file + const { command: cmdTouch, args: argsTouch } = + Platform.touch(nonExistentFile); + + const sandboxedCmd = await osManager.prepareCommand({ + command: cmdTouch, + args: argsTouch, + cwd: tempWorkspace, + env: process.env, + }); + + // Execute the command, we expect it to fail (permission denied or read-only file system) + const result = await runCommand(sandboxedCmd); + + assertResult(result, sandboxedCmd, 'failure'); + expect(fs.existsSync(nonExistentFile)).toBe(false); + }); + + it('restricts symlinks to forbidden targets', async () => { + const tempWorkspace = createTempDir('workspace-'); + const targetFile = path.join(tempWorkspace, 'target.txt'); + const symlinkFile = path.join(tempWorkspace, 'link.txt'); + + fs.writeFileSync(targetFile, 'secret data'); + fs.symlinkSync(targetFile, symlinkFile); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [symlinkFile], + }, + ); + + // Attempt to write to the target file directly + const { command: cmdTarget, args: argsTarget } = + Platform.touch(targetFile); + const commandTarget = await osManager.prepareCommand({ + command: cmdTarget, + args: argsTarget, + cwd: tempWorkspace, + env: process.env, + }); + + const resultTarget = await runCommand(commandTarget); + assertResult(resultTarget, commandTarget, 'failure'); + + // Attempt to write via the symlink + const { command: cmdLink, args: argsLink } = + Platform.touch(symlinkFile); + const commandLink = await osManager.prepareCommand({ + command: cmdLink, + args: argsLink, + cwd: tempWorkspace, + env: process.env, + }); + + const resultLink = await runCommand(commandLink); + assertResult(resultLink, commandLink, 'failure'); + }); }); - it('prioritizes forbiddenPaths over allowedPaths', async () => { - const tempWorkspace = createTempDir('workspace-'); - const conflictDir = path.join(tempWorkspace, 'conflict'); - const testFile = path.join(conflictDir, 'test.txt'); - fs.mkdirSync(conflictDir); + describe('Governance Files', () => { + it('prevents modification of governance files', async () => { + // Ensure workspace is initialized and governance files are created + const { command: echoCmd, args: echoArgs } = Platform.echo('test'); + await manager.prepareCommand({ + command: echoCmd, + args: echoArgs, + cwd: workspace, + env: process.env, + // Even if the entire workspace is explicitly allowed, governance files must be protected + policy: { allowedPaths: [workspace] }, + }); - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [conflictDir], - }, - ); - const { command, args } = Platform.touch(testFile); + for (const file of GOVERNANCE_FILES) { + const filePath = path.join(workspace, file.path); + // Try to append to/overwrite the file or create a file inside the directory + const { command, args } = file.isDirectory + ? Platform.touch(path.join(filePath, 'evil.txt')) + : Platform.touch(filePath); - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, - policy: { - allowedPaths: [conflictDir], - }, + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + } }); - - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'failure'); }); - it('gracefully ignores non-existent paths in allowedPaths and forbiddenPaths', async () => { - const tempWorkspace = createTempDir('workspace-'); - const nonExistentPath = path.join(tempWorkspace, 'does-not-exist'); + describe('Git Worktree Support', () => { + it('supports git worktrees', async () => { + const mainRepo = createTempDir('main-repo-'); + const worktreeDir = createTempDir('worktree-'); - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [nonExistentPath], - }, - ); - const { command, args } = Platform.echo('survived'); - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, - policy: { - allowedPaths: [nonExistentPath], - }, + const mainGitDir = path.join(mainRepo, '.git'); + fs.mkdirSync(mainGitDir, { recursive: true }); + fs.writeFileSync( + path.join(mainGitDir, 'config'), + '[core]\n\trepositoryformatversion = 0\n', + ); + + const worktreeGitDir = path.join( + mainGitDir, + 'worktrees', + 'test-worktree', + ); + fs.mkdirSync(worktreeGitDir, { recursive: true }); + + // Create the .git file in the worktree directory pointing to the worktree git dir + fs.writeFileSync( + path.join(worktreeDir, '.git'), + `gitdir: ${worktreeGitDir}\n`, + ); + + // Create the backlink from worktree git dir to the worktree's .git file + const backlinkPath = path.join(worktreeGitDir, 'gitdir'); + fs.writeFileSync(backlinkPath, path.join(worktreeDir, '.git')); + + // Create a file in the worktree git dir that we want to access + const secretFile = path.join(worktreeGitDir, 'secret.txt'); + fs.writeFileSync(secretFile, 'git-secret'); + + const osManager = createSandboxManager( + { enabled: true }, + { workspace: worktreeDir }, + ); + + const { command, args } = Platform.cat(secretFile); + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: worktreeDir, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(result.stdout.trim()).toBe('git-secret'); }); - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'success'); - expect(result.stdout.trim()).toBe('survived'); - }); + it('protects git worktree metadata', async () => { + const mainRepo = createTempDir('main-repo-'); + const worktreeDir = createTempDir('worktree-'); - it('prevents creation of non-existent forbidden paths', async () => { - const tempWorkspace = createTempDir('workspace-'); - const nonExistentFile = path.join(tempWorkspace, 'never-created.txt'); + const mainGitDir = path.join(mainRepo, '.git'); + fs.mkdirSync(mainGitDir, { recursive: true }); - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [nonExistentFile], - }, - ); + const worktreeGitDir = path.join( + mainGitDir, + 'worktrees', + 'test-worktree', + ); + fs.mkdirSync(worktreeGitDir, { recursive: true }); - // We use touch to attempt creation of the file - const { command: cmdTouch, args: argsTouch } = - Platform.touch(nonExistentFile); + fs.writeFileSync( + path.join(worktreeDir, '.git'), + `gitdir: ${worktreeGitDir}\n`, + ); + fs.writeFileSync( + path.join(worktreeGitDir, 'gitdir'), + path.join(worktreeDir, '.git'), + ); - const sandboxedCmd = await osManager.prepareCommand({ - command: cmdTouch, - args: argsTouch, - cwd: tempWorkspace, - env: process.env, + const targetFile = path.join(worktreeGitDir, 'secret.txt'); + + const osManager = createSandboxManager( + { enabled: true }, + // Use YOLO mode to ensure the workspace is fully writable, but git worktrees should still be read-only + { workspace: worktreeDir, modeConfig: { yolo: true } }, + ); + + const { command, args } = Platform.touch(targetFile); + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: worktreeDir, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + expect(fs.existsSync(targetFile)).toBe(false); }); - - // Execute the command, we expect it to fail (permission denied or read-only file system) - const result = await runCommand(sandboxedCmd); - - assertResult(result, sandboxedCmd, 'failure'); - expect(fs.existsSync(nonExistentFile)).toBe(false); - }); - - it('blocks access to both a symlink and its target when the symlink is forbidden', async () => { - const tempWorkspace = createTempDir('workspace-'); - const targetFile = path.join(tempWorkspace, 'target.txt'); - const symlinkFile = path.join(tempWorkspace, 'link.txt'); - - fs.writeFileSync(targetFile, 'secret data'); - fs.symlinkSync(targetFile, symlinkFile); - - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [symlinkFile], - }, - ); - - // Attempt to write to the target file directly - const { command: cmdTarget, args: argsTarget } = - Platform.touch(targetFile); - const commandTarget = await osManager.prepareCommand({ - command: cmdTarget, - args: argsTarget, - cwd: tempWorkspace, - env: process.env, - }); - - const resultTarget = await runCommand(commandTarget); - assertResult(resultTarget, commandTarget, 'failure'); - - // Attempt to write via the symlink - const { command: cmdLink, args: argsLink } = Platform.touch(symlinkFile); - const commandLink = await osManager.prepareCommand({ - command: cmdLink, - args: argsLink, - cwd: tempWorkspace, - env: process.env, - }); - - const resultLink = await runCommand(commandLink); - assertResult(resultLink, commandLink, 'failure'); }); }); - describe('Git Worktree Support', () => { - it('allows access to git common directory in a worktree', async () => { - const mainRepo = createTempDir('main-repo-'); - const worktreeDir = createTempDir('worktree-'); + describe('Network Security', () => { + describe('Network Access', () => { + let server: http.Server; + let url: string; - const mainGitDir = path.join(mainRepo, '.git'); - fs.mkdirSync(mainGitDir, { recursive: true }); - fs.writeFileSync( - path.join(mainGitDir, 'config'), - '[core]\n\trepositoryformatversion = 0\n', - ); - - const worktreeGitDir = path.join( - mainGitDir, - 'worktrees', - 'test-worktree', - ); - fs.mkdirSync(worktreeGitDir, { recursive: true }); - - // Create the .git file in the worktree directory pointing to the worktree git dir - fs.writeFileSync( - path.join(worktreeDir, '.git'), - `gitdir: ${worktreeGitDir}\n`, - ); - - // Create the backlink from worktree git dir to the worktree's .git file - const backlinkPath = path.join(worktreeGitDir, 'gitdir'); - fs.writeFileSync(backlinkPath, path.join(worktreeDir, '.git')); - - // Create a file in the worktree git dir that we want to access - const secretFile = path.join(worktreeGitDir, 'secret.txt'); - fs.writeFileSync(secretFile, 'git-secret'); - - const osManager = createSandboxManager( - { enabled: true }, - { workspace: worktreeDir }, - ); - - const { command, args } = Platform.cat(secretFile); - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: worktreeDir, - env: process.env, - }); - - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'success'); - expect(result.stdout.trim()).toBe('git-secret'); - }); - - it('blocks write access to git common directory in a worktree', async () => { - const mainRepo = createTempDir('main-repo-'); - const worktreeDir = createTempDir('worktree-'); - - const mainGitDir = path.join(mainRepo, '.git'); - fs.mkdirSync(mainGitDir, { recursive: true }); - - const worktreeGitDir = path.join( - mainGitDir, - 'worktrees', - 'test-worktree', - ); - fs.mkdirSync(worktreeGitDir, { recursive: true }); - - fs.writeFileSync( - path.join(worktreeDir, '.git'), - `gitdir: ${worktreeGitDir}\n`, - ); - fs.writeFileSync( - path.join(worktreeGitDir, 'gitdir'), - path.join(worktreeDir, '.git'), - ); - - const targetFile = path.join(worktreeGitDir, 'secret.txt'); - - const osManager = createSandboxManager( - { enabled: true }, - // Use YOLO mode to ensure the workspace is fully writable, but git worktrees should still be read-only - { workspace: worktreeDir, modeConfig: { yolo: true } }, - ); - - const { command, args } = Platform.touch(targetFile); - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: worktreeDir, - env: process.env, - }); - - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'failure'); - expect(fs.existsSync(targetFile)).toBe(false); - }); - }); - - describe('Network Access', () => { - let server: http.Server; - let url: string; - - beforeAll(async () => { - server = http.createServer((_, res) => { - res.setHeader('Connection', 'close'); - res.writeHead(200); - res.end('ok'); - }); - await new Promise((resolve, reject) => { - server.on('error', reject); - server.listen(0, '127.0.0.1', () => { - const addr = server.address() as import('net').AddressInfo; - url = `http://127.0.0.1:${addr.port}`; - resolve(); + beforeAll(async () => { + server = http.createServer((_, res) => { + res.setHeader('Connection', 'close'); + res.writeHead(200); + res.end('ok'); + }); + await new Promise((resolve, reject) => { + server.on('error', reject); + server.listen(0, '127.0.0.1', () => { + const addr = server.address() as import('net').AddressInfo; + url = `http://127.0.0.1:${addr.port}`; + resolve(); + }); }); }); - }); - afterAll(async () => { - if (server) await new Promise((res) => server.close(() => res())); - }); + afterAll(async () => { + if (server) await new Promise((res) => server.close(() => res())); + }); - // Windows Job Object rate limits exempt loopback (127.0.0.1) traffic, - // so this test cannot verify loopback blocking on Windows. - it.skipIf(Platform.isWindows)( - 'blocks network access by default', - async () => { + // Windows Job Object rate limits exempt loopback (127.0.0.1) traffic, + // so this test cannot verify loopback blocking on Windows. + it.skipIf(Platform.isWindows)( + 'prevents unauthorized network access', + async () => { + const { command, args } = Platform.curl(url); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }, + ); + + it('allows authorized network access', async () => { const { command, args } = Platform.curl(url); const sandboxed = await manager.prepareCommand({ command, args, cwd: workspace, env: process.env, + policy: { networkAccess: true }, }); const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'failure'); - }, - ); - - it('grants network access when explicitly allowed', async () => { - const { command, args } = Platform.curl(url); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - policy: { networkAccess: true }, + assertResult(result, sandboxed, 'success'); + if (!Platform.isWindows) { + expect(result.stdout.trim()).toBe('ok'); + } }); - - const result = await runCommand(sandboxed); - assertResult(result, sandboxed, 'success'); - if (!Platform.isWindows) { - expect(result.stdout.trim()).toBe('ok'); - } }); }); }); From 02792264ed99c760a8e38b48379f2fa3286419f5 Mon Sep 17 00:00:00 2001 From: ruomeng Date: Tue, 14 Apr 2026 13:36:37 -0400 Subject: [PATCH 3/3] feat(plan): update plan mode prompt to allow showing plan content (#25058) --- packages/core/src/core/__snapshots__/prompts.test.ts.snap | 6 ++++++ packages/core/src/prompts/snippets.ts | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 0e9cc591d1..9132791974 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -119,6 +119,7 @@ The following tools are available in Plan Mode: - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. 5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. 6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use \`exit_plan_mode\` to request approval. +7. **Presenting Plan:** When seeking informal agreement on a plan, or any time the user asks to see the plan, you MUST output the full content of the plan in the chat response. This overrides the "Minimal Output" guideline. ## Planning Workflow Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. @@ -139,6 +140,7 @@ Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. - **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. +- **Alignment Check:** After drafting the plan, you MUST present it to the user in the chat (adhering to Rule 7 for presenting plans) to ensure alignment on the specific details. Ask for feedback or confirmation, and proceed to Step 4 (Review & Approval) once the user agrees with the detailed plan. ### 4. Review & Approval ONLY use the \`exit_plan_mode\` tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and formally request approval. @@ -297,6 +299,7 @@ The following tools are available in Plan Mode: - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. 5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. 6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use \`exit_plan_mode\` to request approval. +7. **Presenting Plan:** When seeking informal agreement on a plan, or any time the user asks to see the plan, you MUST output the full content of the plan in the chat response. This overrides the "Minimal Output" guideline. ## Planning Workflow Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. @@ -317,6 +320,7 @@ Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. - **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. +- **Alignment Check:** After drafting the plan, you MUST present it to the user in the chat (adhering to Rule 7 for presenting plans) to ensure alignment on the specific details. Ask for feedback or confirmation, and proceed to Step 4 (Review & Approval) once the user agrees with the detailed plan. ### 4. Review & Approval ONLY use the \`exit_plan_mode\` tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and formally request approval. @@ -596,6 +600,7 @@ The following tools are available in Plan Mode: - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. 5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. 6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use \`exit_plan_mode\` to request approval. +7. **Presenting Plan:** When seeking informal agreement on a plan, or any time the user asks to see the plan, you MUST output the full content of the plan in the chat response. This overrides the "Minimal Output" guideline. ## Planning Workflow Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. @@ -616,6 +621,7 @@ Write the implementation plan to \`/tmp/project-temp/plans/\`. The plan's struct - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. - **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. +- **Alignment Check:** After drafting the plan, you MUST present it to the user in the chat (adhering to Rule 7 for presenting plans) to ensure alignment on the specific details. Ask for feedback or confirmation, and proceed to Step 4 (Review & Approval) once the user agrees with the detailed plan. ### 4. Review & Approval ONLY use the \`exit_plan_mode\` tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and formally request approval. diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index a2853c8964..c420f22ae3 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -586,6 +586,7 @@ ${options.planModeToolsList} - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. 5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. 6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} to request approval. +7. **Presenting Plan:** When seeking informal agreement on a plan, or any time the user asks to see the plan, you MUST output the full content of the plan in the chat response. This overrides the "Minimal Output" guideline. ## Planning Workflow Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. @@ -605,7 +606,7 @@ The depth of your consultation should be proportional to the task's complexity. Write the implementation plan to \`${options.plansDir}/\`. The plan's structure adapts to the task: - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. - **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. -- **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. +- **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies.${options.interactive ? '\n- **Alignment Check:** After drafting the plan, you MUST present it to the user in the chat (adhering to Rule 7 for presenting plans) to ensure alignment on the specific details. Ask for feedback or confirmation, and proceed to Step 4 (Review & Approval) once the user agrees with the detailed plan.' : ''} ### 4. Review & Approval ONLY use the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and ${options.interactive ? 'formally request approval.' : 'begin implementation.'}