From a833d350a4067c33fe67f195ca5ae807745e4208 Mon Sep 17 00:00:00 2001 From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Date: Mon, 23 Mar 2026 23:41:24 -0400 Subject: [PATCH 01/11] docs: update `/mcp refresh` to `/mcp reload` (#23631) --- docs/reference/commands.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/commands.md b/docs/reference/commands.md index aa4a0d38db..4dd7e367e5 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -250,8 +250,8 @@ Slash commands provide meta-level control over the CLI itself. - **`list`** or **`ls`**: - **Description:** List configured MCP servers and tools. This is the default action if no subcommand is specified. - - **`refresh`**: - - **Description:** Restarts all MCP servers and re-discovers their available + - **`reload`**: + - **Description:** Reloads all MCP servers and re-discovers their available tools. - **`schema`**: - **Description:** List configured MCP servers and tools with descriptions From 37c8de3c060d8b7aa7c4e6a27fe1bf1dddce689b Mon Sep 17 00:00:00 2001 From: David Pierce Date: Tue, 24 Mar 2026 04:04:17 +0000 Subject: [PATCH 02/11] Implementation of sandbox "Write-Protected" Governance Files (#23139) Co-authored-by: Abhi <43648792+abhipatel12@users.noreply.github.com> --- .../sandbox/linux/LinuxSandboxManager.test.ts | 106 +++++++++++++++++- .../src/sandbox/linux/LinuxSandboxManager.ts | 41 ++++++- .../sandbox/macos/MacOsSandboxManager.test.ts | 85 ++++++++++---- .../src/sandbox/macos/MacOsSandboxManager.ts | 60 ++++++++++ packages/core/src/services/sandboxManager.ts | 10 ++ .../services/windowsSandboxManager.test.ts | 68 +++++++---- .../src/services/windowsSandboxManager.ts | 46 +++++++- 7 files changed, 365 insertions(+), 51 deletions(-) diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts index d3864d8278..df230b4d5b 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts @@ -4,15 +4,42 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, beforeEach } from 'vitest'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; import { LinuxSandboxManager } from './LinuxSandboxManager.js'; import type { SandboxRequest } from '../../services/sandboxManager.js'; +import fs from 'node:fs'; + +vi.mock('node:fs', async () => { + const actual = await vi.importActual('node:fs'); + return { + ...actual, + default: { + // @ts-expect-error - Property 'default' does not exist on type 'typeof import("node:fs")' + ...actual.default, + existsSync: vi.fn(() => true), + realpathSync: vi.fn((p: string | Buffer) => p.toString()), + mkdirSync: vi.fn(), + openSync: vi.fn(), + closeSync: vi.fn(), + writeFileSync: vi.fn(), + }, + existsSync: vi.fn(() => true), + realpathSync: vi.fn((p: string | Buffer) => p.toString()), + mkdirSync: vi.fn(), + openSync: vi.fn(), + closeSync: vi.fn(), + writeFileSync: vi.fn(), + }; +}); describe('LinuxSandboxManager', () => { const workspace = '/home/user/workspace'; let manager: LinuxSandboxManager; beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.realpathSync).mockImplementation((p) => p.toString()); manager = new LinuxSandboxManager({ workspace }); }); @@ -52,6 +79,15 @@ describe('LinuxSandboxManager', () => { '--bind', workspace, workspace, + '--ro-bind', + `${workspace}/.gitignore`, + `${workspace}/.gitignore`, + '--ro-bind', + `${workspace}/.geminiignore`, + `${workspace}/.geminiignore`, + '--ro-bind', + `${workspace}/.git`, + `${workspace}/.git`, '--seccomp', '9', '--', @@ -79,6 +115,15 @@ describe('LinuxSandboxManager', () => { '--bind', workspace, workspace, + '--ro-bind', + `${workspace}/.gitignore`, + `${workspace}/.gitignore`, + '--ro-bind', + `${workspace}/.geminiignore`, + `${workspace}/.geminiignore`, + '--ro-bind', + `${workspace}/.git`, + `${workspace}/.git`, '--bind-try', '/tmp/cache', '/tmp/cache', @@ -88,6 +133,48 @@ describe('LinuxSandboxManager', () => { ]); }); + it('protects real paths of governance files if they are symlinks', async () => { + vi.mocked(fs.realpathSync).mockImplementation((p) => { + if (p.toString() === `${workspace}/.gitignore`) + return '/shared/global.gitignore'; + return p.toString(); + }); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: [], + cwd: workspace, + env: {}, + }); + + expect(bwrapArgs).toContain('--ro-bind'); + expect(bwrapArgs).toContain(`${workspace}/.gitignore`); + expect(bwrapArgs).toContain('/shared/global.gitignore'); + + // Check that both are bound + const gitignoreIndex = bwrapArgs.indexOf(`${workspace}/.gitignore`); + expect(bwrapArgs[gitignoreIndex - 1]).toBe('--ro-bind'); + expect(bwrapArgs[gitignoreIndex + 1]).toBe(`${workspace}/.gitignore`); + + const realGitignoreIndex = bwrapArgs.indexOf('/shared/global.gitignore'); + expect(bwrapArgs[realGitignoreIndex - 1]).toBe('--ro-bind'); + expect(bwrapArgs[realGitignoreIndex + 1]).toBe('/shared/global.gitignore'); + }); + + it('touches governance files if they do not exist', async () => { + vi.mocked(fs.existsSync).mockReturnValue(false); + + await getBwrapArgs({ + command: 'ls', + args: [], + cwd: workspace, + env: {}, + }); + + expect(fs.mkdirSync).toHaveBeenCalled(); + expect(fs.openSync).toHaveBeenCalled(); + }); + it('should not bind the workspace twice even if it has a trailing slash in allowedPaths', async () => { const bwrapArgs = await getBwrapArgs({ command: 'ls', @@ -102,7 +189,20 @@ describe('LinuxSandboxManager', () => { const bindsIndex = bwrapArgs.indexOf('--seccomp'); const binds = bwrapArgs.slice(bwrapArgs.indexOf('--bind'), bindsIndex); - // Should only contain the primary workspace bind, not the second one with a trailing slash - expect(binds).toEqual(['--bind', workspace, workspace]); + // Should only contain the primary workspace bind and governance files, not the second workspace bind with a trailing slash + expect(binds).toEqual([ + '--bind', + workspace, + workspace, + '--ro-bind', + `${workspace}/.gitignore`, + `${workspace}/.gitignore`, + '--ro-bind', + `${workspace}/.geminiignore`, + `${workspace}/.geminiignore`, + '--ro-bind', + `${workspace}/.git`, + `${workspace}/.git`, + ]); }); }); diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts index f9f0ed68e9..f50a97c17f 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts @@ -4,14 +4,15 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { join, normalize } from 'node:path'; -import { writeFileSync } from 'node:fs'; +import fs from 'node:fs'; +import { join, dirname, normalize } from 'node:path'; import os from 'node:os'; import { type SandboxManager, type GlobalSandboxOptions, type SandboxRequest, type SandboxedCommand, + GOVERNANCE_FILES, sanitizePaths, } from '../../services/sandboxManager.js'; import { @@ -72,11 +73,30 @@ function getSeccompBpfPath(): string { } const bpfPath = join(os.tmpdir(), `gemini-cli-seccomp-${process.pid}.bpf`); - writeFileSync(bpfPath, buf); + fs.writeFileSync(bpfPath, buf); cachedBpfPath = bpfPath; return bpfPath; } +/** + * Ensures a file or directory exists. + */ +function touch(filePath: string, isDirectory: boolean) { + try { + // If it exists (even as a broken symlink), do nothing + if (fs.lstatSync(filePath)) return; + } catch { + // Ignore ENOENT + } + + if (isDirectory) { + fs.mkdirSync(filePath, { recursive: true }); + } else { + fs.mkdirSync(dirname(filePath), { recursive: true }); + fs.closeSync(fs.openSync(filePath, 'a')); + } +} + /** * A SandboxManager implementation for Linux that uses Bubblewrap (bwrap). */ @@ -109,6 +129,21 @@ export class LinuxSandboxManager implements SandboxManager { this.options.workspace, ]; + // Protected governance files are bind-mounted as read-only, even if the workspace is RW. + // We ensure they exist on the host and resolve real paths to prevent symlink bypasses. + // In bwrap, later binds override earlier ones for the same path. + for (const file of GOVERNANCE_FILES) { + const filePath = join(this.options.workspace, file.path); + touch(filePath, file.isDirectory); + + const realPath = fs.realpathSync(filePath); + + bwrapArgs.push('--ro-bind', filePath, filePath); + if (realPath !== filePath) { + bwrapArgs.push('--ro-bind', realPath, realPath); + } + } + const allowedPaths = sanitizePaths(req.policy?.allowedPaths) || []; const normalizedWorkspace = normalize(this.options.workspace).replace( /\/$/, diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts index d6a72e8439..7bf356d3c6 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts @@ -8,20 +8,32 @@ import { MacOsSandboxManager } from './MacOsSandboxManager.js'; import type { ExecutionPolicy } from '../../services/sandboxManager.js'; import fs from 'node:fs'; import os from 'node:os'; +import path from 'node:path'; describe('MacOsSandboxManager', () => { - const mockWorkspace = '/test/workspace'; - const mockAllowedPaths = ['/test/allowed']; + let mockWorkspace: string; + let mockAllowedPaths: string[]; const mockNetworkAccess = true; - const mockPolicy: ExecutionPolicy = { - allowedPaths: mockAllowedPaths, - networkAccess: mockNetworkAccess, - }; - + let mockPolicy: ExecutionPolicy; let manager: MacOsSandboxManager; beforeEach(() => { + mockWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'gemini-cli-macos-test-'), + ); + mockAllowedPaths = [ + path.join(os.tmpdir(), 'gemini-cli-macos-test-allowed'), + ]; + if (!fs.existsSync(mockAllowedPaths[0])) { + fs.mkdirSync(mockAllowedPaths[0]); + } + + mockPolicy = { + allowedPaths: mockAllowedPaths, + networkAccess: mockNetworkAccess, + }; + manager = new MacOsSandboxManager({ workspace: mockWorkspace }); // Mock realpathSync to just return the path for testing vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p as string); @@ -29,6 +41,10 @@ describe('MacOsSandboxManager', () => { afterEach(() => { vi.restoreAllMocks(); + fs.rmSync(mockWorkspace, { recursive: true, force: true }); + if (mockAllowedPaths && mockAllowedPaths[0]) { + fs.rmSync(mockAllowedPaths[0], { recursive: true, force: true }); + } }); describe('prepareCommand', () => { @@ -50,8 +66,19 @@ describe('MacOsSandboxManager', () => { expect(profile).not.toContain('(allow network*)'); expect(result.args).toContain('-D'); - expect(result.args).toContain('WORKSPACE=/test/workspace'); + expect(result.args).toContain(`WORKSPACE=${mockWorkspace}`); expect(result.args).toContain(`TMPDIR=${os.tmpdir()}`); + + // Governance files should be protected + expect(profile).toContain( + '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', + ); // .gitignore + expect(profile).toContain( + '(deny file-write* (literal (param "GOVERNANCE_FILE_1")))', + ); // .geminiignore + expect(profile).toContain( + '(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))', + ); // .git }); it('should allow network when networkAccess is true in policy', async () => { @@ -134,31 +161,41 @@ describe('MacOsSandboxManager', () => { }); it('should resolve parent directories if a file does not exist', async () => { + const baseTmpDir = fs.mkdtempSync( + path.join(os.tmpdir(), 'gemini-cli-macos-realpath-test-'), + ); + const realPath = path.join(baseTmpDir, 'real_path'); + const nonexistentFile = path.join(realPath, 'nonexistent.txt'); + vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { - if (p === '/test/symlink/nonexistent.txt') { + if (p === nonexistentFile) { const error = new Error('ENOENT'); Object.assign(error, { code: 'ENOENT' }); throw error; } - if (p === '/test/symlink') { - return '/test/real_path'; + if (p === realPath) { + return path.join(baseTmpDir, 'resolved_path'); } return p as string; }); - const dynamicManager = new MacOsSandboxManager({ - workspace: '/test/symlink/nonexistent.txt', - }); - const dynamicResult = await dynamicManager.prepareCommand({ - command: 'echo', - args: ['hello'], - cwd: '/test/symlink/nonexistent.txt', - env: {}, - }); + try { + const dynamicManager = new MacOsSandboxManager({ + workspace: nonexistentFile, + }); + const dynamicResult = await dynamicManager.prepareCommand({ + command: 'echo', + args: ['hello'], + cwd: nonexistentFile, + env: {}, + }); - expect(dynamicResult.args).toContain( - 'WORKSPACE=/test/real_path/nonexistent.txt', - ); + expect(dynamicResult.args).toContain( + `WORKSPACE=${path.join(baseTmpDir, 'resolved_path', 'nonexistent.txt')}`, + ); + } finally { + fs.rmSync(baseTmpDir, { recursive: true, force: true }); + } }); it('should throw if realpathSync throws a non-ENOENT error', async () => { @@ -169,7 +206,7 @@ describe('MacOsSandboxManager', () => { }); const errorManager = new MacOsSandboxManager({ - workspace: '/test/workspace', + workspace: mockWorkspace, }); await expect( errorManager.prepareCommand({ diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts index 06eabd2a94..a7b92ff884 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts @@ -14,6 +14,7 @@ import { type SandboxedCommand, type ExecutionPolicy, sanitizePaths, + GOVERNANCE_FILES, } from '../../services/sandboxManager.js'; import { sanitizeEnvironment, @@ -65,6 +66,43 @@ export class MacOsSandboxManager implements SandboxManager { const workspacePath = this.tryRealpath(options.workspace); args.push('-D', `WORKSPACE=${workspacePath}`); + // Add explicit deny rules for governance files in the workspace. + // These are added after the workspace allow rule (which is in BASE_SEATBELT_PROFILE) + // to ensure they take precedence (Seatbelt evaluates rules in order, later rules win for same path). + for (let i = 0; i < GOVERNANCE_FILES.length; i++) { + const governanceFile = path.join(workspacePath, GOVERNANCE_FILES[i].path); + + // Ensure the file/directory exists so Seatbelt rules are reliably applied. + this.touch(governanceFile, GOVERNANCE_FILES[i].isDirectory); + + const realGovernanceFile = this.tryRealpath(governanceFile); + + // Determine if it should be treated as a directory (subpath) or a file (literal). + // .git is generally a directory, while ignore files are literals. + let isActuallyDirectory = GOVERNANCE_FILES[i].isDirectory; + try { + if (fs.existsSync(realGovernanceFile)) { + isActuallyDirectory = fs.lstatSync(realGovernanceFile).isDirectory(); + } + } catch { + // Ignore errors, use default guess + } + + const ruleType = isActuallyDirectory ? 'subpath' : 'literal'; + + args.push('-D', `GOVERNANCE_FILE_${i}=${governanceFile}`); + profileLines.push( + `(deny file-write* (${ruleType} (param "GOVERNANCE_FILE_${i}")))`, + ); + + if (realGovernanceFile !== governanceFile) { + args.push('-D', `REAL_GOVERNANCE_FILE_${i}=${realGovernanceFile}`); + profileLines.push( + `(deny file-write* (${ruleType} (param "REAL_GOVERNANCE_FILE_${i}")))`, + ); + } + } + const tmpPath = this.tryRealpath(os.tmpdir()); args.push('-D', `TMPDIR=${tmpPath}`); @@ -88,6 +126,28 @@ export class MacOsSandboxManager implements SandboxManager { return args; } + /** + * Ensures a file or directory exists. + */ + private touch(filePath: string, isDirectory: boolean) { + try { + // If it exists (even as a broken symlink), do nothing + if (fs.lstatSync(filePath)) return; + } catch { + // Ignore ENOENT + } + + if (isDirectory) { + fs.mkdirSync(filePath, { recursive: true }); + } else { + const dir = path.dirname(filePath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + fs.closeSync(fs.openSync(filePath, 'a')); + } + } + /** * Resolves symlinks for a given path to prevent sandbox escapes. * If a file does not exist (ENOENT), it recursively resolves the parent directory. diff --git a/packages/core/src/services/sandboxManager.ts b/packages/core/src/services/sandboxManager.ts index 0108c8f172..32d7344a05 100644 --- a/packages/core/src/services/sandboxManager.ts +++ b/packages/core/src/services/sandboxManager.ts @@ -76,6 +76,16 @@ export interface SandboxManager { prepareCommand(req: SandboxRequest): Promise; } +/** + * Files that represent the governance or "constitution" of the repository + * and should be write-protected in any sandbox. + */ +export const GOVERNANCE_FILES = [ + { path: '.gitignore', isDirectory: false }, + { path: '.geminiignore', isDirectory: false }, + { path: '.git', isDirectory: true }, +] as const; + /** * A no-op implementation of SandboxManager that silently passes commands * through while applying environment sanitization. diff --git a/packages/core/src/services/windowsSandboxManager.test.ts b/packages/core/src/services/windowsSandboxManager.test.ts index 966deefe6b..4b430ffa85 100644 --- a/packages/core/src/services/windowsSandboxManager.test.ts +++ b/packages/core/src/services/windowsSandboxManager.test.ts @@ -5,6 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; import { WindowsSandboxManager } from './windowsSandboxManager.js'; @@ -17,21 +18,24 @@ vi.mock('../utils/shell-utils.js', () => ({ describe('WindowsSandboxManager', () => { let manager: WindowsSandboxManager; + let testCwd: string; beforeEach(() => { vi.spyOn(os, 'platform').mockReturnValue('win32'); - manager = new WindowsSandboxManager({ workspace: '/test/workspace' }); + testCwd = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-cli-test-')); + manager = new WindowsSandboxManager({ workspace: testCwd }); }); afterEach(() => { vi.restoreAllMocks(); + fs.rmSync(testCwd, { recursive: true, force: true }); }); it('should prepare a GeminiSandbox.exe command', async () => { const req: SandboxRequest = { command: 'whoami', args: ['/groups'], - cwd: '/test/cwd', + cwd: testCwd, env: { TEST_VAR: 'test_value' }, policy: { networkAccess: false, @@ -41,14 +45,14 @@ describe('WindowsSandboxManager', () => { const result = await manager.prepareCommand(req); expect(result.program).toContain('GeminiSandbox.exe'); - expect(result.args).toEqual(['0', '/test/cwd', 'whoami', '/groups']); + expect(result.args).toEqual(['0', testCwd, 'whoami', '/groups']); }); it('should handle networkAccess from config', async () => { const req: SandboxRequest = { command: 'whoami', args: [], - cwd: '/test/cwd', + cwd: testCwd, env: {}, policy: { networkAccess: true, @@ -63,7 +67,7 @@ describe('WindowsSandboxManager', () => { const req: SandboxRequest = { command: 'test', args: [], - cwd: '/test/cwd', + cwd: testCwd, env: { API_KEY: 'secret', PATH: '/usr/bin', @@ -82,29 +86,53 @@ describe('WindowsSandboxManager', () => { expect(result.env['API_KEY']).toBeUndefined(); }); - it('should grant Low Integrity access to the workspace and allowed paths', async () => { + it('should ensure governance files exist', async () => { const req: SandboxRequest = { command: 'test', args: [], - cwd: '/test/cwd', + cwd: testCwd, env: {}, - policy: { - allowedPaths: ['/test/allowed1'], - }, }; await manager.prepareCommand(req); - expect(spawnAsync).toHaveBeenCalledWith('icacls', [ - path.resolve('/test/workspace'), - '/setintegritylevel', - 'Low', - ]); + expect(fs.existsSync(path.join(testCwd, '.gitignore'))).toBe(true); + expect(fs.existsSync(path.join(testCwd, '.geminiignore'))).toBe(true); + expect(fs.existsSync(path.join(testCwd, '.git'))).toBe(true); + expect(fs.lstatSync(path.join(testCwd, '.git')).isDirectory()).toBe(true); + }); - expect(spawnAsync).toHaveBeenCalledWith('icacls', [ - path.resolve('/test/allowed1'), - '/setintegritylevel', - 'Low', - ]); + it('should grant Low Integrity access to the workspace and allowed paths', async () => { + const allowedPath = path.join(os.tmpdir(), 'gemini-cli-test-allowed'); + if (!fs.existsSync(allowedPath)) { + fs.mkdirSync(allowedPath); + } + try { + const req: SandboxRequest = { + command: 'test', + args: [], + cwd: testCwd, + env: {}, + policy: { + allowedPaths: [allowedPath], + }, + }; + + await manager.prepareCommand(req); + + expect(spawnAsync).toHaveBeenCalledWith('icacls', [ + path.resolve(testCwd), + '/setintegritylevel', + 'Low', + ]); + + expect(spawnAsync).toHaveBeenCalledWith('icacls', [ + path.resolve(allowedPath), + '/setintegritylevel', + 'Low', + ]); + } finally { + fs.rmSync(allowedPath, { recursive: true, force: true }); + } }); }); diff --git a/packages/core/src/services/windowsSandboxManager.ts b/packages/core/src/services/windowsSandboxManager.ts index 347cb19395..e0cfb2201a 100644 --- a/packages/core/src/services/windowsSandboxManager.ts +++ b/packages/core/src/services/windowsSandboxManager.ts @@ -12,6 +12,7 @@ import { type SandboxManager, type SandboxRequest, type SandboxedCommand, + GOVERNANCE_FILES, type GlobalSandboxOptions, sanitizePaths, } from './sandboxManager.js'; @@ -39,6 +40,28 @@ export class WindowsSandboxManager implements SandboxManager { this.helperPath = path.resolve(__dirname, 'scripts', 'GeminiSandbox.exe'); } + /** + * Ensures a file or directory exists. + */ + private touch(filePath: string, isDirectory: boolean): void { + try { + // If it exists (even as a broken symlink), do nothing + if (fs.lstatSync(filePath)) return; + } catch { + // Ignore ENOENT + } + + if (isDirectory) { + fs.mkdirSync(filePath, { recursive: true }); + } else { + const dir = path.dirname(filePath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + fs.closeSync(fs.openSync(filePath, 'a')); + } + } + private async ensureInitialized(): Promise { if (this.initialized) return; if (os.platform() !== 'win32') { @@ -164,7 +187,28 @@ export class WindowsSandboxManager implements SandboxManager { // TODO: handle forbidden paths - // 2. Construct the helper command + // 2. Protected governance files + // These must exist on the host before running the sandbox to prevent + // the sandboxed process from creating them with Low integrity. + // By being created as Medium integrity, they are write-protected from Low processes. + for (const file of GOVERNANCE_FILES) { + const filePath = path.join(this.options.workspace, file.path); + this.touch(filePath, file.isDirectory); + + // We resolve real paths to ensure protection for both the symlink and its target. + try { + const realPath = fs.realpathSync(filePath); + if (realPath !== filePath) { + // If it's a symlink, the target is already implicitly protected + // if it's outside the Low integrity workspace (likely Medium). + // If it's inside, we ensure it's not accidentally Low. + } + } catch { + // Ignore realpath errors + } + } + + // 3. Construct the helper command // GeminiSandbox.exe [args...] const program = this.helperPath; From 36e6445dbae8acdb37de465715e2191472a1b3e7 Mon Sep 17 00:00:00 2001 From: Gal Zahavi <38544478+galz10@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:48:13 -0700 Subject: [PATCH 03/11] feat(sandbox): dynamic macOS sandbox expansion and worktree support (#23301) --- evals/sandbox_recovery.eval.ts | 42 ++ integration-tests/policy-headless.test.ts | 8 +- integration-tests/run_shell_command.test.ts | 12 +- package-lock.json | 34 +- packages/cli/src/acp/acpClient.ts | 1 + .../components/ToolConfirmationQueue.test.tsx | 1 + .../messages/RedirectionConfirmation.test.tsx | 1 + .../messages/ToolConfirmationMessage.test.tsx | 8 + .../messages/ToolConfirmationMessage.tsx | 78 ++- packages/core/src/config/config.ts | 63 ++- .../src/config/sandbox-integration.test.ts | 1 + packages/core/src/confirmation-bus/types.ts | 9 + packages/core/src/core/prompts.test.ts | 2 + .../src/policy/policies/sandbox-default.toml | 19 + .../core/src/policy/policy-engine.test.ts | 12 +- packages/core/src/policy/policy-engine.ts | 98 +++- .../core/src/policy/sandboxPolicyManager.ts | 216 ++++++++ packages/core/src/policy/types.ts | 9 + .../core/src/prompts/promptProvider.test.ts | 1 + packages/core/src/prompts/promptProvider.ts | 5 +- packages/core/src/prompts/snippets.legacy.ts | 15 +- packages/core/src/prompts/snippets.ts | 41 +- .../sandbox/macos/MacOsSandboxManager.test.ts | 4 +- .../src/sandbox/macos/MacOsSandboxManager.ts | 267 +++++----- .../core/src/sandbox/macos/baseProfile.ts | 104 +++- .../core/src/sandbox/macos/commandSafety.ts | 469 ++++++++++++++++++ .../sandbox/macos/seatbeltArgsBuilder.test.ts | 160 ++++++ .../src/sandbox/macos/seatbeltArgsBuilder.ts | 247 +++++++++ packages/core/src/scheduler/policy.ts | 3 +- packages/core/src/scheduler/scheduler.ts | 104 ++++ packages/core/src/services/sandboxManager.ts | 14 + .../src/services/sandboxManagerFactory.ts | 17 +- .../src/services/shellExecutionService.ts | 8 +- .../coreToolsModelSnapshots.test.ts.snap | 58 +++ .../tools/definitions/base-declarations.ts | 3 + .../dynamic-declaration-helpers.ts | 30 ++ packages/core/src/tools/shell.ts | 206 ++++++++ packages/core/src/tools/tool-error.ts | 1 + packages/core/src/tools/tools.ts | 11 + packages/core/src/utils/shell-utils.ts | 2 +- 40 files changed, 2201 insertions(+), 183 deletions(-) create mode 100755 evals/sandbox_recovery.eval.ts create mode 100644 packages/core/src/policy/policies/sandbox-default.toml create mode 100644 packages/core/src/policy/sandboxPolicyManager.ts create mode 100644 packages/core/src/sandbox/macos/commandSafety.ts create mode 100644 packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts create mode 100644 packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts diff --git a/evals/sandbox_recovery.eval.ts b/evals/sandbox_recovery.eval.ts new file mode 100755 index 0000000000..ad6b630236 --- /dev/null +++ b/evals/sandbox_recovery.eval.ts @@ -0,0 +1,42 @@ +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('Sandbox recovery', () => { + evalTest('USUALLY_PASSES', { + name: 'attempts to use additional_permissions when operation not permitted', + prompt: + 'Run ./script.sh. It will fail with "Operation not permitted". When it does, you must retry running it by passing the appropriate additional_permissions.', + files: { + 'script.sh': + '#!/bin/bash\necho "cat: /etc/shadow: Operation not permitted" >&2\nexit 1\n', + }, + assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + const shellCalls = toolLogs.filter( + (log) => + log.toolRequest?.name === 'run_shell_command' && + log.toolRequest?.args?.includes('script.sh'), + ); + + // The agent should have tried running the command. + expect( + shellCalls.length, + 'Agent should have called run_shell_command', + ).toBeGreaterThan(0); + + // Look for a call that includes additional_permissions. + const hasAdditionalPermissions = shellCalls.some((call) => { + const args = + typeof call.toolRequest.args === 'string' + ? JSON.parse(call.toolRequest.args) + : call.toolRequest.args; + return args.additional_permissions !== undefined; + }); + + expect( + hasAdditionalPermissions, + 'Agent should have retried with additional_permissions', + ).toBe(true); + }, + }); +}); diff --git a/integration-tests/policy-headless.test.ts b/integration-tests/policy-headless.test.ts index b6cc14f61c..3a8fb5238a 100644 --- a/integration-tests/policy-headless.test.ts +++ b/integration-tests/policy-headless.test.ts @@ -183,11 +183,17 @@ describe('Policy Engine Headless Mode', () => { responsesFile: 'policy-headless-shell-denied.responses', promptCommand: ECHO_PROMPT, policyContent: ` + [[rule]] + toolName = "run_shell_command" + commandPrefix = "echo" + decision = "deny" + priority = 100 + [[rule]] toolName = "run_shell_command" commandPrefix = "node" decision = "allow" - priority = 100 + priority = 90 `, expectAllowed: false, expectedDenialString: 'Tool execution denied by policy', diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts index 8ae72fed84..02fda5be45 100644 --- a/integration-tests/run_shell_command.test.ts +++ b/integration-tests/run_shell_command.test.ts @@ -58,12 +58,18 @@ function getDisallowedFileReadCommand(testFile: string): { const quotedPath = `"${testFile}"`; switch (shell) { case 'powershell': - return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' }; + return { + command: `powershell -Command "Get-Content ${quotedPath}"`, + tool: 'powershell', + }; case 'cmd': - return { command: `type ${quotedPath}`, tool: 'type' }; + return { command: `cmd /c type ${quotedPath}`, tool: 'cmd' }; case 'bash': default: - return { command: `cat ${quotedPath}`, tool: 'cat' }; + return { + command: `node -e "console.log(require('fs').readFileSync('${testFile}', 'utf8'))"`, + tool: 'node', + }; } } diff --git a/package-lock.json b/package-lock.json index b70dc1413b..ff6b8fee23 100644 --- a/package-lock.json +++ b/package-lock.json @@ -486,7 +486,8 @@ "version": "2.11.0", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", - "license": "(Apache-2.0 AND BSD-3-Clause)" + "license": "(Apache-2.0 AND BSD-3-Clause)", + "peer": true }, "node_modules/@bundled-es-modules/cookie": { "version": "2.0.1", @@ -1489,6 +1490,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" @@ -2195,6 +2197,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2375,6 +2378,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -2424,6 +2428,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2798,6 +2803,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2831,6 +2837,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2885,6 +2892,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4121,6 +4129,7 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4395,6 +4404,7 @@ "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.35.0", "@typescript-eslint/types": "8.35.0", @@ -5268,6 +5278,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7402,7 +7413,8 @@ "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/dezalgo": { "version": "1.0.4", @@ -7986,6 +7998,7 @@ "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -8503,6 +8516,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9815,6 +9829,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz", "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -10093,6 +10108,7 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.4.11.tgz", "integrity": "sha512-93LQlzT7vvZ1XJcmOMwN4s+6W334QegendeHOMnEJBlhnpIzr8bws6/aOEHG8ZCuVD/vNeeea5m1msHIdAY6ig==", "license": "MIT", + "peer": true, "dependencies": { "@alcalzone/ansi-tokenize": "^0.2.1", "ansi-escapes": "^7.0.0", @@ -13850,6 +13866,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -13860,6 +13877,7 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -16009,6 +16027,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16231,7 +16250,8 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" + "license": "0BSD", + "peer": true }, "node_modules/tsx": { "version": "4.20.3", @@ -16239,6 +16259,7 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16404,6 +16425,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16626,6 +16648,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16739,6 +16762,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16751,6 +16775,7 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", + "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17398,6 +17423,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -17841,6 +17867,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -17944,6 +17971,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index bead6f0067..7a45f98dc7 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -1625,6 +1625,7 @@ function toPermissionOptions( case 'info': case 'ask_user': case 'exit_plan_mode': + case 'sandbox_expansion': break; default: { const unreachable: never = confirmation; diff --git a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx index 4edf1e4f35..490fa0d4a1 100644 --- a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx +++ b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx @@ -47,6 +47,7 @@ describe('ToolConfirmationQueue', () => { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getApprovalMode: () => 'default', getDisableAlwaysAllow: () => false, getModel: () => 'gemini-pro', getDebugMode: () => false, diff --git a/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx b/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx index 68e8ae6ebe..95f0cffb69 100644 --- a/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx +++ b/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx @@ -22,6 +22,7 @@ describe('ToolConfirmationMessage Redirection', () => { isTrustedFolder: () => true, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; it('should display redirection warning and tip for redirected commands', async () => { diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx index eddbaf4396..e0f4430c6c 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx @@ -40,6 +40,7 @@ describe('ToolConfirmationMessage', () => { isTrustedFolder: () => true, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; it('should not display urls if prompt and url are the same', async () => { @@ -324,6 +325,7 @@ describe('ToolConfirmationMessage', () => { isTrustedFolder: () => true, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; const { lastFrame, unmount } = await renderWithProviders( { isTrustedFolder: () => false, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; const { lastFrame, unmount } = await renderWithProviders( @@ -380,6 +383,7 @@ describe('ToolConfirmationMessage', () => { isTrustedFolder: () => true, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; const { lastFrame, unmount } = await renderWithProviders( { isTrustedFolder: () => true, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; const { lastFrame, unmount } = await renderWithProviders( { isTrustedFolder: () => true, getIdeMode: () => false, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; vi.mocked(useToolActions).mockReturnValue({ confirm: vi.fn(), @@ -473,6 +479,7 @@ describe('ToolConfirmationMessage', () => { isTrustedFolder: () => true, getIdeMode: () => true, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; vi.mocked(useToolActions).mockReturnValue({ confirm: vi.fn(), @@ -499,6 +506,7 @@ describe('ToolConfirmationMessage', () => { isTrustedFolder: () => true, getIdeMode: () => true, getDisableAlwaysAllow: () => false, + getApprovalMode: () => 'default', } as unknown as Config; vi.mocked(useToolActions).mockReturnValue({ confirm: vi.fn(), diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx index d9ca2e66c6..631bbf032d 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx @@ -15,6 +15,7 @@ import { type ToolConfirmationPayload, ToolConfirmationOutcome, type EditorType, + ApprovalMode, hasRedirection, debugLogger, } from '@google/gemini-cli-core'; @@ -314,6 +315,31 @@ export const ToolConfirmationMessage: React.FC< key: 'No, suggest changes (esc)', }); } + } else if (confirmationDetails.type === 'sandbox_expansion') { + options.push({ + label: 'Allow once', + value: ToolConfirmationOutcome.ProceedOnce, + key: 'Allow once', + }); + if (isTrustedFolder) { + options.push({ + label: 'Allow for this session', + value: ToolConfirmationOutcome.ProceedAlways, + key: 'Allow for this session', + }); + if (allowPermanentApproval) { + options.push({ + label: 'Allow for all future sessions', + value: ToolConfirmationOutcome.ProceedAlwaysAndSave, + key: 'Allow for all future sessions', + }); + } + } + options.push({ + label: 'No, suggest changes (esc)', + value: ToolConfirmationOutcome.Cancel, + key: 'No, suggest changes (esc)', + }); } else if (confirmationDetails.type === 'exec') { options.push({ label: 'Allow once', @@ -546,6 +572,8 @@ export const ToolConfirmationMessage: React.FC< if (!confirmationDetails.isModifying) { question = `Apply this change?`; } + } else if (confirmationDetails.type === 'sandbox_expansion') { + question = `Allow sandbox expansion for: '${sanitizeForDisplay(confirmationDetails.rootCommand)}'?`; } else if (confirmationDetails.type === 'exec') { const executionProps = confirmationDetails; @@ -573,6 +601,52 @@ export const ToolConfirmationMessage: React.FC< /> ); } + } else if (confirmationDetails.type === 'sandbox_expansion') { + const { additionalPermissions } = confirmationDetails; + const readPaths = additionalPermissions?.fileSystem?.read || []; + const writePaths = additionalPermissions?.fileSystem?.write || []; + const network = additionalPermissions?.network; + + bodyContent = ( + + + The agent is requesting additional sandbox permissions to execute + this command: + + + + {sanitizeForDisplay(confirmationDetails.command)} + + + {network && ( + + • Network Access + + )} + {readPaths.length > 0 && ( + + • Read Access: + {readPaths.map((p, i) => ( + + {' '} + {sanitizeForDisplay(p)} + + ))} + + )} + {writePaths.length > 0 && ( + + • Write Access: + {writePaths.map((p, i) => ( + + {' '} + {sanitizeForDisplay(p)} + + ))} + + )} + + ); } else if (confirmationDetails.type === 'exec') { const executionProps = confirmationDetails; @@ -587,7 +661,8 @@ export const ToolConfirmationMessage: React.FC< let bodyContentHeight = availableBodyContentHeight(); let warnings: React.ReactNode = null; - if (containsRedirection) { + const isAutoEdit = config.getApprovalMode() === ApprovalMode.AUTO_EDIT; + if (containsRedirection && !isAutoEdit) { // Calculate lines needed for Note and Tip const safeWidth = Math.max(terminalWidth, 1); const noteLength = @@ -737,6 +812,7 @@ export const ToolConfirmationMessage: React.FC< isTrustedFolder, allowPermanentApproval, settings, + config, ]); const bodyOverflowDirection: 'top' | 'bottom' = diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 0740a5c16b..12ff9ad37e 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -6,6 +6,7 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; +import { SandboxPolicyManager } from '../policy/sandboxPolicyManager.js'; import { inspect } from 'node:util'; import process from 'node:process'; import { z } from 'zod'; @@ -730,7 +731,8 @@ export class Config implements McpContext, AgentLoopContext { private readonly telemetrySettings: TelemetrySettings; private readonly usageStatisticsEnabled: boolean; private _geminiClient!: GeminiClient; - private readonly _sandboxManager: SandboxManager; + private _sandboxManager: SandboxManager; + private readonly _sandboxPolicyManager: SandboxPolicyManager; private baseLlmClient!: BaseLlmClient; private localLiteRtLmClient?: LocalLiteRtLmClient; private modelRouterService: ModelRouterService; @@ -905,14 +907,14 @@ export class Config implements McpContext, AgentLoopContext { params.embeddingModel ?? DEFAULT_GEMINI_EMBEDDING_MODEL; this.sandbox = params.sandbox ? { - enabled: params.sandbox.enabled ?? false, + enabled: params.sandbox.enabled || params.toolSandboxing || false, allowedPaths: params.sandbox.allowedPaths ?? [], networkAccess: params.sandbox.networkAccess ?? false, command: params.sandbox.command, image: params.sandbox.image, } : { - enabled: false, + enabled: params.toolSandboxing || false, allowedPaths: [], networkAccess: false, }; @@ -931,6 +933,30 @@ export class Config implements McpContext, AgentLoopContext { this.fileSystemService = new StandardFileSystemService(); } + this._sandboxPolicyManager = new SandboxPolicyManager(); + const initialApprovalMode = + params.approvalMode ?? + params.policyEngineConfig?.approvalMode ?? + 'default'; + this._sandboxManager = createSandboxManager( + this.sandbox, + params.targetDir, + this._sandboxPolicyManager, + initialApprovalMode, + ); + + if ( + !(this._sandboxManager instanceof NoopSandboxManager) && + this.sandbox?.enabled + ) { + this.fileSystemService = new SandboxedFileSystemService( + this._sandboxManager, + params.targetDir, + ); + } else { + this.fileSystemService = new StandardFileSystemService(); + } + this.targetDir = path.resolve(params.targetDir); this.folderTrust = params.folderTrust ?? false; this.workspaceContext = new WorkspaceContext(this.targetDir, []); @@ -1160,12 +1186,19 @@ export class Config implements McpContext, AgentLoopContext { params.policyUpdateConfirmationRequest; this.disableAlwaysAllow = params.disableAlwaysAllow ?? false; + const engineApprovalMode = + params.approvalMode ?? + params.policyEngineConfig?.approvalMode ?? + ApprovalMode.DEFAULT; this.policyEngine = new PolicyEngine( { ...params.policyEngineConfig, - approvalMode: - params.approvalMode ?? params.policyEngineConfig?.approvalMode, + approvalMode: engineApprovalMode, disableAlwaysAllow: this.disableAlwaysAllow, + toolSandboxEnabled: this.getSandboxEnabled(), + sandboxApprovedTools: + this.sandboxPolicyManager?.getModeConfig(engineApprovalMode) + ?.approvedTools ?? [], }, checkerRunner, ); @@ -1560,6 +1593,20 @@ export class Config implements McpContext, AgentLoopContext { return this._geminiClient; } + private refreshSandboxManager(): void { + this._sandboxManager = createSandboxManager( + this.sandbox, + this.targetDir, + this._sandboxPolicyManager, + this.getApprovalMode(), + ); + this.shellExecutionConfig.sandboxManager = this._sandboxManager; + } + + get sandboxPolicyManager() { + return this._sandboxPolicyManager; + } + get sandboxManager(): SandboxManager { return this._sandboxManager; } @@ -2339,7 +2386,11 @@ export class Config implements McpContext, AgentLoopContext { ); } - this.policyEngine.setApprovalMode(mode); + this.policyEngine.setApprovalMode( + mode, + this.sandboxPolicyManager?.getModeConfig(mode)?.approvedTools ?? [], + ); + this.refreshSandboxManager(); const isPlanModeTransition = currentMode !== mode && diff --git a/packages/core/src/config/sandbox-integration.test.ts b/packages/core/src/config/sandbox-integration.test.ts index 305b9e2638..f808b94e32 100644 --- a/packages/core/src/config/sandbox-integration.test.ts +++ b/packages/core/src/config/sandbox-integration.test.ts @@ -22,6 +22,7 @@ vi.mock('../confirmation-bus/message-bus.js', () => ({ vi.mock('../policy/policy-engine.js', () => ({ PolicyEngine: vi.fn().mockImplementation(() => ({ getExcludedTools: vi.fn().mockReturnValue(new Set()), + getApprovalMode: vi.fn().mockReturnValue('yolo'), })), })); vi.mock('../skills/skillManager.js', () => ({ diff --git a/packages/core/src/confirmation-bus/types.ts b/packages/core/src/confirmation-bus/types.ts index 998c32b7f6..c47a1c1cf5 100644 --- a/packages/core/src/confirmation-bus/types.ts +++ b/packages/core/src/confirmation-bus/types.ts @@ -11,6 +11,7 @@ import type { DiffStat, } from '../tools/tools.js'; import type { ToolCall } from '../scheduler/types.js'; +import type { SandboxPermissions } from '../services/sandboxManager.js'; export enum MessageBusType { TOOL_CONFIRMATION_REQUEST = 'tool-confirmation-request', @@ -78,6 +79,14 @@ export interface ToolConfirmationResponse { * Data-only versions of ToolCallConfirmationDetails for bus transmission. */ export type SerializableConfirmationDetails = + | { + type: 'sandbox_expansion'; + title: string; + command: string; + rootCommand: string; + additionalPermissions: SandboxPermissions; + systemMessage?: string; + } | { type: 'info'; title: string; diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index d3f2087018..6e505dfa2b 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -89,6 +89,7 @@ describe('Core System Prompt (prompts.ts)', () => { mockConfig = { getToolRegistry: vi.fn().mockReturnValue(mockRegistry), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), + getSandboxEnabled: vi.fn().mockReturnValue(false), storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project-temp'), getPlansDir: vi.fn().mockReturnValue('/tmp/project-temp/plans'), @@ -418,6 +419,7 @@ describe('Core System Prompt (prompts.ts)', () => { const testConfig = { getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), + getSandboxEnabled: vi.fn().mockReturnValue(false), storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project-temp'), }, diff --git a/packages/core/src/policy/policies/sandbox-default.toml b/packages/core/src/policy/policies/sandbox-default.toml new file mode 100644 index 0000000000..0d8467d596 --- /dev/null +++ b/packages/core/src/policy/policies/sandbox-default.toml @@ -0,0 +1,19 @@ +[modes.plan] +network = false +readonly = true +approvedTools = [] +allowOverrides = false + +[modes.default] +network = false +readonly = true +approvedTools = [] +allowOverrides = true + +[modes.accepting_edits] +network = false +readonly = false +approvedTools = ['sed', 'grep', 'awk', 'perl', 'cat', 'echo'] +allowOverrides = true + +[commands] diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index eb39d6ed8d..805e4cef70 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -329,7 +329,11 @@ describe('PolicyEngine', () => { ); // Switch to autoEdit mode - engine.setApprovalMode(ApprovalMode.AUTO_EDIT); + engine = new PolicyEngine({ + rules, + approvalMode: ApprovalMode.AUTO_EDIT, + toolSandboxEnabled: true, + }); expect((await engine.check({ name: 'edit' }, undefined)).decision).toBe( PolicyDecision.ALLOW, ); @@ -1427,14 +1431,14 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); - // Atomic command "whoami" matches the wildcard rule (ASK_USER). + // Atomic command "unknown_command" matches the wildcard rule (ASK_USER). // It should NOT be upgraded to ALLOW. expect( ( await engine.check( { name: 'run_shell_command', - args: { command: 'whoami' }, + args: { command: 'unknown_command' }, }, undefined, ) @@ -1572,7 +1576,7 @@ describe('PolicyEngine', () => { }, ]; - engine = new PolicyEngine({ rules }); + engine = new PolicyEngine({ rules, toolSandboxEnabled: true }); engine.setApprovalMode(ApprovalMode.AUTO_EDIT); const result = await engine.check( diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index c35c9c5d4f..c1709248fe 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -5,6 +5,11 @@ */ import { type FunctionCall } from '@google/genai'; +import { + isDangerousCommand, + isKnownSafeCommand, +} from '../sandbox/macos/commandSafety.js'; +import { parse as shellParse } from 'shell-quote'; import { PolicyDecision, type PolicyEngineConfig, @@ -192,6 +197,8 @@ export class PolicyEngine { private readonly disableAlwaysAllow: boolean; private readonly checkerRunner?: CheckerRunner; private approvalMode: ApprovalMode; + private toolSandboxEnabled: boolean; + private sandboxApprovedTools: string[]; constructor(config: PolicyEngineConfig = {}, checkerRunner?: CheckerRunner) { this.rules = (config.rules ?? []).sort( @@ -242,13 +249,18 @@ export class PolicyEngine { this.disableAlwaysAllow = config.disableAlwaysAllow ?? false; this.checkerRunner = checkerRunner; this.approvalMode = config.approvalMode ?? ApprovalMode.DEFAULT; + this.toolSandboxEnabled = config.toolSandboxEnabled ?? false; + this.sandboxApprovedTools = config.sandboxApprovedTools ?? []; } /** * Update the current approval mode. */ - setApprovalMode(mode: ApprovalMode): void { + setApprovalMode(mode: ApprovalMode, sandboxApprovedTools?: string[]): void { this.approvalMode = mode; + if (sandboxApprovedTools !== undefined) { + this.sandboxApprovedTools = sandboxApprovedTools; + } } /** @@ -269,17 +281,58 @@ export class PolicyEngine { command: string, allowRedirection?: boolean, ): boolean { - return ( - !allowRedirection && - hasRedirection(command) && - this.approvalMode !== ApprovalMode.AUTO_EDIT && - this.approvalMode !== ApprovalMode.YOLO - ); + if (allowRedirection) return false; + if (!hasRedirection(command)) return false; + + // Do not downgrade (do not ask user) if sandboxing is enabled and in AUTO_EDIT or YOLO + if ( + this.toolSandboxEnabled && + (this.approvalMode === ApprovalMode.AUTO_EDIT || + this.approvalMode === ApprovalMode.YOLO) + ) { + return false; + } + + return true; } /** * Check if a shell command is allowed. */ + + private async applyShellHeuristics( + command: string, + decision: PolicyDecision, + ): Promise { + await initializeShellParsers(); + try { + const parsedObjArgs = shellParse(command); + if (parsedObjArgs.some((arg) => typeof arg === 'object')) return decision; + const parsedArgs = parsedObjArgs.map(String); + if (isDangerousCommand(parsedArgs)) { + debugLogger.debug( + `[PolicyEngine.check] Command evaluated as dangerous, forcing ASK_USER: ${command}`, + ); + return PolicyDecision.ASK_USER; + } + const isApprovedBySandbox = + this.toolSandboxEnabled && + this.sandboxApprovedTools.includes(parsedArgs[0]); + if ( + (isKnownSafeCommand(parsedArgs) || isApprovedBySandbox) && + decision === PolicyDecision.ASK_USER + ) { + debugLogger.debug( + `[PolicyEngine.check] Command evaluated as known safe, overriding ASK_USER to ALLOW: ${command}`, + ); + return PolicyDecision.ALLOW; + } + } catch { + // Ignore parsing errors + } + return decision; + } + private async checkShellCommand( toolName: string, command: string | undefined, @@ -522,11 +575,21 @@ export class PolicyEngine { `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`, ); + let ruleDecision = rule.decision; + if ( + isShellCommand && + command && + !('commandPrefix' in rule) && + !rule.argsPattern + ) { + ruleDecision = await this.applyShellHeuristics(command, ruleDecision); + } + if (isShellCommand && toolName) { const shellResult = await this.checkShellCommand( toolName, command, - rule.decision, + ruleDecision, serverName, shellDirPath, rule.allowRedirection, @@ -562,10 +625,18 @@ export class PolicyEngine { `[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`, ); if (toolName && SHELL_TOOL_NAMES.includes(toolName)) { + let heuristicDecision = this.defaultDecision; + if (command) { + heuristicDecision = await this.applyShellHeuristics( + command, + heuristicDecision, + ); + } + const shellResult = await this.checkShellCommand( toolName, command, - this.defaultDecision, + heuristicDecision, serverName, shellDirPath, false, @@ -631,6 +702,15 @@ export class PolicyEngine { } } + // Sandbox Expansion requests MUST always be confirmed by the user, + // even if the base command is otherwise ALLOWED by the policy engine. + if ( + decision === PolicyDecision.ALLOW && + toolCall.args?.['additional_permissions'] + ) { + decision = PolicyDecision.ASK_USER; + } + return { decision: this.applyNonInteractiveMode(decision), rule: matchedRule, diff --git a/packages/core/src/policy/sandboxPolicyManager.ts b/packages/core/src/policy/sandboxPolicyManager.ts new file mode 100644 index 0000000000..5b00150b41 --- /dev/null +++ b/packages/core/src/policy/sandboxPolicyManager.ts @@ -0,0 +1,216 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import toml from '@iarna/toml'; +import { z } from 'zod'; +import { fileURLToPath } from 'node:url'; +import { debugLogger } from '../utils/debugLogger.js'; +import { type SandboxPermissions } from '../services/sandboxManager.js'; +import { sanitizePaths } from '../services/sandboxManager.js'; + +export const SandboxModeConfigSchema = z.object({ + network: z.boolean(), + readonly: z.boolean(), + approvedTools: z.array(z.string()), + allowOverrides: z.boolean().optional(), +}); + +export const PersistentCommandConfigSchema = z.object({ + allowed_paths: z.array(z.string()).optional(), + allow_network: z.boolean().optional(), +}); + +export const SandboxTomlSchema = z.object({ + modes: z.object({ + plan: SandboxModeConfigSchema, + default: SandboxModeConfigSchema, + accepting_edits: SandboxModeConfigSchema, + }), + commands: z.record(z.string(), PersistentCommandConfigSchema).default({}), +}); + +export type SandboxModeConfig = z.infer; +export type PersistentCommandConfig = z.infer< + typeof PersistentCommandConfigSchema +>; +export type SandboxTomlSchemaType = z.infer; + +export class SandboxPolicyManager { + private static _DEFAULT_CONFIG: SandboxTomlSchemaType | null = null; + + private static get DEFAULT_CONFIG(): SandboxTomlSchemaType { + if (!SandboxPolicyManager._DEFAULT_CONFIG) { + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const defaultPath = path.join( + __dirname, + 'policies', + 'sandbox-default.toml', + ); + try { + const content = fs.readFileSync(defaultPath, 'utf8'); + if (typeof content !== 'string') { + SandboxPolicyManager._DEFAULT_CONFIG = { + modes: { + plan: { + network: false, + readonly: true, + approvedTools: [], + allowOverrides: false, + }, + default: { + network: false, + readonly: true, + approvedTools: [], + allowOverrides: true, + }, + accepting_edits: { + network: false, + readonly: false, + approvedTools: ['sed', 'grep', 'awk', 'perl', 'cat', 'echo'], + allowOverrides: true, + }, + }, + commands: {}, + }; + return SandboxPolicyManager._DEFAULT_CONFIG; + } + SandboxPolicyManager._DEFAULT_CONFIG = SandboxTomlSchema.parse( + toml.parse(content), + ); + } catch (e) { + debugLogger.error(`Failed to parse default sandbox policy: ${e}`); + throw new Error(`Failed to parse default sandbox policy: ${e}`); + } + } + return SandboxPolicyManager._DEFAULT_CONFIG; + } + + private config: SandboxTomlSchemaType; + private readonly configPath: string; + private sessionApprovals: Record = {}; + + constructor(customConfigPath?: string) { + this.configPath = + customConfigPath ?? + path.join(os.homedir(), '.gemini', 'policies', 'sandbox.toml'); + this.config = this.loadConfig(); + } + + private loadConfig(): SandboxTomlSchemaType { + if (!fs.existsSync(this.configPath)) { + return SandboxPolicyManager.DEFAULT_CONFIG; + } + + try { + const content = fs.readFileSync(this.configPath, 'utf8'); + return SandboxTomlSchema.parse(toml.parse(content)); + } catch (e) { + debugLogger.error(`Failed to parse sandbox.toml: ${e}`); + return SandboxPolicyManager.DEFAULT_CONFIG; + } + } + + private saveConfig(): void { + try { + const dir = path.dirname(this.configPath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const content = toml.stringify(this.config as unknown as toml.JsonMap); + fs.writeFileSync(this.configPath, content); + } catch (e) { + debugLogger.error(`Failed to save sandbox.toml: ${e}`); + } + } + + getModeConfig( + mode: 'plan' | 'accepting_edits' | 'default' | string, + ): SandboxModeConfig { + if (mode === 'plan') return this.config.modes.plan; + if (mode === 'accepting_edits' || mode === 'autoEdit') + return this.config.modes.accepting_edits; + if (mode === 'default') return this.config.modes.default; + + // Default fallback + return this.config.modes.default ?? this.config.modes.plan; + } + + getCommandPermissions(commandName: string): SandboxPermissions { + const persistent = this.config.commands[commandName]; + const session = this.sessionApprovals[commandName]; + + return { + fileSystem: { + read: [ + ...(persistent?.allowed_paths ?? []), + ...(session?.fileSystem?.read ?? []), + ], + write: [ + ...(persistent?.allowed_paths ?? []), + ...(session?.fileSystem?.write ?? []), + ], + }, + network: persistent?.allow_network || session?.network || false, + }; + } + + addSessionApproval( + commandName: string, + permissions: SandboxPermissions, + ): void { + const existing = this.sessionApprovals[commandName] || { + fileSystem: { read: [], write: [] }, + network: false, + }; + + this.sessionApprovals[commandName] = { + fileSystem: { + read: Array.from( + new Set([ + ...(existing.fileSystem?.read ?? []), + ...(permissions.fileSystem?.read ?? []), + ]), + ), + write: Array.from( + new Set([ + ...(existing.fileSystem?.write ?? []), + ...(permissions.fileSystem?.write ?? []), + ]), + ), + }, + network: existing.network || permissions.network || false, + }; + } + + addPersistentApproval( + commandName: string, + permissions: SandboxPermissions, + ): void { + const existing = this.config.commands[commandName] || { + allowed_paths: [], + allow_network: false, + }; + + const newPathsArray: string[] = [ + ...(existing.allowed_paths ?? []), + ...(permissions.fileSystem?.read ?? []), + ...(permissions.fileSystem?.write ?? []), + ]; + const newPaths = new Set(sanitizePaths(newPathsArray)); + + this.config.commands[commandName] = { + allowed_paths: Array.from(newPaths), + allow_network: existing.allow_network || permissions.network || false, + }; + + this.saveConfig(); + } +} diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 494956c364..0fcf682767 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -309,6 +309,15 @@ export interface PolicyEngineConfig { * Used to filter rules that have specific 'modes' defined. */ approvalMode?: ApprovalMode; + + /** + * Whether tool sandboxing is enabled. + */ + toolSandboxEnabled?: boolean; + /** + * List of tools approved by the sandbox policy for the current mode. + */ + sandboxApprovedTools?: string[]; } export interface PolicySettings { diff --git a/packages/core/src/prompts/promptProvider.test.ts b/packages/core/src/prompts/promptProvider.test.ts index 700062de50..d749a41058 100644 --- a/packages/core/src/prompts/promptProvider.test.ts +++ b/packages/core/src/prompts/promptProvider.test.ts @@ -54,6 +54,7 @@ describe('PromptProvider', () => { }, getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), + getSandboxEnabled: vi.fn().mockReturnValue(false), storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project-temp'), getPlansDir: vi.fn().mockReturnValue('/tmp/project-temp/plans'), diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index bd884aeab5..00765a2a89 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -195,7 +195,10 @@ export class PromptProvider { memoryManagerEnabled: context.config.isMemoryManagerEnabled(), }), ), - sandbox: this.withSection('sandbox', () => getSandboxMode()), + sandbox: this.withSection('sandbox', () => ({ + mode: getSandboxMode(), + toolSandboxingEnabled: context.config.getSandboxEnabled(), + })), interactiveYoloMode: this.withSection( 'interactiveYoloMode', () => true, diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 19aaf56d78..f2930e07ca 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -36,7 +36,7 @@ export interface SystemPromptOptions { planningWorkflow?: PlanningWorkflowOptions; taskTracker?: boolean; operationalGuidelines?: OperationalGuidelinesOptions; - sandbox?: SandboxMode; + sandbox?: SandboxOptions; interactiveYoloMode?: boolean; gitRepo?: GitRepoOptions; finalReminder?: FinalReminderOptions; @@ -72,6 +72,11 @@ export interface OperationalGuidelinesOptions { export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside'; +export interface SandboxOptions { + mode: SandboxMode; + toolSandboxingEnabled: boolean; +} + export interface GitRepoOptions { interactive: boolean; } @@ -290,8 +295,9 @@ ${shellEfficiencyGuidelines(options.enableShellEfficiency)} `.trim(); } -export function renderSandbox(mode?: SandboxMode): string { - if (!mode) return ''; +export function renderSandbox(options?: SandboxOptions): string { + if (!options || !options.mode) return ''; + const mode = options.mode; if (mode === 'macos-seatbelt') { return ` # macOS Seatbelt @@ -300,11 +306,12 @@ You are running under macos seatbelt with limited access to files outside the pr return ` # Sandbox You are running in a sandbox container with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. If you encounter failures that could be due to sandboxing (e.g. if a command fails with 'Operation not permitted' or similar error), when you report the error to the user, also explain why you think it could be due to sandboxing, and how the user may need to adjust their sandbox configuration.`.trim(); - } else { + } else if (mode === 'outside') { return ` # Outside of Sandbox You are running outside of a sandbox container, directly on the user's system. For critical commands that are particularly likely to modify the user's system outside of the project directory or system temp directory, as you explain the command to the user (per the Explain Critical Commands rule above), also remind the user to consider enabling sandboxing.`.trim(); } + return ''; } export function renderInteractiveYoloMode(enabled?: boolean): string { diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 5c285fc554..1761aabcc2 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -46,7 +46,7 @@ export interface SystemPromptOptions { planningWorkflow?: PlanningWorkflowOptions; taskTracker?: boolean; operationalGuidelines?: OperationalGuidelinesOptions; - sandbox?: SandboxMode; + sandbox?: SandboxOptions; interactiveYoloMode?: boolean; gitRepo?: GitRepoOptions; } @@ -84,6 +84,11 @@ export interface OperationalGuidelinesOptions { export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside'; +export interface SandboxOptions { + mode: SandboxMode; + toolSandboxingEnabled: boolean; +} + export interface GitRepoOptions { interactive: boolean; } @@ -389,18 +394,44 @@ export function renderOperationalGuidelines( `.trim(); } -export function renderSandbox(mode?: SandboxMode): string { - if (!mode) return ''; +export function renderSandbox(options?: SandboxOptions): string { + if (!options || !options.mode) return ''; + const { mode, toolSandboxingEnabled } = options; if (mode === 'macos-seatbelt') { - return ` + if (toolSandboxingEnabled) { + return ` + # macOS Seatbelt + + You are running under macos seatbelt with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. + **Sandbox Failure Recovery:** + If an interactive command (like nvim, nano, git) fails to load plugins, crashes, or hangs waiting for user input (e.g. at a '-- More --' prompt) with errors indicating missing files or 'Operation not permitted', it is likely blocked by the sandbox. + If you see these errors (e.g. Vim:E303, 'should be read/write') in the command output (even if the command timed out or was cancelled by the user): + 1. IMMEDIATELY analyze the output to identify the required filesystem paths or network access. + 2. WITHOUT asking the user or explaining the failure first, AUTOMATICALLY retry the command using the 'run_shell_command' tool, explicitly providing the missing permissions in the 'additional_permissions' parameter. + 3. This will immediately present the user with a modal to approve the expansion for the command so they don't have to reprompt you.`.trim(); + } else { + return ` # macOS Seatbelt You are running under macos seatbelt with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. If you encounter failures that could be due to macOS Seatbelt (e.g. if a command fails with 'Operation not permitted' or similar error), as you report the error to the user, also explain why you think it could be due to macOS Seatbelt, and how the user may need to adjust their Seatbelt profile.`.trim(); + } } else if (mode === 'generic') { - return ` + if (toolSandboxingEnabled) { + return ` + # Sandbox + + You are running in a sandbox container with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. + **Sandbox Failure Recovery:** + If a command fails with 'Operation not permitted' or similar sandbox errors, do NOT ask the user to adjust settings manually. Instead: + 1. Analyze the command and error to identify the required filesystem paths or network access. + 2. Retry the command using the 'run_shell_command' tool, providing the missing permissions in the 'additional_permissions' parameter. + 3. The user will be presented with a modal to approve this expansion for the current command.`.trim(); + } else { + return ` # Sandbox You are running in a sandbox container with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. If you encounter failures that could be due to sandboxing (e.g. if a command fails with 'Operation not permitted' or similar error), when you report the error to the user, also explain why you think it could be due to sandboxing, and how the user may need to adjust their sandbox configuration.`.trim(); + } } return ''; } diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts index 7bf356d3c6..97d475e303 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts @@ -63,7 +63,7 @@ describe('MacOsSandboxManager', () => { expect(profile).toContain('(deny default)'); expect(profile).toContain('(allow process-exec)'); expect(profile).toContain('(subpath (param "WORKSPACE"))'); - expect(profile).not.toContain('(allow network*)'); + expect(profile).not.toContain('(allow network-outbound)'); expect(result.args).toContain('-D'); expect(result.args).toContain(`WORKSPACE=${mockWorkspace}`); @@ -91,7 +91,7 @@ describe('MacOsSandboxManager', () => { }); const profile = result.args[1]; - expect(profile).toContain('(allow network*)'); + expect(profile).toContain('(allow network-outbound)'); }); it('should parameterize allowed paths and normalize them', async () => { diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts index a7b92ff884..04271c991d 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts @@ -4,41 +4,164 @@ * SPDX-License-Identifier: Apache-2.0 */ -import fs from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; import { type SandboxManager, - type GlobalSandboxOptions, type SandboxRequest, type SandboxedCommand, - type ExecutionPolicy, - sanitizePaths, - GOVERNANCE_FILES, + type SandboxPermissions, + type GlobalSandboxOptions, } from '../../services/sandboxManager.js'; import { sanitizeEnvironment, getSecureSanitizationConfig, + type EnvironmentSanitizationConfig, } from '../../services/environmentSanitization.js'; +import { buildSeatbeltArgs } from './seatbeltArgsBuilder.js'; import { - BASE_SEATBELT_PROFILE, - NETWORK_SEATBELT_PROFILE, -} from './baseProfile.js'; + getCommandRoots, + initializeShellParsers, + splitCommands, + stripShellWrapper, +} from '../../utils/shell-utils.js'; +import { isKnownSafeCommand } from './commandSafety.js'; +import { parse as shellParse } from 'shell-quote'; +import { type SandboxPolicyManager } from '../../policy/sandboxPolicyManager.js'; +import path from 'node:path'; + +export interface MacOsSandboxOptions extends GlobalSandboxOptions { + /** Optional base sanitization config. */ + sanitizationConfig?: EnvironmentSanitizationConfig; + /** The current sandbox mode behavior from config. */ + modeConfig?: { + readonly?: boolean; + network?: boolean; + approvedTools?: string[]; + allowOverrides?: boolean; + }; + /** The policy manager for persistent approvals. */ + policyManager?: SandboxPolicyManager; +} /** * A SandboxManager implementation for macOS that uses Seatbelt. */ export class MacOsSandboxManager implements SandboxManager { - constructor(private readonly options: GlobalSandboxOptions) {} + constructor(private readonly options: MacOsSandboxOptions) {} + + private async isStrictlyApproved(req: SandboxRequest): Promise { + const approvedTools = this.options.modeConfig?.approvedTools; + if (!approvedTools || approvedTools.length === 0) { + return false; + } + + await initializeShellParsers(); + + const fullCmd = [req.command, ...req.args].join(' '); + const stripped = stripShellWrapper(fullCmd); + + const roots = getCommandRoots(stripped); + if (roots.length === 0) return false; + + const allRootsApproved = roots.every((root) => + approvedTools.includes(root), + ); + if (allRootsApproved) { + return true; + } + + const pipelineCommands = splitCommands(stripped); + if (pipelineCommands.length === 0) return false; + + // For safety, every command in the pipeline must be considered safe. + for (const cmdString of pipelineCommands) { + const parsedArgs = shellParse(cmdString).map(String); + if (!isKnownSafeCommand(parsedArgs)) { + return false; + } + } + + return true; + } + + private async getCommandName(req: SandboxRequest): Promise { + await initializeShellParsers(); + const fullCmd = [req.command, ...req.args].join(' '); + const stripped = stripShellWrapper(fullCmd); + const roots = getCommandRoots(stripped).filter( + (r) => r !== 'shopt' && r !== 'set', + ); + if (roots.length > 0) { + return roots[0]; + } + return path.basename(req.command); + } async prepareCommand(req: SandboxRequest): Promise { + await initializeShellParsers(); const sanitizationConfig = getSecureSanitizationConfig( req.policy?.sanitizationConfig, ); const sanitizedEnv = sanitizeEnvironment(req.env, sanitizationConfig); - const sandboxArgs = this.buildSeatbeltArgs(this.options, req.policy); + const isReadonlyMode = this.options.modeConfig?.readonly ?? true; + const allowOverrides = this.options.modeConfig?.allowOverrides ?? true; + + // Reject override attempts in plan mode + if (!allowOverrides && req.policy?.additionalPermissions) { + const perms = req.policy.additionalPermissions; + if ( + perms.network || + (perms.fileSystem?.write && perms.fileSystem.write.length > 0) + ) { + throw new Error( + 'Sandbox request rejected: Cannot override readonly/network restrictions in Plan mode.', + ); + } + } + + // If not in readonly mode OR it's a strictly approved pipeline, allow workspace writes + const isApproved = allowOverrides + ? await this.isStrictlyApproved(req) + : false; + + const workspaceWrite = !isReadonlyMode || isApproved; + const networkAccess = + this.options.modeConfig?.network ?? req.policy?.networkAccess ?? false; + + // Fetch persistent approvals for this command + const commandName = await this.getCommandName(req); + const persistentPermissions = allowOverrides + ? this.options.policyManager?.getCommandPermissions(commandName) + : undefined; + + // Merge all permissions + const mergedAdditional: SandboxPermissions = { + fileSystem: { + read: [ + ...(persistentPermissions?.fileSystem?.read ?? []), + ...(req.policy?.additionalPermissions?.fileSystem?.read ?? []), + ], + write: [ + ...(persistentPermissions?.fileSystem?.write ?? []), + ...(req.policy?.additionalPermissions?.fileSystem?.write ?? []), + ], + }, + network: + networkAccess || + persistentPermissions?.network || + req.policy?.additionalPermissions?.network || + false, + }; + + const sandboxArgs = buildSeatbeltArgs({ + workspace: this.options.workspace, + allowedPaths: [...(req.policy?.allowedPaths || [])], + forbiddenPaths: req.policy?.forbiddenPaths, + networkAccess: mergedAdditional.network, + workspaceWrite, + additionalPermissions: mergedAdditional, + }); return { program: '/usr/bin/sandbox-exec', @@ -47,124 +170,4 @@ export class MacOsSandboxManager implements SandboxManager { cwd: req.cwd, }; } - - /** - * Builds the arguments array for sandbox-exec using a strict allowlist profile. - * It relies on parameters passed to sandbox-exec via the -D flag to avoid - * string interpolation vulnerabilities, and normalizes paths against symlink escapes. - * - * Returns arguments up to the end of sandbox-exec configuration (e.g. ['-p', '', '-D', ...]) - * Does not include the final '--' separator or the command to run. - */ - private buildSeatbeltArgs( - options: GlobalSandboxOptions, - policy?: ExecutionPolicy, - ): string[] { - const profileLines = [BASE_SEATBELT_PROFILE]; - const args: string[] = []; - - const workspacePath = this.tryRealpath(options.workspace); - args.push('-D', `WORKSPACE=${workspacePath}`); - - // Add explicit deny rules for governance files in the workspace. - // These are added after the workspace allow rule (which is in BASE_SEATBELT_PROFILE) - // to ensure they take precedence (Seatbelt evaluates rules in order, later rules win for same path). - for (let i = 0; i < GOVERNANCE_FILES.length; i++) { - const governanceFile = path.join(workspacePath, GOVERNANCE_FILES[i].path); - - // Ensure the file/directory exists so Seatbelt rules are reliably applied. - this.touch(governanceFile, GOVERNANCE_FILES[i].isDirectory); - - const realGovernanceFile = this.tryRealpath(governanceFile); - - // Determine if it should be treated as a directory (subpath) or a file (literal). - // .git is generally a directory, while ignore files are literals. - let isActuallyDirectory = GOVERNANCE_FILES[i].isDirectory; - try { - if (fs.existsSync(realGovernanceFile)) { - isActuallyDirectory = fs.lstatSync(realGovernanceFile).isDirectory(); - } - } catch { - // Ignore errors, use default guess - } - - const ruleType = isActuallyDirectory ? 'subpath' : 'literal'; - - args.push('-D', `GOVERNANCE_FILE_${i}=${governanceFile}`); - profileLines.push( - `(deny file-write* (${ruleType} (param "GOVERNANCE_FILE_${i}")))`, - ); - - if (realGovernanceFile !== governanceFile) { - args.push('-D', `REAL_GOVERNANCE_FILE_${i}=${realGovernanceFile}`); - profileLines.push( - `(deny file-write* (${ruleType} (param "REAL_GOVERNANCE_FILE_${i}")))`, - ); - } - } - - const tmpPath = this.tryRealpath(os.tmpdir()); - args.push('-D', `TMPDIR=${tmpPath}`); - - const allowedPaths = sanitizePaths(policy?.allowedPaths) || []; - for (let i = 0; i < allowedPaths.length; i++) { - const allowedPath = this.tryRealpath(allowedPaths[i]); - args.push('-D', `ALLOWED_PATH_${i}=${allowedPath}`); - profileLines.push( - `(allow file-read* file-write* (subpath (param "ALLOWED_PATH_${i}")))`, - ); - } - - // TODO: handle forbidden paths - - if (policy?.networkAccess) { - profileLines.push(NETWORK_SEATBELT_PROFILE); - } - - args.unshift('-p', profileLines.join('\n')); - - return args; - } - - /** - * Ensures a file or directory exists. - */ - private touch(filePath: string, isDirectory: boolean) { - try { - // If it exists (even as a broken symlink), do nothing - if (fs.lstatSync(filePath)) return; - } catch { - // Ignore ENOENT - } - - if (isDirectory) { - fs.mkdirSync(filePath, { recursive: true }); - } else { - const dir = path.dirname(filePath); - if (!fs.existsSync(dir)) { - fs.mkdirSync(dir, { recursive: true }); - } - fs.closeSync(fs.openSync(filePath, 'a')); - } - } - - /** - * Resolves symlinks for a given path to prevent sandbox escapes. - * If a file does not exist (ENOENT), it recursively resolves the parent directory. - * Other errors (e.g. EACCES) are re-thrown. - */ - private tryRealpath(p: string): string { - try { - return fs.realpathSync(p); - } catch (e) { - if (e instanceof Error && 'code' in e && e.code === 'ENOENT') { - const parentDir = path.dirname(p); - if (parentDir === p) { - return p; - } - return path.join(this.tryRealpath(parentDir), path.basename(p)); - } - throw e; - } - } } diff --git a/packages/core/src/sandbox/macos/baseProfile.ts b/packages/core/src/sandbox/macos/baseProfile.ts index b331b7c58e..4c712b2f1b 100644 --- a/packages/core/src/sandbox/macos/baseProfile.ts +++ b/packages/core/src/sandbox/macos/baseProfile.ts @@ -16,11 +16,101 @@ export const BASE_SEATBELT_PROFILE = `(version 1) (import "system.sb") + ; Core execution requirements (allow process-exec) (allow process-fork) (allow signal (target same-sandbox)) -(allow process-info* (target same-sandbox)) +(allow process-info*) + +(allow file-write-data + (require-all + (path "/dev/null") + (vnode-type CHARACTER-DEVICE))) + +; sysctls permitted. +(allow sysctl-read + (sysctl-name "hw.activecpu") + (sysctl-name "hw.busfrequency_compat") + (sysctl-name "hw.byteorder") + (sysctl-name "hw.cacheconfig") + (sysctl-name "hw.cachelinesize_compat") + (sysctl-name "hw.cpufamily") + (sysctl-name "hw.cpufrequency_compat") + (sysctl-name "hw.cputype") + (sysctl-name "hw.l1dcachesize_compat") + (sysctl-name "hw.l1icachesize_compat") + (sysctl-name "hw.l2cachesize_compat") + (sysctl-name "hw.l3cachesize_compat") + (sysctl-name "hw.logicalcpu_max") + (sysctl-name "hw.machine") + (sysctl-name "hw.model") + (sysctl-name "hw.memsize") + (sysctl-name "hw.ncpu") + (sysctl-name "hw.nperflevels") + (sysctl-name-prefix "hw.optional.arm.") + (sysctl-name-prefix "hw.optional.armv8_") + (sysctl-name "hw.packages") + (sysctl-name "hw.pagesize_compat") + (sysctl-name "hw.pagesize") + (sysctl-name "hw.physicalcpu") + (sysctl-name "hw.physicalcpu_max") + (sysctl-name "hw.logicalcpu") + (sysctl-name "hw.cpufrequency") + (sysctl-name "hw.tbfrequency_compat") + (sysctl-name "hw.vectorunit") + (sysctl-name "machdep.cpu.brand_string") + (sysctl-name "kern.argmax") + (sysctl-name "kern.hostname") + (sysctl-name "kern.maxfilesperproc") + (sysctl-name "kern.maxproc") + (sysctl-name "kern.osproductversion") + (sysctl-name "kern.osrelease") + (sysctl-name "kern.ostype") + (sysctl-name "kern.osvariant_status") + (sysctl-name "kern.osversion") + (sysctl-name "kern.secure_kernel") + (sysctl-name "kern.usrstack64") + (sysctl-name "kern.version") + (sysctl-name "sysctl.proc_cputype") + (sysctl-name "vm.loadavg") + (sysctl-name-prefix "hw.perflevel") + (sysctl-name-prefix "kern.proc.pgrp.") + (sysctl-name-prefix "kern.proc.pid.") + (sysctl-name-prefix "net.routetable.") +) + +(allow sysctl-write + (sysctl-name "kern.grade_cputype")) + + +(allow mach-lookup + (global-name "com.apple.sysmond") +) +\n; IOKit +(allow iokit-open + (iokit-registry-entry-class "RootDomainUserClient") +) + +(allow mach-lookup + (global-name "com.apple.system.opendirectoryd.libinfo") +) + +; Needed for python multiprocessing on MacOS for the SemLock +(allow ipc-posix-sem) + +(allow mach-lookup + (global-name "com.apple.PowerManagement.control") +) + +; PTY and Terminal support +(allow pseudo-tty) +(allow file-read* file-write* file-ioctl (literal "/dev/ptmx")) +(allow file-read* file-write* + (require-all + (regex #"^/dev/ttys[0-9]+") + (extension "com.apple.sandbox.pty"))) +(allow file-ioctl (regex #"^/dev/ttys[0-9]+")) ; Allow basic read access to system frameworks and libraries required to run (allow file-read* @@ -38,11 +128,6 @@ export const BASE_SEATBELT_PROFILE = `(version 1) (subpath "/private/etc") ) -; PTY and Terminal support -(allow pseudo-tty) -(allow file-read* file-write* file-ioctl (literal "/dev/ptmx")) -(allow file-read* file-write* file-ioctl (regex #"^/dev/ttys[0-9]+")) - ; Allow read/write access to temporary directories and common device nodes (allow file-read* file-write* (literal "/dev/null") @@ -53,9 +138,10 @@ export const BASE_SEATBELT_PROFILE = `(version 1) ) ; Workspace access using parameterized paths -(allow file-read* file-write* +(allow file-read* (subpath (param "WORKSPACE")) ) + `; /** @@ -66,7 +152,9 @@ export const BASE_SEATBELT_PROFILE = `(version 1) */ export const NETWORK_SEATBELT_PROFILE = ` ; Network Access -(allow network*) +(allow network-outbound) +(allow network-inbound) +(allow network-bind) (allow system-socket (require-all diff --git a/packages/core/src/sandbox/macos/commandSafety.ts b/packages/core/src/sandbox/macos/commandSafety.ts new file mode 100644 index 0000000000..a9911932fc --- /dev/null +++ b/packages/core/src/sandbox/macos/commandSafety.ts @@ -0,0 +1,469 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { parse as shellParse } from 'shell-quote'; + +/** + * Checks if a command with its arguments is known to be safe to execute + * without requiring user confirmation. This is primarily used to allow + * harmless, read-only commands to run silently in the macOS sandbox. + * + * It handles raw command execution as well as wrapped commands like `bash -c "..."` or `bash -lc "..."`. + * For wrapped commands, it parses the script and ensures all individual + * sub-commands are in the known-safe list and no dangerous shell operators + * (like subshells or redirection) are used. + * + * @param args - The command and its arguments (e.g., ['ls', '-la']) + * @returns true if the command is considered safe, false otherwise. + */ +export function isKnownSafeCommand(args: string[]): boolean { + if (!args || args.length === 0) { + return false; + } + + // Normalize zsh to bash + const normalizedArgs = args.map((a) => (a === 'zsh' ? 'bash' : a)); + + if (isSafeToCallWithExec(normalizedArgs)) { + return true; + } + + // Support `bash -lc "..."` + if ( + normalizedArgs.length === 3 && + normalizedArgs[0] === 'bash' && + (normalizedArgs[1] === '-lc' || normalizedArgs[1] === '-c') + ) { + try { + const script = normalizedArgs[2]; + + // Basic check for dangerous operators that could spawn subshells or redirect output + // We allow &&, ||, |, ; but explicitly block subshells () and redirection >, >>, < + if (/[()<>]/g.test(script)) { + return false; + } + + const commands = script.split(/&&|\|\||\||;/); + + let allSafe = true; + for (const cmd of commands) { + const trimmed = cmd.trim(); + if (!trimmed) continue; + + const parsed = shellParse(trimmed).map(String); + if (parsed.length === 0) continue; + + if (!isSafeToCallWithExec(parsed)) { + allSafe = false; + break; + } + } + + if (allSafe && commands.length > 0) { + return true; + } + } catch { + return false; + } + } + + return false; +} + +/** + * Core validation logic that checks a single command and its arguments + * against an allowlist of known safe operations. It performs deep validation + * for specific tools like `base64`, `find`, `rg`, `git`, and `sed` to ensure + * unsafe flags (like `--output`, `-exec`, or mutating options) are not used. + * + * @param args - The command and its arguments. + * @returns true if the command is strictly read-only and safe. + */ +function isSafeToCallWithExec(args: string[]): boolean { + if (!args || args.length === 0) return false; + const cmd = args[0]; + + const safeCommands = new Set([ + 'cat', + 'cd', + 'cut', + 'echo', + 'expr', + 'false', + 'grep', + 'head', + 'id', + 'ls', + 'nl', + 'paste', + 'pwd', + 'rev', + 'seq', + 'stat', + 'tail', + 'tr', + 'true', + 'uname', + 'uniq', + 'wc', + 'which', + 'whoami', + 'numfmt', + 'tac', + ]); + + if (safeCommands.has(cmd)) { + return true; + } + + if (cmd === 'base64') { + const unsafeOptions = new Set(['-o', '--output']); + return !args + .slice(1) + .some( + (arg) => + unsafeOptions.has(arg) || + arg.startsWith('--output=') || + (arg.startsWith('-o') && arg !== '-o'), + ); + } + + if (cmd === 'find') { + const unsafeOptions = new Set([ + '-exec', + '-execdir', + '-ok', + '-okdir', + '-delete', + '-fls', + '-fprint', + '-fprint0', + '-fprintf', + ]); + return !args.some((arg) => unsafeOptions.has(arg)); + } + + if (cmd === 'rg') { + const unsafeWithArgs = new Set(['--pre', '--hostname-bin']); + const unsafeWithoutArgs = new Set(['--search-zip', '-z']); + + return !args.some((arg) => { + if (unsafeWithoutArgs.has(arg)) return true; + for (const opt of unsafeWithArgs) { + if (arg === opt || arg.startsWith(opt + '=')) return true; + } + return false; + }); + } + + if (cmd === 'git') { + if (gitHasConfigOverrideGlobalOption(args)) { + return false; + } + + const { idx, subcommand } = findGitSubcommand(args, [ + 'status', + 'log', + 'diff', + 'show', + 'branch', + ]); + if (!subcommand) { + return false; + } + + const subcommandArgs = args.slice(idx + 1); + + if (['status', 'log', 'diff', 'show'].includes(subcommand)) { + return gitSubcommandArgsAreReadOnly(subcommandArgs); + } + + if (subcommand === 'branch') { + return ( + gitSubcommandArgsAreReadOnly(subcommandArgs) && + gitBranchIsReadOnly(subcommandArgs) + ); + } + + return false; + } + + if (cmd === 'sed') { + // Special-case sed -n {N|M,N}p + if (args.length <= 4 && args[1] === '-n' && isValidSedNArg(args[2])) { + return true; + } + return false; + } + + return false; +} + +/** + * Helper to identify which git subcommand is being executed, skipping over + * global git options like `-c` or `--git-dir`. + * + * @param args - The full git command arguments. + * @param subcommands - A list of subcommands to look for. + * @returns An object containing the index of the subcommand and its name. + */ +function findGitSubcommand( + args: string[], + subcommands: string[], +): { idx: number; subcommand: string | null } { + let skipNext = false; + + for (let idx = 1; idx < args.length; idx++) { + if (skipNext) { + skipNext = false; + continue; + } + + const arg = args[idx]; + + if ( + arg.startsWith('--config-env=') || + arg.startsWith('--exec-path=') || + arg.startsWith('--git-dir=') || + arg.startsWith('--namespace=') || + arg.startsWith('--super-prefix=') || + arg.startsWith('--work-tree=') || + ((arg.startsWith('-C') || arg.startsWith('-c')) && arg.length > 2) + ) { + continue; + } + + if ( + arg === '-C' || + arg === '-c' || + arg === '--config-env' || + arg === '--exec-path' || + arg === '--git-dir' || + arg === '--namespace' || + arg === '--super-prefix' || + arg === '--work-tree' + ) { + skipNext = true; + continue; + } + + if (arg === '--' || arg.startsWith('-')) { + continue; + } + + if (subcommands.includes(arg)) { + return { idx, subcommand: arg }; + } + + return { idx: -1, subcommand: null }; + } + + return { idx: -1, subcommand: null }; +} + +/** + * Checks if a git command contains global configuration override flags + * (e.g., `-c` or `--config-env`) which could be used maliciously to + * execute arbitrary code via git config. + * + * @param args - The git command arguments. + * @returns true if config overrides are present. + */ +function gitHasConfigOverrideGlobalOption(args: string[]): boolean { + return args.some( + (arg) => + arg === '-c' || + arg === '--config-env' || + (arg.startsWith('-c') && arg.length > 2) || + arg.startsWith('--config-env='), + ); +} + +/** + * Validates that the arguments for safe git subcommands (like `status`, `log`, + * `diff`, `show`) do not contain flags that could cause mutations or execute + * arbitrary commands (e.g., `--output`, `--exec`). + * + * @param args - Arguments passed to the git subcommand. + * @returns true if the arguments only represent read-only operations. + */ +function gitSubcommandArgsAreReadOnly(args: string[]): boolean { + const unsafeFlags = new Set([ + '--output', + '--ext-diff', + '--textconv', + '--exec', + '--paginate', + ]); + + return !args.some( + (arg) => + unsafeFlags.has(arg) || + arg.startsWith('--output=') || + arg.startsWith('--exec='), + ); +} + +/** + * Validates that `git branch` is only used for read operations + * (e.g., listing branches) rather than creating, deleting, or renaming branches. + * + * @param args - Arguments passed to `git branch`. + * @returns true if it's purely a listing/read-only branch command. + */ +function gitBranchIsReadOnly(args: string[]): boolean { + if (args.length === 0) return true; + + let sawReadOnlyFlag = false; + for (const arg of args) { + if ( + [ + '--list', + '-l', + '--show-current', + '-a', + '--all', + '-r', + '--remotes', + '-v', + '-vv', + '--verbose', + ].includes(arg) + ) { + sawReadOnlyFlag = true; + } else if (arg.startsWith('--format=')) { + sawReadOnlyFlag = true; + } else { + return false; + } + } + return sawReadOnlyFlag; +} + +/** + * Ensures that a `sed` command argument is a valid line-printing instruction + * (e.g., `10p` or `5,10p`), preventing unsafe script execution in `sed`. + * + * @param arg - The script argument passed to `sed -n`. + * @returns true if it's a valid, safe print command. + */ +function isValidSedNArg(arg: string | undefined): boolean { + if (!arg) return false; + + if (!arg.endsWith('p')) return false; + const core = arg.slice(0, -1); + + const parts = core.split(','); + if (parts.length === 1) { + const num = parts[0]; + return num.length > 0 && /^\d+$/.test(num); + } else if (parts.length === 2) { + const a = parts[0]; + const b = parts[1]; + return a.length > 0 && b.length > 0 && /^\d+$/.test(a) && /^\d+$/.test(b); + } + + return false; +} + +/** + * Checks if a command with its arguments is explicitly known to be dangerous + * and should be blocked or require strict user confirmation. This catches + * destructive commands like `rm -rf`, `sudo`, and commands with execution + * flags like `find -exec`. + * + * @param args - The command and its arguments. + * @returns true if the command is identified as dangerous, false otherwise. + */ +export function isDangerousCommand(args: string[]): boolean { + if (!args || args.length === 0) { + return false; + } + + const cmd = args[0]; + + if (cmd === 'rm') { + return args[1] === '-f' || args[1] === '-rf' || args[1] === '-fr'; + } + + if (cmd === 'sudo') { + return isDangerousCommand(args.slice(1)); + } + + if (cmd === 'find') { + const unsafeOptions = new Set([ + '-exec', + '-execdir', + '-ok', + '-okdir', + '-delete', + '-fls', + '-fprint', + '-fprint0', + '-fprintf', + ]); + return args.some((arg) => unsafeOptions.has(arg)); + } + + if (cmd === 'rg') { + const unsafeWithArgs = new Set(['--pre', '--hostname-bin']); + const unsafeWithoutArgs = new Set(['--search-zip', '-z']); + + return args.some((arg) => { + if (unsafeWithoutArgs.has(arg)) return true; + for (const opt of unsafeWithArgs) { + if (arg === opt || arg.startsWith(opt + '=')) return true; + } + return false; + }); + } + + if (cmd === 'git') { + if (gitHasConfigOverrideGlobalOption(args)) { + return true; + } + + const { idx, subcommand } = findGitSubcommand(args, [ + 'status', + 'log', + 'diff', + 'show', + 'branch', + ]); + if (!subcommand) { + // It's a git command we don't recognize as explicitly safe. + return false; + } + + const subcommandArgs = args.slice(idx + 1); + + if (['status', 'log', 'diff', 'show'].includes(subcommand)) { + return !gitSubcommandArgsAreReadOnly(subcommandArgs); + } + + if (subcommand === 'branch') { + return !( + gitSubcommandArgsAreReadOnly(subcommandArgs) && + gitBranchIsReadOnly(subcommandArgs) + ); + } + + return false; + } + + if (cmd === 'base64') { + const unsafeOptions = new Set(['-o', '--output']); + return args + .slice(1) + .some( + (arg) => + unsafeOptions.has(arg) || + arg.startsWith('--output=') || + (arg.startsWith('-o') && arg !== '-o'), + ); + } + + return false; +} diff --git a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts new file mode 100644 index 0000000000..8bc3ac87b4 --- /dev/null +++ b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts @@ -0,0 +1,160 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect, vi } from 'vitest'; +import { buildSeatbeltArgs } from './seatbeltArgsBuilder.js'; +import fs from 'node:fs'; +import os from 'node:os'; + +describe('seatbeltArgsBuilder', () => { + it('should build a strict allowlist profile allowing the workspace via param', () => { + // Mock realpathSync to just return the path for testing + vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p as string); + + const args = buildSeatbeltArgs({ workspace: '/Users/test/workspace' }); + + expect(args[0]).toBe('-p'); + const profile = args[1]; + expect(profile).toContain('(version 1)'); + expect(profile).toContain('(deny default)'); + expect(profile).toContain('(allow process-exec)'); + expect(profile).toContain('(subpath (param "WORKSPACE"))'); + expect(profile).not.toContain('(allow network*)'); + + expect(args).toContain('-D'); + expect(args).toContain('WORKSPACE=/Users/test/workspace'); + expect(args).toContain(`TMPDIR=${os.tmpdir()}`); + + vi.restoreAllMocks(); + }); + + it('should allow network when networkAccess is true', () => { + const args = buildSeatbeltArgs({ workspace: '/test', networkAccess: true }); + const profile = args[1]; + expect(profile).toContain('(allow network-outbound)'); + }); + + it('should parameterize allowed paths and normalize them', () => { + vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { + if (p === '/test/symlink') return '/test/real_path'; + return p as string; + }); + + const args = buildSeatbeltArgs({ + workspace: '/test', + allowedPaths: ['/custom/path1', '/test/symlink'], + }); + + const profile = args[1]; + expect(profile).toContain('(subpath (param "ALLOWED_PATH_0"))'); + expect(profile).toContain('(subpath (param "ALLOWED_PATH_1"))'); + + expect(args).toContain('-D'); + expect(args).toContain('ALLOWED_PATH_0=/custom/path1'); + expect(args).toContain('ALLOWED_PATH_1=/test/real_path'); + + vi.restoreAllMocks(); + }); + + it('should resolve parent directories if a file does not exist', () => { + vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { + if (p === '/test/symlink/nonexistent.txt') { + const error = new Error('ENOENT'); + Object.assign(error, { code: 'ENOENT' }); + throw error; + } + if (p === '/test/symlink') { + return '/test/real_path'; + } + return p as string; + }); + + const args = buildSeatbeltArgs({ + workspace: '/test/symlink/nonexistent.txt', + }); + + expect(args).toContain('WORKSPACE=/test/real_path/nonexistent.txt'); + vi.restoreAllMocks(); + }); + + it('should throw if realpathSync throws a non-ENOENT error', () => { + vi.spyOn(fs, 'realpathSync').mockImplementation(() => { + const error = new Error('Permission denied'); + Object.assign(error, { code: 'EACCES' }); + throw error; + }); + + expect(() => + buildSeatbeltArgs({ + workspace: '/test/workspace', + }), + ).toThrow('Permission denied'); + + vi.restoreAllMocks(); + }); + + describe('governance files', () => { + it('should inject explicit deny rules for governance files', () => { + vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p.toString()); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + vi.spyOn(fs, 'lstatSync').mockImplementation( + (p) => + ({ + isDirectory: () => p.toString().endsWith('.git'), + isFile: () => !p.toString().endsWith('.git'), + }) as unknown as fs.Stats, + ); + + const args = buildSeatbeltArgs({ workspace: '/Users/test/workspace' }); + const profile = args[1]; + + // .gitignore should be a literal deny + expect(args).toContain('-D'); + expect(args).toContain( + 'GOVERNANCE_FILE_0=/Users/test/workspace/.gitignore', + ); + expect(profile).toContain( + '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', + ); + + // .git should be a subpath deny + expect(args).toContain('GOVERNANCE_FILE_2=/Users/test/workspace/.git'); + expect(profile).toContain( + '(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))', + ); + + vi.restoreAllMocks(); + }); + + it('should protect both the symlink and the real path if they differ', () => { + vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { + if (p === '/test/workspace/.gitignore') return '/test/real/.gitignore'; + return p.toString(); + }); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + vi.spyOn(fs, 'lstatSync').mockImplementation( + () => + ({ + isDirectory: () => false, + isFile: () => true, + }) as unknown as fs.Stats, + ); + + const args = buildSeatbeltArgs({ workspace: '/test/workspace' }); + const profile = args[1]; + + expect(args).toContain('GOVERNANCE_FILE_0=/test/workspace/.gitignore'); + expect(args).toContain('REAL_GOVERNANCE_FILE_0=/test/real/.gitignore'); + expect(profile).toContain( + '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', + ); + expect(profile).toContain( + '(deny file-write* (literal (param "REAL_GOVERNANCE_FILE_0")))', + ); + + vi.restoreAllMocks(); + }); + }); +}); diff --git a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts new file mode 100644 index 0000000000..3a4a9d3ab7 --- /dev/null +++ b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts @@ -0,0 +1,247 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { + BASE_SEATBELT_PROFILE, + NETWORK_SEATBELT_PROFILE, +} from './baseProfile.js'; +import { + type SandboxPermissions, + sanitizePaths, + GOVERNANCE_FILES, +} from '../../services/sandboxManager.js'; + +/** + * Options for building macOS Seatbelt arguments. + */ +export interface SeatbeltArgsOptions { + /** The primary workspace path to allow access to. */ + workspace: string; + /** Additional paths to allow access to. */ + allowedPaths?: string[]; + /** Absolute paths to explicitly deny read/write access to (overrides allowlists). */ + forbiddenPaths?: string[]; + /** Whether to allow network access. */ + networkAccess?: boolean; + /** Granular additional permissions. */ + additionalPermissions?: SandboxPermissions; + /** Whether to allow write access to the workspace. */ + workspaceWrite?: boolean; +} + +/** + * Resolves symlinks for a given path to prevent sandbox escapes. + * If a file does not exist (ENOENT), it recursively resolves the parent directory. + * Other errors (e.g. EACCES) are re-thrown. + */ +function tryRealpath(p: string): string { + try { + return fs.realpathSync(p); + } catch (e) { + if (e instanceof Error && 'code' in e && e.code === 'ENOENT') { + const parentDir = path.dirname(p); + if (parentDir === p) { + return p; + } + return path.join(tryRealpath(parentDir), path.basename(p)); + } + throw e; + } +} + +/** + * Builds the arguments array for sandbox-exec using a strict allowlist profile. + * It relies on parameters passed to sandbox-exec via the -D flag to avoid + * string interpolation vulnerabilities, and normalizes paths against symlink escapes. + * + * Returns arguments up to the end of sandbox-exec configuration (e.g. ['-p', '', '-D', ...]) + * Does not include the final '--' separator or the command to run. + */ +export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { + let profile = BASE_SEATBELT_PROFILE + '\n'; + const args: string[] = []; + + const workspacePath = tryRealpath(options.workspace); + args.push('-D', `WORKSPACE=${workspacePath}`); + args.push('-D', `WORKSPACE_RAW=${options.workspace}`); + profile += `(allow file-read* (subpath (param "WORKSPACE_RAW")))\n`; + if (options.workspaceWrite) { + profile += `(allow file-write* (subpath (param "WORKSPACE_RAW")))\n`; + } + + if (options.workspaceWrite) { + profile += `(allow file-write* (subpath (param "WORKSPACE")))\n`; + } + + // Add explicit deny rules for governance files in the workspace. + // These are added after the workspace allow rule to ensure they take precedence + // (Seatbelt evaluates rules in order, later rules win for same path). + for (let i = 0; i < GOVERNANCE_FILES.length; i++) { + const governanceFile = path.join(workspacePath, GOVERNANCE_FILES[i].path); + const realGovernanceFile = tryRealpath(governanceFile); + + // Determine if it should be treated as a directory (subpath) or a file (literal). + // .git is generally a directory, while ignore files are literals. + let isDirectory = GOVERNANCE_FILES[i].isDirectory; + try { + if (fs.existsSync(realGovernanceFile)) { + isDirectory = fs.lstatSync(realGovernanceFile).isDirectory(); + } + } catch { + // Ignore errors, use default guess + } + + const ruleType = isDirectory ? 'subpath' : 'literal'; + + args.push('-D', `GOVERNANCE_FILE_${i}=${governanceFile}`); + profile += `(deny file-write* (${ruleType} (param "GOVERNANCE_FILE_${i}")))\n`; + + if (realGovernanceFile !== governanceFile) { + args.push('-D', `REAL_GOVERNANCE_FILE_${i}=${realGovernanceFile}`); + profile += `(deny file-write* (${ruleType} (param "REAL_GOVERNANCE_FILE_${i}")))\n`; + } + } + + // Auto-detect and support git worktrees by granting read and write access to the underlying git directory + try { + const gitPath = path.join(workspacePath, '.git'); + const gitStat = fs.lstatSync(gitPath); + if (gitStat.isFile()) { + const gitContent = fs.readFileSync(gitPath, 'utf8'); + const match = gitContent.match(/^gitdir:\s*(.+)$/m); + if (match && match[1]) { + let worktreeGitDir = match[1].trim(); + if (!path.isAbsolute(worktreeGitDir)) { + worktreeGitDir = path.resolve(workspacePath, worktreeGitDir); + } + const resolvedWorktreeGitDir = tryRealpath(worktreeGitDir); + + // Grant write access to the worktree's specific .git directory + args.push('-D', `WORKTREE_GIT_DIR=${resolvedWorktreeGitDir}`); + profile += `(allow file-read* file-write* (subpath (param "WORKTREE_GIT_DIR")))\n`; + + // Grant write access to the main repository's .git directory (objects, refs, etc. are shared) + // resolvedWorktreeGitDir is usually like: /path/to/main-repo/.git/worktrees/worktree-name + const mainGitDir = tryRealpath( + path.dirname(path.dirname(resolvedWorktreeGitDir)), + ); + if (mainGitDir && mainGitDir.endsWith('.git')) { + args.push('-D', `MAIN_GIT_DIR=${mainGitDir}`); + profile += `(allow file-read* file-write* (subpath (param "MAIN_GIT_DIR")))\n`; + } + } + } + } catch (_e) { + // Ignore if .git doesn't exist, isn't readable, etc. + } + + const tmpPath = tryRealpath(os.tmpdir()); + args.push('-D', `TMPDIR=${tmpPath}`); + + const nodeRootPath = tryRealpath( + path.dirname(path.dirname(process.execPath)), + ); + args.push('-D', `NODE_ROOT=${nodeRootPath}`); + profile += `(allow file-read* (subpath (param "NODE_ROOT")))\n`; + + // Add PATH directories as read-only to support nvm, homebrew, etc. + if (process.env['PATH']) { + const paths = process.env['PATH'].split(':'); + let pathIndex = 0; + const addedPaths = new Set(); + + for (const p of paths) { + if (!p.trim()) continue; + try { + let resolved = tryRealpath(p); + + // If this is a 'bin' directory (like /usr/local/bin or homebrew/bin), + // also grant read access to its parent directory so that symlinked + // assets (like Cellar or libexec) can be read. + if (resolved.endsWith('/bin')) { + resolved = path.dirname(resolved); + } + + if (!addedPaths.has(resolved)) { + addedPaths.add(resolved); + args.push('-D', `SYS_PATH_${pathIndex}=${resolved}`); + profile += `(allow file-read* (subpath (param "SYS_PATH_${pathIndex}")))\n`; + pathIndex++; + } + } catch (_e) { + // Ignore paths that do not exist or are inaccessible + } + } + } + + // Handle allowedPaths + const allowedPaths = sanitizePaths(options.allowedPaths) || []; + for (let i = 0; i < allowedPaths.length; i++) { + const allowedPath = tryRealpath(allowedPaths[i]); + args.push('-D', `ALLOWED_PATH_${i}=${allowedPath}`); + profile += `(allow file-read* file-write* (subpath (param "ALLOWED_PATH_${i}")))\n`; + } + + // Handle granular additional permissions + if (options.additionalPermissions?.fileSystem) { + const { read, write } = options.additionalPermissions.fileSystem; + if (read) { + read.forEach((p, i) => { + const resolved = tryRealpath(p); + const paramName = `ADDITIONAL_READ_${i}`; + args.push('-D', `${paramName}=${resolved}`); + let isFile = false; + try { + isFile = fs.statSync(resolved).isFile(); + } catch { + // Ignore error + } + if (isFile) { + profile += `(allow file-read* (literal (param "${paramName}")))\n`; + } else { + profile += `(allow file-read* (subpath (param "${paramName}")))\n`; + } + }); + } + if (write) { + write.forEach((p, i) => { + const resolved = tryRealpath(p); + const paramName = `ADDITIONAL_WRITE_${i}`; + args.push('-D', `${paramName}=${resolved}`); + let isFile = false; + try { + isFile = fs.statSync(resolved).isFile(); + } catch { + // Ignore error + } + if (isFile) { + profile += `(allow file-read* file-write* (literal (param "${paramName}")))\n`; + } else { + profile += `(allow file-read* file-write* (subpath (param "${paramName}")))\n`; + } + }); + } + } + + // Handle forbiddenPaths + const forbiddenPaths = sanitizePaths(options.forbiddenPaths) || []; + for (let i = 0; i < forbiddenPaths.length; i++) { + const forbiddenPath = tryRealpath(forbiddenPaths[i]); + args.push('-D', `FORBIDDEN_PATH_${i}=${forbiddenPath}`); + profile += `(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_${i}")))\n`; + } + + if (options.networkAccess || options.additionalPermissions?.network) { + profile += NETWORK_SEATBELT_PROFILE; + } + + args.unshift('-p', profile); + + return args; +} diff --git a/packages/core/src/scheduler/policy.ts b/packages/core/src/scheduler/policy.ts index ca84447261..4faa9a209b 100644 --- a/packages/core/src/scheduler/policy.ts +++ b/packages/core/src/scheduler/policy.ts @@ -77,7 +77,8 @@ export async function checkPolicy( // confirmation prompt if the policy engine's decision is 'ASK_USER'. if ( decision === PolicyDecision.ASK_USER && - toolCall.request.isClientInitiated + toolCall.request.isClientInitiated && + !toolCall.request.args?.['additional_permissions'] ) { return { decision: PolicyDecision.ALLOW, diff --git a/packages/core/src/scheduler/scheduler.ts b/packages/core/src/scheduler/scheduler.ts index ce2e530a16..f442118b8e 100644 --- a/packages/core/src/scheduler/scheduler.ts +++ b/packages/core/src/scheduler/scheduler.ts @@ -792,6 +792,110 @@ export class Scheduler { return true; } + let isSandboxError = false; + let sandboxDetailsStr = ''; + + if ( + result.status === CoreToolCallStatus.Error && + result.response.errorType === 'sandbox_expansion_required' + ) { + isSandboxError = true; + sandboxDetailsStr = result.response.error?.message || ''; + } + + if (isSandboxError) { + try { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const parsedError = JSON.parse(sandboxDetailsStr) as { + rootCommand: string; + additionalPermissions: import('../services/sandboxManager.js').SandboxPermissions; + }; + + const confirmationDetails: SerializableConfirmationDetails = { + type: 'sandbox_expansion', + title: 'Sandbox Expansion Request', + command: String( + activeCall.request.args['command'] ?? parsedError.rootCommand, + ), + rootCommand: parsedError.rootCommand, + additionalPermissions: parsedError.additionalPermissions, + }; + + const correlationId = crypto.randomUUID(); + + // Mutate the active call so resolveConfirmation generates the correct Sandbox Expansion details + activeCall.request.args['additional_permissions'] = + parsedError.additionalPermissions; + activeCall.invocation = activeCall.tool.build(activeCall.request.args); + + // CRITICAL: We must push the new args and invocation into the state manager + // before calling resolveConfirmation, because resolveConfirmation fetches + // the tool call directly from the state manager! + this.state.updateArgs( + callId, + activeCall.request.args, + activeCall.invocation, + ); + + this.state.updateStatus(callId, CoreToolCallStatus.AwaitingApproval, { + confirmationDetails, + correlationId, + }); + + const validatingCall = { + ...activeCall, + status: CoreToolCallStatus.Validating, + } as ValidatingToolCall; + + const confResult = await resolveConfirmation(validatingCall, signal, { + config: this.config, + messageBus: this.messageBus, + state: this.state, + modifier: this.modifier, + getPreferredEditor: this.getPreferredEditor, + schedulerId: this.schedulerId, + onWaitingForConfirmation: this.onWaitingForConfirmation, + }); + + if (confResult.outcome === ToolConfirmationOutcome.Cancel) { + type LegacyHack = ToolCallResponseInfo & { + llmContent?: string; + returnDisplay?: string; + }; + const errorResponse = { ...result.response } as LegacyHack; + errorResponse.llmContent = + 'User cancelled sandbox expansion. The command failed with a sandbox denial. Shell output:\n' + + String(errorResponse.returnDisplay); + + this.state.updateStatus( + callId, + CoreToolCallStatus.Error, + errorResponse, + ); + return false; + } + + activeCall.request.args['additional_permissions'] = + parsedError.additionalPermissions; + + // Reset the output stream visual so it replaces the error text + this.state.updateStatus(callId, CoreToolCallStatus.Executing, { + liveOutput: undefined, + }); + + // Call _execute synchronously and properly return its promise to loop internally! + return await this._execute( + { + ...activeCall, + status: CoreToolCallStatus.Scheduled, + } as ScheduledToolCall, + signal, + ); + } catch (_e) { + // Fallback to normal error handling if parsing/looping fails + } + } + if (result.status === CoreToolCallStatus.Success) { this.state.updateStatus( callId, diff --git a/packages/core/src/services/sandboxManager.ts b/packages/core/src/services/sandboxManager.ts index 32d7344a05..4bf1db2875 100644 --- a/packages/core/src/services/sandboxManager.ts +++ b/packages/core/src/services/sandboxManager.ts @@ -11,6 +11,18 @@ import { getSecureSanitizationConfig, type EnvironmentSanitizationConfig, } from './environmentSanitization.js'; +export interface SandboxPermissions { + /** Filesystem permissions. */ + fileSystem?: { + /** Paths that should be readable by the command. */ + read?: string[]; + /** Paths that should be writable by the command. */ + write?: string[]; + }; + /** Whether the command should have network access. */ + network?: boolean; +} + /** * Security boundaries and permissions applied to a specific sandboxed execution. */ @@ -23,6 +35,8 @@ export interface ExecutionPolicy { networkAccess?: boolean; /** Rules for scrubbing sensitive environment variables. */ sanitizationConfig?: Partial; + /** Additional granular permissions to grant to this command. */ + additionalPermissions?: SandboxPermissions; } /** diff --git a/packages/core/src/services/sandboxManagerFactory.ts b/packages/core/src/services/sandboxManagerFactory.ts index 410f5e07dc..fa24b99f6e 100644 --- a/packages/core/src/services/sandboxManagerFactory.ts +++ b/packages/core/src/services/sandboxManagerFactory.ts @@ -14,6 +14,7 @@ import { LinuxSandboxManager } from '../sandbox/linux/LinuxSandboxManager.js'; import { MacOsSandboxManager } from '../sandbox/macos/MacOsSandboxManager.js'; import { WindowsSandboxManager } from './windowsSandboxManager.js'; import type { SandboxConfig } from '../config/config.js'; +import { type SandboxPolicyManager } from '../policy/sandboxPolicyManager.js'; /** * Creates a sandbox manager based on the provided settings. @@ -21,7 +22,13 @@ import type { SandboxConfig } from '../config/config.js'; export function createSandboxManager( sandbox: SandboxConfig | undefined, workspace: string, + policyManager?: SandboxPolicyManager, + approvalMode?: string, ): SandboxManager { + if (approvalMode === 'yolo') { + return new NoopSandboxManager(); + } + const isWindows = os.platform() === 'win32'; if ( @@ -36,7 +43,15 @@ export function createSandboxManager( return new LinuxSandboxManager({ workspace }); } if (os.platform() === 'darwin') { - return new MacOsSandboxManager({ workspace }); + const modeConfig = + policyManager && approvalMode + ? policyManager.getModeConfig(approvalMode) + : undefined; + return new MacOsSandboxManager({ + workspace, + modeConfig, + policyManager, + }); } return new LocalSandboxManager(); } diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index 98396fa4ee..a5697104ec 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -31,7 +31,11 @@ import { sanitizeEnvironment, type EnvironmentSanitizationConfig, } from './environmentSanitization.js'; -import { NoopSandboxManager, type SandboxManager } from './sandboxManager.js'; +import { + NoopSandboxManager, + type SandboxManager, + type SandboxPermissions, +} from './sandboxManager.js'; import type { SandboxConfig } from '../config/config.js'; import { killProcessGroup } from '../utils/process-utils.js'; import { @@ -84,6 +88,7 @@ export type ShellExecutionResult = ExecutionResult; export type ShellExecutionHandle = ExecutionHandle; export interface ShellExecutionConfig { + additionalPermissions?: SandboxPermissions; terminalWidth?: number; terminalHeight?: number; pager?: string; @@ -441,6 +446,7 @@ export class ShellExecutionService { ...shellExecutionConfig, ...(shellExecutionConfig.sandboxConfig || {}), sanitizationConfig, + additionalPermissions: shellExecutionConfig.additionalPermissions, }, }); diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index e2bab4d050..65e193cfcf 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -583,6 +583,35 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps "name": "run_shell_command", "parametersJsonSchema": { "properties": { + "additional_permissions": { + "description": "Sandbox permissions for the command. Use this to request additional sandboxed filesystem or network permissions if a previous command failed with "Operation not permitted".", + "properties": { + "fileSystem": { + "properties": { + "read": { + "description": "List of additional absolute paths to allow reading.", + "items": { + "type": "string", + }, + "type": "array", + }, + "write": { + "description": "List of additional absolute paths to allow writing.", + "items": { + "type": "string", + }, + "type": "array", + }, + }, + "type": "object", + }, + "network": { + "description": "Set to true to enable network access for this command.", + "type": "boolean", + }, + }, + "type": "object", + }, "command": { "description": "Exact bash command to execute as \`bash -c \`", "type": "string", @@ -1348,6 +1377,35 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > "name": "run_shell_command", "parametersJsonSchema": { "properties": { + "additional_permissions": { + "description": "Sandbox permissions for the command. Use this to request additional sandboxed filesystem or network permissions if a previous command failed with "Operation not permitted".", + "properties": { + "fileSystem": { + "properties": { + "read": { + "description": "List of additional absolute paths to allow reading.", + "items": { + "type": "string", + }, + "type": "array", + }, + "write": { + "description": "List of additional absolute paths to allow writing.", + "items": { + "type": "string", + }, + "type": "array", + }, + }, + "type": "object", + }, + "network": { + "description": "Set to true to enable network access for this command.", + "type": "boolean", + }, + }, + "type": "object", + }, "command": { "description": "Exact bash command to execute as \`bash -c \`", "type": "string", diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index b39dc42286..8fcaf95905 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -122,3 +122,6 @@ export const EXIT_PLAN_PARAM_PLAN_PATH = 'plan_path'; // -- enter_plan_mode -- export const ENTER_PLAN_MODE_TOOL_NAME = 'enter_plan_mode'; export const PLAN_MODE_PARAM_REASON = 'reason'; + +// -- sandbox -- +export const PARAM_ADDITIONAL_PERMISSIONS = 'additional_permissions'; diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 79c66d81f6..b884b2a9ea 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -23,6 +23,7 @@ import { SHELL_PARAM_IS_BACKGROUND, EXIT_PLAN_PARAM_PLAN_PATH, SKILL_PARAM_NAME, + PARAM_ADDITIONAL_PERMISSIONS, } from './base-declarations.js'; /** @@ -109,6 +110,35 @@ export function getShellDeclaration( description: 'Set to true if this command should be run in the background (e.g. for long-running servers or watchers). The command will be started, allowed to run for a brief moment to check for immediate errors, and then moved to the background.', }, + [PARAM_ADDITIONAL_PERMISSIONS]: { + type: 'object', + description: + 'Sandbox permissions for the command. Use this to request additional sandboxed filesystem or network permissions if a previous command failed with "Operation not permitted".', + properties: { + network: { + type: 'boolean', + description: + 'Set to true to enable network access for this command.', + }, + fileSystem: { + type: 'object', + properties: { + read: { + type: 'array', + items: { type: 'string' }, + description: + 'List of additional absolute paths to allow reading.', + }, + write: { + type: 'array', + items: { type: 'string' }, + description: + 'List of additional absolute paths to allow writing.', + }, + }, + }, + }, + }, }, required: [SHELL_PARAM_COMMAND], }, diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 86e3a68bc5..116718c946 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -5,10 +5,12 @@ */ import fsPromises from 'node:fs/promises'; +import fs from 'node:fs'; import path from 'node:path'; import os from 'node:os'; import crypto from 'node:crypto'; import { debugLogger } from '../index.js'; +import type { SandboxPermissions } from '../services/sandboxManager.js'; import { ToolErrorType } from './tool-error.js'; import { BaseDeclarativeTool, @@ -41,6 +43,7 @@ import { hasRedirection, } from '../utils/shell-utils.js'; import { SHELL_TOOL_NAME } from './tool-names.js'; +import { PARAM_ADDITIONAL_PERMISSIONS } from './definitions/base-declarations.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; import { getShellDefinition } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; @@ -56,6 +59,7 @@ export interface ShellToolParams { description?: string; dir_path?: string; is_background?: boolean; + [PARAM_ADDITIONAL_PERMISSIONS]?: SandboxPermissions; } export class ShellToolInvocation extends BaseToolInvocation< @@ -122,6 +126,15 @@ export class ShellToolInvocation extends BaseToolInvocation< return undefined; } + override async shouldConfirmExecute( + abortSignal: AbortSignal, + ): Promise { + if (this.params[PARAM_ADDITIONAL_PERMISSIONS]) { + return this.getConfirmationDetails(abortSignal); + } + return super.shouldConfirmExecute(abortSignal); + } + protected override async getConfirmationDetails( _abortSignal: AbortSignal, ): Promise { @@ -148,6 +161,32 @@ export class ShellToolInvocation extends BaseToolInvocation< // Rely entirely on PolicyEngine for interactive confirmation. // If we are here, it means PolicyEngine returned ASK_USER (or no message bus), // so we must provide confirmation details. + // If additional_permissions are provided, it's an expansion request + if (this.params[PARAM_ADDITIONAL_PERMISSIONS]) { + return { + type: 'sandbox_expansion', + title: 'Sandbox Expansion Request', + command: this.params.command, + rootCommand: rootCommandDisplay, + additionalPermissions: this.params[PARAM_ADDITIONAL_PERMISSIONS], + onConfirm: async (outcome: ToolConfirmationOutcome) => { + if (outcome === ToolConfirmationOutcome.ProceedAlwaysAndSave) { + const commandName = rootCommands[0] || 'shell'; + this.context.config.sandboxPolicyManager.addPersistentApproval( + commandName, + this.params[PARAM_ADDITIONAL_PERMISSIONS]!, + ); + } else if (outcome === ToolConfirmationOutcome.ProceedAlways) { + const commandName = rootCommands[0] || 'shell'; + this.context.config.sandboxPolicyManager.addSessionApproval( + commandName, + this.params[PARAM_ADDITIONAL_PERMISSIONS]!, + ); + } + }, + }; + } + const confirmationDetails: ToolExecuteConfirmationDetails = { type: 'exec', title: 'Confirm Shell Command', @@ -293,6 +332,7 @@ export class ShellToolInvocation extends BaseToolInvocation< shellExecutionConfig?.sanitizationConfig ?? this.context.config.sanitizationConfig, sandboxManager: this.context.config.sandboxManager, + additionalPermissions: this.params[PARAM_ADDITIONAL_PERMISSIONS], }, ); @@ -326,6 +366,13 @@ export class ShellToolInvocation extends BaseToolInvocation< const pgrepLines = pgrepContent.split(os.EOL).filter(Boolean); for (const line of pgrepLines) { if (!/^\d+$/.test(line)) { + if ( + line.includes('sysmond service not found') || + line.includes('Cannot get process list') || + line.includes('sysmon request failed') + ) { + continue; + } debugLogger.error(`pgrep: ${line}`); } const pid = Number(line); @@ -430,6 +477,165 @@ export class ShellToolInvocation extends BaseToolInvocation< } } + // Heuristic Sandbox Denial Detection + const lowerOutput = ( + (result.output || '') + + ' ' + + (result.error?.message || '') + ).toLowerCase(); + const isFileDenial = [ + 'operation not permitted', + 'vim:e303', + 'should be read/write', + 'sandbox_apply', + 'sandbox: ', + ].some((keyword) => lowerOutput.includes(keyword)); + + const isNetworkDenial = [ + 'error connecting to', + 'network is unreachable', + 'could not resolve host', + 'connection refused', + 'no address associated with hostname', + ].some((keyword) => lowerOutput.includes(keyword)); + + // Only trigger heuristic if the command actually failed (exit code != 0 or aborted) + const failed = + !!result.error || + !!result.signal || + (result.exitCode !== undefined && result.exitCode !== 0) || + result.aborted; + + if (failed && (isFileDenial || isNetworkDenial)) { + const strippedCommand = stripShellWrapper(this.params.command); + const rootCommands = getCommandRoots(strippedCommand).filter( + (r) => r !== 'shopt', + ); + const rootCommandDisplay = + rootCommands.length > 0 ? rootCommands[0] : 'shell'; + // Extract denied paths + const deniedPaths = new Set(); + const regex = + /(?:^|\s)['"]?(\/[\w.-/]+)['"]?:\s*[Oo]peration not permitted/gi; + let match; + while ((match = regex.exec(result.output || '')) !== null) { + deniedPaths.add(match[1]); + } + while ((match = regex.exec(result.error?.message || '')) !== null) { + deniedPaths.add(match[1]); + } + + if (isFileDenial && deniedPaths.size === 0) { + // Fallback heuristic: look for any absolute path in the output + // Avoid matching simple commands like /bin/sh + const fallbackRegex = + /(?:^|[\s"'[\]])(\/[a-zA-Z0-9_.-]+(?:\/[a-zA-Z0-9_.-]+)+)(?:$|[\s"'[\]:])/gi; + let m; + while ((m = fallbackRegex.exec(result.output || '')) !== null) { + const p = m[1]; + if (p && !p.startsWith('/bin/') && !p.startsWith('/usr/bin/')) { + deniedPaths.add(p); + } + } + while ( + (m = fallbackRegex.exec(result.error?.message || '')) !== null + ) { + const p = m[1]; + if (p && !p.startsWith('/bin/') && !p.startsWith('/usr/bin/')) { + deniedPaths.add(p); + } + } + } + + const readPaths = new Set( + this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.read || [], + ); + const writePaths = new Set( + this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.write || [], + ); + + for (const p of deniedPaths) { + try { + // Find an existing parent directory to add instead of a non-existent file + let currentPath = p; + try { + if ( + fs.existsSync(currentPath) && + fs.statSync(currentPath).isFile() + ) { + currentPath = path.dirname(currentPath); + } + } catch (_e) { + /* ignore */ + } + while (currentPath.length > 1) { + if (fs.existsSync(currentPath)) { + writePaths.add(currentPath); + readPaths.add(currentPath); + break; + } + currentPath = path.dirname(currentPath); + } + } catch (_e) { + // ignore + } + } + + const additionalPermissions = { + network: + isNetworkDenial || + this.params[PARAM_ADDITIONAL_PERMISSIONS]?.network || + undefined, + fileSystem: + isFileDenial || writePaths.size > 0 + ? { + read: Array.from(readPaths), + write: Array.from(writePaths), + } + : undefined, + }; + + const originalReadSize = + this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.read?.length || + 0; + const originalWriteSize = + this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.write + ?.length || 0; + const originalNetwork = + !!this.params[PARAM_ADDITIONAL_PERMISSIONS]?.network; + + const newReadSize = additionalPermissions.fileSystem?.read?.length || 0; + const newWriteSize = + additionalPermissions.fileSystem?.write?.length || 0; + const newNetwork = !!additionalPermissions.network; + + const hasNewPermissions = + newReadSize > originalReadSize || + newWriteSize > originalWriteSize || + (!originalNetwork && newNetwork); + + if (hasNewPermissions) { + const confirmationDetails = { + type: 'sandbox_expansion', + title: 'Sandbox Expansion Request', + command: this.params.command, + rootCommand: rootCommandDisplay, + additionalPermissions, + }; + + return { + llmContent: 'Sandbox expansion required', + returnDisplay: returnDisplayMessage, + error: { + type: ToolErrorType.SANDBOX_EXPANSION_REQUIRED, + message: JSON.stringify(confirmationDetails), + }, + }; + } + // If no new permissions were found by heuristic, do not intercept. + // Just return the normal execution error so the LLM can try providing explicit paths itself. + } + const summarizeConfig = this.context.config.getSummarizeToolOutputConfig(); const executionError = result.error diff --git a/packages/core/src/tools/tool-error.ts b/packages/core/src/tools/tool-error.ts index f29470b780..3ab221404a 100644 --- a/packages/core/src/tools/tool-error.ts +++ b/packages/core/src/tools/tool-error.ts @@ -64,6 +64,7 @@ export enum ToolErrorType { // Shell errors SHELL_EXECUTE_ERROR = 'shell_execute_error', + SANDBOX_EXPANSION_REQUIRED = 'sandbox_expansion_required', // DiscoveredTool-specific Errors DISCOVERED_TOOL_EXECUTION_ERROR = 'discovered_tool_execution_error', diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index a9f3b57f4e..6b22f7a3e3 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -992,6 +992,16 @@ export type ToolConfirmationPayload = | ToolAskUserConfirmationPayload | ToolExitPlanModeConfirmationPayload; +export interface ToolSandboxExpansionConfirmationDetails { + type: 'sandbox_expansion'; + systemMessage?: string; + title: string; + command: string; + rootCommand: string; + additionalPermissions: import('../services/sandboxManager.js').SandboxPermissions; + onConfirm: (outcome: ToolConfirmationOutcome) => Promise; +} + export interface ToolExecuteConfirmationDetails { type: 'exec'; title: string; @@ -1048,6 +1058,7 @@ export interface ToolExitPlanModeConfirmationDetails { } export type ToolCallConfirmationDetails = + | ToolSandboxExpansionConfirmationDetails | ToolEditConfirmationDetails | ToolExecuteConfirmationDetails | ToolMcpConfirmationDetails diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts index 14fce36a34..119e8cd7f8 100644 --- a/packages/core/src/utils/shell-utils.ts +++ b/packages/core/src/utils/shell-utils.ts @@ -704,7 +704,7 @@ export function getCommandRoots(command: string): string[] { export function stripShellWrapper(command: string): string { const pattern = - /^\s*(?:(?:sh|bash|zsh)\s+-c|cmd\.exe\s+\/c|powershell(?:\.exe)?\s+(?:-NoProfile\s+)?-Command|pwsh(?:\.exe)?\s+(?:-NoProfile\s+)?-Command)\s+/i; + /^\s*(?:(?:(?:\S+\/)?(?:sh|bash|zsh))\s+-c|cmd\.exe\s+\/c|powershell(?:\.exe)?\s+(?:-NoProfile\s+)?-Command|pwsh(?:\.exe)?\s+(?:-NoProfile\s+)?-Command)\s+/i; const match = command.match(pattern); if (match) { let newCommand = command.substring(match[0].length).trim(); From 46fd7b4864111032a1c7dfa1821b2000fc7531da Mon Sep 17 00:00:00 2001 From: Sri Pasumarthi <111310667+sripasg@users.noreply.github.com> Date: Mon, 23 Mar 2026 22:34:08 -0700 Subject: [PATCH 04/11] fix(acp): Pass the cwd to `AcpFileSystemService` to avoid looping failures in asking for perms to write plan md file (#23612) --- packages/cli/src/acp/acpClient.ts | 25 +-- .../cli/src/acp/fileSystemService.test.ts | 147 ++++++++++++++++-- packages/cli/src/acp/fileSystemService.ts | 68 ++++++-- 3 files changed, 202 insertions(+), 38 deletions(-) diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index 7a45f98dc7..57903822e9 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -300,6 +300,7 @@ export class GeminiAgent { sessionId, this.clientCapabilities.fs, config.getFileSystemService(), + cwd, ); config.setFileSystemService(acpFileSystemService); } @@ -357,16 +358,6 @@ export class GeminiAgent { const { sessionData, sessionPath } = await sessionSelector.resolveSession(sessionId); - if (this.clientCapabilities?.fs) { - const acpFileSystemService = new AcpFileSystemService( - this.connection, - sessionId, - this.clientCapabilities.fs, - config.getFileSystemService(), - ); - config.setFileSystemService(acpFileSystemService); - } - const clientHistory = convertSessionToClientHistory(sessionData.messages); const geminiClient = config.getGeminiClient(); @@ -440,7 +431,19 @@ export class GeminiAgent { throw acp.RequestError.authRequired(); } - // 3. Now that we are authenticated, it is safe to initialize the config + // 3. Set the ACP FileSystemService (if supported) before config initialization + if (this.clientCapabilities?.fs) { + const acpFileSystemService = new AcpFileSystemService( + this.connection, + sessionId, + this.clientCapabilities.fs, + config.getFileSystemService(), + cwd, + ); + config.setFileSystemService(acpFileSystemService); + } + + // 4. Now that we are authenticated, it is safe to initialize the config // which starts the MCP servers and other heavy resources. await config.initialize(); startupProfiler.flush(config); diff --git a/packages/cli/src/acp/fileSystemService.test.ts b/packages/cli/src/acp/fileSystemService.test.ts index 66624d5449..188aadbc09 100644 --- a/packages/cli/src/acp/fileSystemService.test.ts +++ b/packages/cli/src/acp/fileSystemService.test.ts @@ -4,10 +4,25 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi, beforeEach, type Mocked } from 'vitest'; +import { + describe, + it, + expect, + vi, + beforeEach, + afterEach, + type Mocked, +} from 'vitest'; import { AcpFileSystemService } from './fileSystemService.js'; import type { AgentSideConnection } from '@agentclientprotocol/sdk'; import type { FileSystemService } from '@google/gemini-cli-core'; +import os from 'node:os'; + +vi.mock('node:os', () => ({ + default: { + homedir: vi.fn(), + }, +})); describe('AcpFileSystemService', () => { let mockConnection: Mocked; @@ -25,13 +40,19 @@ describe('AcpFileSystemService', () => { readTextFile: vi.fn(), writeTextFile: vi.fn(), }; + vi.mocked(os.homedir).mockReturnValue('/home/user'); + }); + + afterEach(() => { + vi.restoreAllMocks(); }); describe('readTextFile', () => { it.each([ { capability: true, - desc: 'connection if capability exists', + path: '/path/to/file', + desc: 'connection if capability exists and file is inside root', setup: () => { mockConnection.readTextFile.mockResolvedValue({ content: 'content' }); }, @@ -45,6 +66,7 @@ describe('AcpFileSystemService', () => { }, { capability: false, + path: '/path/to/file', desc: 'fallback if capability missing', setup: () => { mockFallback.readTextFile.mockResolvedValue('content'); @@ -56,19 +78,72 @@ describe('AcpFileSystemService', () => { expect(mockConnection.readTextFile).not.toHaveBeenCalled(); }, }, - ])('should use $desc', async ({ capability, setup, verify }) => { + { + capability: true, + path: '/outside/file', + desc: 'fallback if capability exists but file is outside root', + setup: () => { + mockFallback.readTextFile.mockResolvedValue('content'); + }, + verify: () => { + expect(mockFallback.readTextFile).toHaveBeenCalledWith( + '/outside/file', + ); + expect(mockConnection.readTextFile).not.toHaveBeenCalled(); + }, + }, + { + capability: true, + path: '/home/user/.gemini/tmp/file.md', + root: '/home/user', + desc: 'fallback if file is inside global gemini dir, even if root overlaps', + setup: () => { + mockFallback.readTextFile.mockResolvedValue('content'); + }, + verify: () => { + expect(mockFallback.readTextFile).toHaveBeenCalledWith( + '/home/user/.gemini/tmp/file.md', + ); + expect(mockConnection.readTextFile).not.toHaveBeenCalled(); + }, + }, + ])( + 'should use $desc', + async ({ capability, path, root, setup, verify }) => { + service = new AcpFileSystemService( + mockConnection, + 'session-1', + { readTextFile: capability, writeTextFile: true }, + mockFallback, + root || '/path/to', + ); + setup(); + + const result = await service.readTextFile(path); + + expect(result).toBe('content'); + verify(); + }, + ); + + it('should throw normalized ENOENT error when readTextFile encounters "Resource not found"', async () => { service = new AcpFileSystemService( mockConnection, 'session-1', - { readTextFile: capability, writeTextFile: true }, + { readTextFile: true, writeTextFile: true }, mockFallback, + '/path/to', + ); + mockConnection.readTextFile.mockRejectedValue( + new Error('Resource not found for document'), ); - setup(); - const result = await service.readTextFile('/path/to/file'); - - expect(result).toBe('content'); - verify(); + await expect( + service.readTextFile('/path/to/missing'), + ).rejects.toMatchObject({ + code: 'ENOENT', + message: 'Resource not found for document', + }); }); }); @@ -76,7 +151,8 @@ describe('AcpFileSystemService', () => { it.each([ { capability: true, - desc: 'connection if capability exists', + path: '/path/to/file', + desc: 'connection if capability exists and file is inside root', verify: () => { expect(mockConnection.writeTextFile).toHaveBeenCalledWith({ path: '/path/to/file', @@ -88,6 +164,7 @@ describe('AcpFileSystemService', () => { }, { capability: false, + path: '/path/to/file', desc: 'fallback if capability missing', verify: () => { expect(mockFallback.writeTextFile).toHaveBeenCalledWith( @@ -97,17 +174,63 @@ describe('AcpFileSystemService', () => { expect(mockConnection.writeTextFile).not.toHaveBeenCalled(); }, }, - ])('should use $desc', async ({ capability, verify }) => { + { + capability: true, + path: '/outside/file', + desc: 'fallback if capability exists but file is outside root', + verify: () => { + expect(mockFallback.writeTextFile).toHaveBeenCalledWith( + '/outside/file', + 'content', + ); + expect(mockConnection.writeTextFile).not.toHaveBeenCalled(); + }, + }, + { + capability: true, + path: '/home/user/.gemini/tmp/file.md', + root: '/home/user', + desc: 'fallback if file is inside global gemini dir, even if root overlaps', + verify: () => { + expect(mockFallback.writeTextFile).toHaveBeenCalledWith( + '/home/user/.gemini/tmp/file.md', + 'content', + ); + expect(mockConnection.writeTextFile).not.toHaveBeenCalled(); + }, + }, + ])('should use $desc', async ({ capability, path, root, verify }) => { service = new AcpFileSystemService( mockConnection, 'session-1', { writeTextFile: capability, readTextFile: true }, mockFallback, + root || '/path/to', ); - await service.writeTextFile('/path/to/file', 'content'); + await service.writeTextFile(path, 'content'); verify(); }); + + it('should throw normalized ENOENT error when writeTextFile encounters "Resource not found"', async () => { + service = new AcpFileSystemService( + mockConnection, + 'session-1', + { readTextFile: true, writeTextFile: true }, + mockFallback, + '/path/to', + ); + mockConnection.writeTextFile.mockRejectedValue( + new Error('Resource not found for directory'), + ); + + await expect( + service.writeTextFile('/path/to/missing', 'content'), + ).rejects.toMatchObject({ + code: 'ENOENT', + message: 'Resource not found for directory', + }); + }); }); }); diff --git a/packages/cli/src/acp/fileSystemService.ts b/packages/cli/src/acp/fileSystemService.ts index 02b9d68195..b020cd27f2 100644 --- a/packages/cli/src/acp/fileSystemService.ts +++ b/packages/cli/src/acp/fileSystemService.ts @@ -4,44 +4,82 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { FileSystemService } from '@google/gemini-cli-core'; +import { isWithinRoot, type FileSystemService } from '@google/gemini-cli-core'; import type * as acp from '@agentclientprotocol/sdk'; +import os from 'node:os'; +import path from 'node:path'; /** * ACP client-based implementation of FileSystemService */ export class AcpFileSystemService implements FileSystemService { + private readonly geminiDir = path.join(os.homedir(), '.gemini'); + constructor( private readonly connection: acp.AgentSideConnection, private readonly sessionId: string, private readonly capabilities: acp.FileSystemCapabilities, private readonly fallback: FileSystemService, + private readonly root: string, ) {} + private shouldUseFallback(filePath: string): boolean { + // Files inside the global CLI directory must always use the native file system, + // even if the user runs the CLI directly from their home directory (which + // would make the IDE's project root overlap with the global directory). + return ( + !isWithinRoot(filePath, this.root) || + isWithinRoot(filePath, this.geminiDir) + ); + } + + private normalizeFileSystemError(err: unknown): never { + const errorMessage = err instanceof Error ? err.message : String(err); + if ( + errorMessage.includes('Resource not found') || + errorMessage.includes('ENOENT') || + errorMessage.includes('does not exist') || + errorMessage.includes('No such file') + ) { + const newErr = new Error(errorMessage) as NodeJS.ErrnoException; + newErr.code = 'ENOENT'; + throw newErr; + } + throw err; + } + async readTextFile(filePath: string): Promise { - if (!this.capabilities.readTextFile) { + if (!this.capabilities.readTextFile || this.shouldUseFallback(filePath)) { return this.fallback.readTextFile(filePath); } - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const response = await this.connection.readTextFile({ - path: filePath, - sessionId: this.sessionId, - }); + try { + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const response = await this.connection.readTextFile({ + path: filePath, + sessionId: this.sessionId, + }); - // eslint-disable-next-line @typescript-eslint/no-unsafe-return - return response.content; + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return response.content; + } catch (err: unknown) { + this.normalizeFileSystemError(err); + } } async writeTextFile(filePath: string, content: string): Promise { - if (!this.capabilities.writeTextFile) { + if (!this.capabilities.writeTextFile || this.shouldUseFallback(filePath)) { return this.fallback.writeTextFile(filePath, content); } - await this.connection.writeTextFile({ - path: filePath, - content, - sessionId: this.sessionId, - }); + try { + await this.connection.writeTextFile({ + path: filePath, + content, + sessionId: this.sessionId, + }); + } catch (err: unknown) { + this.normalizeFileSystemError(err); + } } } From dcedc429798ab85500b53bb1a29159fa8090e740 Mon Sep 17 00:00:00 2001 From: Adib234 <30782825+Adib234@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:19:29 -0400 Subject: [PATCH 05/11] fix(plan): sandbox path resolution in Plan Mode to prevent hallucinations (#22737) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- evals/plan_mode.eval.ts | 50 +++++++++++++ .../src/ui/components/ExitPlanModeDialog.tsx | 1 - packages/core/src/config/config.ts | 4 + .../src/tools/confirmation-policy.test.ts | 1 + .../coreToolsModelSnapshots.test.ts.snap | 12 +-- .../tools/definitions/base-declarations.ts | 2 +- .../core/src/tools/definitions/coreTools.ts | 8 +- .../coreToolsModelSnapshots.test.ts | 2 +- .../dynamic-declaration-helpers.ts | 12 ++- .../model-family-sets/default-legacy.ts | 2 +- .../definitions/model-family-sets/gemini-3.ts | 2 +- packages/core/src/tools/definitions/types.ts | 2 +- packages/core/src/tools/edit.test.ts | 40 ++++++++++ packages/core/src/tools/edit.ts | 8 +- .../core/src/tools/exit-plan-mode.test.ts | 75 +++++++------------ packages/core/src/tools/exit-plan-mode.ts | 31 ++++---- packages/core/src/tools/line-endings.test.ts | 4 + packages/core/src/tools/tool-names.ts | 4 +- packages/core/src/tools/write-file.test.ts | 1 + packages/core/src/tools/write-file.ts | 17 ++++- packages/core/src/utils/planUtils.test.ts | 16 +--- packages/core/src/utils/planUtils.ts | 10 +-- 22 files changed, 193 insertions(+), 111 deletions(-) diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index a37e5f91b4..8b01f68155 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -136,6 +136,32 @@ describe('plan_mode', () => { expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe( true, ); + + const toolLogs = rig.readToolLogs(); + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Expected to find exit_plan_mode in tool logs', + ).toBeDefined(); + + const args = JSON.parse(exitPlanCall!.toolRequest.args); + expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf( + 'string', + ); + expect(args.plan_filename, 'plan_filename should end with .md').toMatch( + /\.md$/, + ); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('/'); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('\\'); + assertModelHasOutput(result); }, }); @@ -199,6 +225,30 @@ describe('plan_mode', () => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Expected to find exit_plan_mode in tool logs', + ).toBeDefined(); + + const args = JSON.parse(exitPlanCall!.toolRequest.args); + expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf( + 'string', + ); + expect(args.plan_filename, 'plan_filename should end with .md').toMatch( + /\.md$/, + ); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('/'); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('\\'); + // Check if plan was written const planWrite = toolLogs.find( (log) => diff --git a/packages/cli/src/ui/components/ExitPlanModeDialog.tsx b/packages/cli/src/ui/components/ExitPlanModeDialog.tsx index 4124a7c6d7..b2c28abaeb 100644 --- a/packages/cli/src/ui/components/ExitPlanModeDialog.tsx +++ b/packages/cli/src/ui/components/ExitPlanModeDialog.tsx @@ -80,7 +80,6 @@ function usePlanContent(planPath: string, config: Config): PlanContentState { const pathError = await validatePlanPath( planPath, config.storage.getPlansDir(), - config.getTargetDir(), ); if (ignore) return; if (pathError) { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 12ff9ad37e..e32205d070 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -2335,6 +2335,10 @@ export class Config implements McpContext, AgentLoopContext { return this.policyEngine.getApprovalMode(); } + isPlanMode(): boolean { + return this.getApprovalMode() === ApprovalMode.PLAN; + } + getPolicyUpdateConfirmationRequest(): | PolicyUpdateConfirmationRequest | undefined { diff --git a/packages/core/src/tools/confirmation-policy.test.ts b/packages/core/src/tools/confirmation-policy.test.ts index af9f178b8b..2d006b3d2c 100644 --- a/packages/core/src/tools/confirmation-policy.test.ts +++ b/packages/core/src/tools/confirmation-policy.test.ts @@ -71,6 +71,7 @@ describe('Tool Confirmation Policy Updates', () => { getDisableLLMCorrection: () => true, getIdeMode: () => false, getActiveModel: () => 'test-model', + isPlanMode: () => false, getWorkspaceContext: () => ({ isPathWithinWorkspace: () => true, getDirectories: () => [rootDir], diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index 65e193cfcf..5a8291bcfc 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -169,13 +169,13 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps "name": "exit_plan_mode", "parametersJsonSchema": { "properties": { - "plan_path": { - "description": "The file path to the finalized plan (e.g., "/mock/plans/feature-x.md"). This path MUST be within the designated plans directory: /mock/plans/", + "plan_filename": { + "description": "The filename of the finalized plan (e.g., "feature-x.md"). Do not provide an absolute path.", "type": "string", }, }, "required": [ - "plan_path", + "plan_filename", ], "type": "object", }, @@ -987,13 +987,13 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > "name": "exit_plan_mode", "parametersJsonSchema": { "properties": { - "plan_path": { - "description": "The file path to the finalized plan (e.g., "/mock/plans/feature-x.md"). This path MUST be within the designated plans directory: /mock/plans/", + "plan_filename": { + "description": "The filename of the finalized plan (e.g., "feature-x.md"). Do not provide an absolute path.", "type": "string", }, }, "required": [ - "plan_path", + "plan_filename", ], "type": "object", }, diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index 8fcaf95905..c7c4223546 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -117,7 +117,7 @@ export const ASK_USER_OPTION_PARAM_DESCRIPTION = 'description'; // -- exit_plan_mode -- export const EXIT_PLAN_MODE_TOOL_NAME = 'exit_plan_mode'; -export const EXIT_PLAN_PARAM_PLAN_PATH = 'plan_path'; +export const EXIT_PLAN_PARAM_PLAN_FILENAME = 'plan_filename'; // -- enter_plan_mode -- export const ENTER_PLAN_MODE_TOOL_NAME = 'enter_plan_mode'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index b5121ca5d2..9204f9240e 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -89,7 +89,7 @@ export { ASK_USER_OPTION_PARAM_LABEL, ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, - EXIT_PLAN_PARAM_PLAN_PATH, + EXIT_PLAN_PARAM_PLAN_FILENAME, SKILL_PARAM_NAME, } from './base-declarations.js'; @@ -244,10 +244,10 @@ export function getShellDefinition( }; } -export function getExitPlanModeDefinition(plansDir: string): ToolDefinition { +export function getExitPlanModeDefinition(): ToolDefinition { return { - base: getExitPlanModeDeclaration(plansDir), - overrides: (modelId) => getToolSet(modelId).exit_plan_mode(plansDir), + base: getExitPlanModeDeclaration(), + overrides: (modelId) => getToolSet(modelId).exit_plan_mode(), }; } diff --git a/packages/core/src/tools/definitions/coreToolsModelSnapshots.test.ts b/packages/core/src/tools/definitions/coreToolsModelSnapshots.test.ts index c80350808e..6ccea4274c 100644 --- a/packages/core/src/tools/definitions/coreToolsModelSnapshots.test.ts +++ b/packages/core/src/tools/definitions/coreToolsModelSnapshots.test.ts @@ -82,7 +82,7 @@ describe('coreTools snapshots for specific models', () => { { name: 'enter_plan_mode', definition: ENTER_PLAN_MODE_DEFINITION }, { name: 'exit_plan_mode', - definition: getExitPlanModeDefinition('/mock/plans'), + definition: getExitPlanModeDefinition(), }, { name: 'activate_skill', diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index b884b2a9ea..e33d42311a 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -21,7 +21,7 @@ import { PARAM_DESCRIPTION, PARAM_DIR_PATH, SHELL_PARAM_IS_BACKGROUND, - EXIT_PLAN_PARAM_PLAN_PATH, + EXIT_PLAN_PARAM_PLAN_FILENAME, SKILL_PARAM_NAME, PARAM_ADDITIONAL_PERMISSIONS, } from './base-declarations.js'; @@ -148,20 +148,18 @@ export function getShellDeclaration( /** * Returns the FunctionDeclaration for exiting plan mode. */ -export function getExitPlanModeDeclaration( - plansDir: string, -): FunctionDeclaration { +export function getExitPlanModeDeclaration(): FunctionDeclaration { return { name: EXIT_PLAN_MODE_TOOL_NAME, description: 'Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.', parametersJsonSchema: { type: 'object', - required: [EXIT_PLAN_PARAM_PLAN_PATH], + required: [EXIT_PLAN_PARAM_PLAN_FILENAME], properties: { - [EXIT_PLAN_PARAM_PLAN_PATH]: { + [EXIT_PLAN_PARAM_PLAN_FILENAME]: { type: 'string', - description: `The file path to the finalized plan (e.g., "${plansDir}/feature-x.md"). This path MUST be within the designated plans directory: ${plansDir}/`, + description: `The filename of the finalized plan (e.g., "feature-x.md"). Do not provide an absolute path.`, }, }, }, diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 5c219f4685..061dfdbc8b 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -739,6 +739,6 @@ The agent did not use the todo list because this task could be completed by a ti }, }, - exit_plan_mode: (plansDir) => getExitPlanModeDeclaration(plansDir), + exit_plan_mode: () => getExitPlanModeDeclaration(), activate_skill: (skillNames) => getActivateSkillDeclaration(skillNames), }; diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index cac98a90b3..f7d9fa499c 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -714,6 +714,6 @@ The agent did not use the todo list because this task could be completed by a ti }, }, - exit_plan_mode: (plansDir) => getExitPlanModeDeclaration(plansDir), + exit_plan_mode: () => getExitPlanModeDeclaration(), activate_skill: (skillNames) => getActivateSkillDeclaration(skillNames), }; diff --git a/packages/core/src/tools/definitions/types.ts b/packages/core/src/tools/definitions/types.ts index a9bd3d85d7..9d335310e9 100644 --- a/packages/core/src/tools/definitions/types.ts +++ b/packages/core/src/tools/definitions/types.ts @@ -47,6 +47,6 @@ export interface CoreToolSet { get_internal_docs: FunctionDeclaration; ask_user: FunctionDeclaration; enter_plan_mode: FunctionDeclaration; - exit_plan_mode: (plansDir: string) => FunctionDeclaration; + exit_plan_mode: () => FunctionDeclaration; activate_skill: (skillNames: string[]) => FunctionDeclaration; } diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index 71762faea1..66111aed9d 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -131,8 +131,10 @@ describe('EditTool', () => { isInteractive: () => false, getDisableLLMCorrection: vi.fn(() => true), getExperiments: () => {}, + isPlanMode: vi.fn(() => false), storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'), + getPlansDir: vi.fn().mockReturnValue('/tmp/plans'), }, isPathAllowed(this: Config, absolutePath: string): boolean { const workspaceContext = this.getWorkspaceContext(); @@ -1299,4 +1301,42 @@ function doIt() { ); }); }); + + describe('plan mode', () => { + it('should allow edits to plans directory when isPlanMode is true', async () => { + const mockProjectTempDir = path.join(tempDir, 'project'); + fs.mkdirSync(mockProjectTempDir); + vi.mocked(mockConfig.storage.getProjectTempDir).mockReturnValue( + mockProjectTempDir, + ); + + const plansDir = path.join(mockProjectTempDir, 'plans'); + fs.mkdirSync(plansDir); + + vi.mocked(mockConfig.isPlanMode).mockReturnValue(true); + vi.mocked(mockConfig.storage.getPlansDir).mockReturnValue(plansDir); + + const filePath = path.join(rootDir, 'test-file.txt'); + const planFilePath = path.join(plansDir, 'test-file.txt'); + const initialContent = 'some initial content'; + fs.writeFileSync(planFilePath, initialContent, 'utf8'); + + const params: EditToolParams = { + file_path: filePath, + instruction: 'Replace initial with new', + old_string: 'initial', + new_string: 'new', + }; + + const invocation = tool.build(params); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toMatch(/Successfully modified file/); + + // Verify plan file is written with new content + expect(fs.readFileSync(planFilePath, 'utf8')).toBe('some new content'); + + fs.rmSync(plansDir, { recursive: true, force: true }); + }); + }); }); diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index 434f4b2518..55c7f2f9ab 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -463,7 +463,13 @@ class EditToolInvocation true, () => this.config.getApprovalMode(), ); - if (!path.isAbsolute(this.params.file_path)) { + if (this.config.isPlanMode()) { + const safeFilename = path.basename(this.params.file_path); + this.resolvedPath = path.join( + this.config.storage.getPlansDir(), + safeFilename, + ); + } else if (!path.isAbsolute(this.params.file_path)) { const result = correctPath(this.params.file_path, this.config); if (result.success) { this.resolvedPath = result.correctedPath; diff --git a/packages/core/src/tools/exit-plan-mode.test.ts b/packages/core/src/tools/exit-plan-mode.test.ts index 855c5d2aba..ad643c6cb2 100644 --- a/packages/core/src/tools/exit-plan-mode.test.ts +++ b/packages/core/src/tools/exit-plan-mode.test.ts @@ -79,7 +79,7 @@ describe('ExitPlanModeTool', () => { describe('shouldConfirmExecute', () => { it('should return plan approval confirmation details when plan has content', async () => { const planRelativePath = createPlanFile('test-plan.md', '# My Plan'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const result = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -98,7 +98,7 @@ describe('ExitPlanModeTool', () => { it('should return false when plan file is empty', async () => { const planRelativePath = createPlanFile('empty.md', ' '); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const result = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -109,7 +109,7 @@ describe('ExitPlanModeTool', () => { it('should return false when plan file cannot be read', async () => { const planRelativePath = path.join('plans', 'non-existent.md'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const result = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -120,7 +120,7 @@ describe('ExitPlanModeTool', () => { it('should auto-approve when policy decision is ALLOW', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); vi.spyOn( invocation as unknown as { @@ -143,7 +143,7 @@ describe('ExitPlanModeTool', () => { it('should throw error when policy decision is DENY', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); vi.spyOn( invocation as unknown as { @@ -161,7 +161,7 @@ describe('ExitPlanModeTool', () => { describe('execute with invalid plan', () => { it('should return error when plan file is empty', async () => { const planRelativePath = createPlanFile('empty.md', ''); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); await invocation.shouldConfirmExecute(new AbortController().signal); const result = await invocation.execute(new AbortController().signal); @@ -171,8 +171,8 @@ describe('ExitPlanModeTool', () => { }); it('should return error when plan file cannot be read', async () => { - const planRelativePath = 'plans/ghost.md'; - const invocation = tool.build({ plan_path: planRelativePath }); + const planRelativePath = 'ghost.md'; + const invocation = tool.build({ plan_filename: planRelativePath }); await invocation.shouldConfirmExecute(new AbortController().signal); const result = await invocation.execute(new AbortController().signal); @@ -184,7 +184,7 @@ describe('ExitPlanModeTool', () => { describe('execute', () => { it('should return approval message when plan is approved with DEFAULT mode', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const confirmDetails = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -212,7 +212,7 @@ Read and follow the plan strictly during implementation.`, it('should return approval message when plan is approved with AUTO_EDIT mode', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const confirmDetails = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -243,7 +243,7 @@ Read and follow the plan strictly during implementation.`, it('should return feedback message when plan is rejected with feedback', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const confirmDetails = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -270,7 +270,7 @@ Revise the plan based on the feedback.`, it('should handle rejection without feedback gracefully', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const confirmDetails = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -296,7 +296,7 @@ Ask the user for specific feedback on how to improve the plan.`, it('should log plan execution event when plan is approved', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const confirmDetails = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -320,7 +320,7 @@ Ask the user for specific feedback on how to improve the plan.`, it('should return cancellation message when cancelled', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const confirmDetails = await invocation.shouldConfirmExecute( new AbortController().signal, @@ -343,7 +343,7 @@ Ask the user for specific feedback on how to improve the plan.`, describe('execute when shouldConfirmExecute is never called', () => { it('should approve with DEFAULT mode when approvalPayload is null (policy ALLOW skips confirmation)', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); // Simulate the scheduler's policy ALLOW path: execute() is called // directly without ever calling shouldConfirmExecute(), leaving @@ -364,7 +364,7 @@ Ask the user for specific feedback on how to improve the plan.`, it('should return YOLO when config.isInteractive() is false', async () => { mockConfig.isInteractive = vi.fn().mockReturnValue(false); const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); // Directly call execute to trigger the internal getAllowApprovalMode const result = await invocation.execute(new AbortController().signal); @@ -378,7 +378,7 @@ Ask the user for specific feedback on how to improve the plan.`, it('should return DEFAULT when config.isInteractive() is true', async () => { mockConfig.isInteractive = vi.fn().mockReturnValue(true); const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); // Directly call execute to trigger the internal getAllowApprovalMode const result = await invocation.execute(new AbortController().signal); @@ -393,7 +393,7 @@ Ask the user for specific feedback on how to improve the plan.`, describe('getApprovalModeDescription (internal)', () => { it('should handle all valid approval modes', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const testMode = async (mode: ApprovalMode, expected: string) => { const confirmDetails = await invocation.shouldConfirmExecute( @@ -426,7 +426,7 @@ Ask the user for specific feedback on how to improve the plan.`, it('should throw for invalid post-planning modes', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); - const invocation = tool.build({ plan_path: planRelativePath }); + const invocation = tool.build({ plan_filename: planRelativePath }); const testInvalidMode = async (mode: ApprovalMode) => { const confirmDetails = await invocation.shouldConfirmExecute( @@ -448,36 +448,19 @@ Ask the user for specific feedback on how to improve the plan.`, }); }); - it('should throw error during build if plan path is outside plans directory', () => { - expect(() => tool.build({ plan_path: '../../../etc/passwd' })).toThrow( - /Access denied/, - ); - }); - describe('validateToolParams', () => { - it('should reject empty plan_path', () => { - const result = tool.validateToolParams({ plan_path: '' }); - expect(result).toBe('plan_path is required.'); + it('should reject empty plan_filename', () => { + const result = tool.validateToolParams({ plan_filename: '' }); + expect(result).toBe('plan_filename is required.'); }); - it('should reject whitespace-only plan_path', () => { - const result = tool.validateToolParams({ plan_path: ' ' }); - expect(result).toBe('plan_path is required.'); - }); - - it('should reject path outside plans directory', () => { - const result = tool.validateToolParams({ - plan_path: '../../../etc/passwd', - }); - expect(result).toContain('Access denied'); + it('should reject whitespace-only plan_filename', () => { + const result = tool.validateToolParams({ plan_filename: ' ' }); + expect(result).toBe('plan_filename is required.'); }); it('should reject non-existent plan file', async () => { - const result = await validatePlanPath( - 'plans/ghost.md', - mockPlansDir, - tempRootDir, - ); + const result = await validatePlanPath('ghost.md', mockPlansDir); expect(result).toContain('Plan file does not exist'); }); @@ -488,18 +471,18 @@ Ask the user for specific feedback on how to improve the plan.`, fs.symlinkSync(outsideFile, maliciousPath); const result = tool.validateToolParams({ - plan_path: 'plans/malicious.md', + plan_filename: 'malicious.md', }); expect(result).toBe( - 'Access denied: plan path must be within the designated plans directory.', + `Access denied: plan path (${path.join(mockPlansDir, 'malicious.md')}) must be within the designated plans directory (${mockPlansDir}).`, ); }); it('should accept valid path within plans directory', () => { createPlanFile('valid.md', '# Content'); const result = tool.validateToolParams({ - plan_path: 'plans/valid.md', + plan_filename: 'valid.md', }); expect(result).toBeNull(); }); diff --git a/packages/core/src/tools/exit-plan-mode.ts b/packages/core/src/tools/exit-plan-mode.ts index 892e8926e0..483b1e5f3d 100644 --- a/packages/core/src/tools/exit-plan-mode.ts +++ b/packages/core/src/tools/exit-plan-mode.ts @@ -28,7 +28,7 @@ import { resolveToolDeclaration } from './definitions/resolver.js'; import { getPlanModeExitMessage } from '../utils/approvalModeUtils.js'; export interface ExitPlanModeParams { - plan_path: string; + plan_filename: string; } export class ExitPlanModeTool extends BaseDeclarativeTool< @@ -41,8 +41,7 @@ export class ExitPlanModeTool extends BaseDeclarativeTool< private config: Config, messageBus: MessageBus, ) { - const plansDir = config.storage.getPlansDir(); - const definition = getExitPlanModeDefinition(plansDir); + const definition = getExitPlanModeDefinition(); super( ExitPlanModeTool.Name, 'Exit Plan Mode', @@ -56,22 +55,21 @@ export class ExitPlanModeTool extends BaseDeclarativeTool< protected override validateToolParamValues( params: ExitPlanModeParams, ): string | null { - if (!params.plan_path || params.plan_path.trim() === '') { - return 'plan_path is required.'; + if (!params.plan_filename || params.plan_filename.trim() === '') { + return 'plan_filename is required.'; } - // Since validateToolParamValues is synchronous, we use a basic synchronous check - // for path traversal safety. High-level async validation is deferred to shouldConfirmExecute. + const safeFilename = path.basename(params.plan_filename); const plansDir = resolveToRealPath(this.config.storage.getPlansDir()); - const resolvedPath = path.resolve( - this.config.getTargetDir(), - params.plan_path, + const resolvedPath = path.join( + this.config.storage.getPlansDir(), + safeFilename, ); const realPath = resolveToRealPath(resolvedPath); if (!isSubpath(plansDir, realPath)) { - return `Access denied: plan path must be within the designated plans directory.`; + return `Access denied: plan path (${resolvedPath}) must be within the designated plans directory (${plansDir}).`; } return null; @@ -93,8 +91,7 @@ export class ExitPlanModeTool extends BaseDeclarativeTool< } override getSchema(modelId?: string) { - const plansDir = this.config.storage.getPlansDir(); - return resolveToolDeclaration(getExitPlanModeDefinition(plansDir), modelId); + return resolveToolDeclaration(getExitPlanModeDefinition(), modelId); } } @@ -122,9 +119,8 @@ export class ExitPlanModeInvocation extends BaseToolInvocation< const resolvedPlanPath = this.getResolvedPlanPath(); const pathError = await validatePlanPath( - this.params.plan_path, + this.params.plan_filename, this.config.storage.getPlansDir(), - this.config.getTargetDir(), ); if (pathError) { this.planValidationError = pathError; @@ -174,7 +170,7 @@ export class ExitPlanModeInvocation extends BaseToolInvocation< } getDescription(): string { - return `Requesting plan approval for: ${this.params.plan_path}`; + return `Requesting plan approval for: ${path.join(this.config.storage.getPlansDir(), this.params.plan_filename)}`; } /** @@ -182,7 +178,8 @@ export class ExitPlanModeInvocation extends BaseToolInvocation< * Note: Validation is done in validateToolParamValues, so this assumes the path is valid. */ private getResolvedPlanPath(): string { - return path.resolve(this.config.getTargetDir(), this.params.plan_path); + const safeFilename = path.basename(this.params.plan_filename); + return path.join(this.config.storage.getPlansDir(), safeFilename); } async execute(_signal: AbortSignal): Promise { diff --git a/packages/core/src/tools/line-endings.test.ts b/packages/core/src/tools/line-endings.test.ts index 981e602b5b..45c60e3b37 100644 --- a/packages/core/src/tools/line-endings.test.ts +++ b/packages/core/src/tools/line-endings.test.ts @@ -85,6 +85,10 @@ const mockConfigInternal = { discoverTools: vi.fn(), }) as unknown as ToolRegistry, isInteractive: () => false, + isPlanMode: () => false, + storage: { + getPlansDir: () => '/tmp/plans', + }, }; const mockConfig = mockConfigInternal as unknown as Config; diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 154a9de58f..1bd97aca9c 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -73,7 +73,7 @@ import { ASK_USER_OPTION_PARAM_LABEL, ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, - EXIT_PLAN_PARAM_PLAN_PATH, + EXIT_PLAN_PARAM_PLAN_FILENAME, SKILL_PARAM_NAME, } from './definitions/coreTools.js'; @@ -146,7 +146,7 @@ export { ASK_USER_OPTION_PARAM_LABEL, ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, - EXIT_PLAN_PARAM_PLAN_PATH, + EXIT_PLAN_PARAM_PLAN_FILENAME, SKILL_PARAM_NAME, }; diff --git a/packages/core/src/tools/write-file.test.ts b/packages/core/src/tools/write-file.test.ts index b3d762554a..aa8ff623ea 100644 --- a/packages/core/src/tools/write-file.test.ts +++ b/packages/core/src/tools/write-file.test.ts @@ -105,6 +105,7 @@ const mockConfigInternal = { }) as unknown as ToolRegistry, isInteractive: () => false, getDisableLLMCorrection: vi.fn(() => true), + isPlanMode: vi.fn(() => false), getActiveModel: () => 'test-model', storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'), diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts index 8ba967114c..1d36909dd4 100644 --- a/packages/core/src/tools/write-file.ts +++ b/packages/core/src/tools/write-file.ts @@ -165,10 +165,19 @@ class WriteFileToolInvocation extends BaseToolInvocation< true, () => this.config.getApprovalMode(), ); - this.resolvedPath = path.resolve( - this.config.getTargetDir(), - this.params.file_path, - ); + + if (this.config.isPlanMode()) { + const safeFilename = path.basename(this.params.file_path); + this.resolvedPath = path.join( + this.config.storage.getPlansDir(), + safeFilename, + ); + } else { + this.resolvedPath = path.resolve( + this.config.getTargetDir(), + this.params.file_path, + ); + } } override toolLocations(): ToolLocation[] { diff --git a/packages/core/src/utils/planUtils.test.ts b/packages/core/src/utils/planUtils.test.ts index 2e4f4f04eb..e7d953b41a 100644 --- a/packages/core/src/utils/planUtils.test.ts +++ b/packages/core/src/utils/planUtils.test.ts @@ -35,19 +35,13 @@ describe('planUtils', () => { const fullPath = path.join(tempRootDir, planPath); fs.writeFileSync(fullPath, '# My Plan'); - const result = await validatePlanPath(planPath, plansDir, tempRootDir); + const result = await validatePlanPath(planPath, plansDir); expect(result).toBeNull(); }); - it('should return error for path traversal', async () => { - const planPath = path.join('..', 'secret.txt'); - const result = await validatePlanPath(planPath, plansDir, tempRootDir); - expect(result).toContain('Access denied'); - }); - it('should return error for non-existent file', async () => { const planPath = path.join('plans', 'ghost.md'); - const result = await validatePlanPath(planPath, plansDir, tempRootDir); + const result = await validatePlanPath(planPath, plansDir); expect(result).toContain('Plan file does not exist'); }); @@ -60,11 +54,7 @@ describe('planUtils', () => { // Create a symbolic link pointing outside the plans directory fs.symlinkSync(outsideFile, fullMaliciousPath); - const result = await validatePlanPath( - maliciousPath, - plansDir, - tempRootDir, - ); + const result = await validatePlanPath(maliciousPath, plansDir); expect(result).toContain('Access denied'); }); }); diff --git a/packages/core/src/utils/planUtils.ts b/packages/core/src/utils/planUtils.ts index 534fe6923f..559434b1e3 100644 --- a/packages/core/src/utils/planUtils.ts +++ b/packages/core/src/utils/planUtils.ts @@ -13,8 +13,8 @@ import { isSubpath, resolveToRealPath } from './paths.js'; * Shared between backend tools and CLI UI for consistency. */ export const PlanErrorMessages = { - PATH_ACCESS_DENIED: - 'Access denied: plan path must be within the designated plans directory.', + PATH_ACCESS_DENIED: (planPath: string, plansDir: string) => + `Access denied: plan path (${planPath}) must be within the designated plans directory (${plansDir}).`, FILE_NOT_FOUND: (path: string) => `Plan file does not exist: ${path}. You must create the plan file before requesting approval.`, FILE_EMPTY: @@ -32,14 +32,14 @@ export const PlanErrorMessages = { export async function validatePlanPath( planPath: string, plansDir: string, - targetDir: string, ): Promise { - const resolvedPath = path.resolve(targetDir, planPath); + const safeFilename = path.basename(planPath); + const resolvedPath = path.join(plansDir, safeFilename); const realPath = resolveToRealPath(resolvedPath); const realPlansDir = resolveToRealPath(plansDir); if (!isSubpath(realPlansDir, realPath)) { - return PlanErrorMessages.PATH_ACCESS_DENIED; + return PlanErrorMessages.PATH_ACCESS_DENIED(planPath, realPlansDir); } if (!(await fileExists(resolvedPath))) { From 893c7d38801a9934d080e45d2a71c098cee8d710 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Tue, 24 Mar 2026 09:33:17 -0400 Subject: [PATCH 06/11] feat(ui): allow immediate user input during startup (#23661) --- packages/cli/src/ui/AppContainer.tsx | 17 +++++++++++------ packages/cli/src/ui/components/Composer.tsx | 8 ++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 326d02b250..8c199c9387 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -700,7 +700,10 @@ export const AppContainer = (props: AppContainerProps) => { // Derive auth state variables for backward compatibility with UIStateContext const isAuthDialogOpen = authState === AuthState.Updating; - const isAuthenticating = authState === AuthState.Unauthenticated; + // TODO: Consider handling other auth types that should also skip the blocking screen + const isAuthenticating = + authState === AuthState.Unauthenticated && + settings.merged.security.auth.selectedType !== AuthType.USE_GEMINI; // Session browser and resume functionality const isGeminiClientInitialized = config.getGeminiClient()?.isInitialized(); @@ -1300,7 +1303,8 @@ Logging in with Google... Restarting Gemini CLI to continue. return; } - if (isSlash || (isIdle && isMcpReady)) { + const isMcpOrConfigReady = isConfigInitialized && isMcpReady; + if ((isSlash && isConfigInitialized) || (isIdle && isMcpOrConfigReady)) { if (!isSlash) { const permissions = await checkPermissions(submittedValue, config); if (permissions.length > 0) { @@ -1323,10 +1327,12 @@ Logging in with Google... Restarting Gemini CLI to continue. void submitQuery(submittedValue); } else { // Check messageQueue.length === 0 to only notify on the first queued item - if (isIdle && !isMcpReady && messageQueue.length === 0) { + if (isIdle && !isMcpOrConfigReady && messageQueue.length === 0) { coreEvents.emitFeedback( 'info', - 'Waiting for MCP servers to initialize... Slash commands are still available and prompts will be queued.', + !isConfigInitialized + ? 'Initializing... Prompts will be queued.' + : 'Waiting for MCP servers to initialize... Slash commands are still available and prompts will be queued.', ); } addMessage(submittedValue); @@ -1350,6 +1356,7 @@ Logging in with Google... Restarting Gemini CLI to continue. refreshStatic, reset, handleHintSubmit, + isConfigInitialized, triggerExpandHint, ], ); @@ -1380,11 +1387,9 @@ Logging in with Google... Restarting Gemini CLI to continue. * - Any future streaming states not explicitly allowed */ const isInputActive = - isConfigInitialized && !initError && !isProcessing && !isResuming && - !!slashCommands && (streamingState === StreamingState.Idle || streamingState === StreamingState.Responding || streamingState === StreamingState.WaitingForConfirmation) && diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 042f50776d..593b4e2a6a 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -518,12 +518,8 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { flexGrow={0} flexShrink={0} > - {(!uiState.slashCommands || - !uiState.isConfigInitialized || - uiState.isResuming) && ( - + {uiState.isResuming && ( + )} {showUiDetails && ( From fc1876815556486e6eb4bfff2df6c1301d0eab97 Mon Sep 17 00:00:00 2001 From: Gal Zahavi <38544478+galz10@users.noreply.github.com> Date: Tue, 24 Mar 2026 07:32:20 -0700 Subject: [PATCH 07/11] refactor(sandbox): reorganize Windows sandbox files (#23645) --- packages/core/scripts/compile-windows-sandbox.js | 6 +++--- packages/core/src/index.ts | 2 +- .../scripts => sandbox/windows}/GeminiSandbox.cs | 0 .../windows/WindowsSandboxManager.test.ts} | 8 ++++---- .../windows/WindowsSandboxManager.ts} | 10 +++++----- packages/core/src/services/sandboxManager.test.ts | 2 +- packages/core/src/services/sandboxManagerFactory.ts | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) rename packages/core/src/{services/scripts => sandbox/windows}/GeminiSandbox.cs (100%) rename packages/core/src/{services/windowsSandboxManager.test.ts => sandbox/windows/WindowsSandboxManager.test.ts} (93%) rename packages/core/src/{services/windowsSandboxManager.ts => sandbox/windows/WindowsSandboxManager.ts} (96%) diff --git a/packages/core/scripts/compile-windows-sandbox.js b/packages/core/scripts/compile-windows-sandbox.js index a52987c24e..0a5ce49246 100644 --- a/packages/core/scripts/compile-windows-sandbox.js +++ b/packages/core/scripts/compile-windows-sandbox.js @@ -26,15 +26,15 @@ function compileWindowsSandbox() { const srcHelperPath = path.resolve( __dirname, - '../src/services/scripts/GeminiSandbox.exe', + '../src/sandbox/windows/GeminiSandbox.exe', ); const distHelperPath = path.resolve( __dirname, - '../dist/src/services/scripts/GeminiSandbox.exe', + '../dist/src/sandbox/windows/GeminiSandbox.exe', ); const sourcePath = path.resolve( __dirname, - '../src/services/scripts/GeminiSandbox.cs', + '../src/sandbox/windows/GeminiSandbox.cs', ); if (!fs.existsSync(sourcePath)) { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 4a5dc9d11d..e607775345 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -126,7 +126,7 @@ export * from './services/FolderTrustDiscoveryService.js'; export * from './services/chatRecordingService.js'; export * from './services/fileSystemService.js'; export * from './services/sandboxedFileSystemService.js'; -export * from './services/windowsSandboxManager.js'; +export * from './sandbox/windows/WindowsSandboxManager.js'; export * from './services/sessionSummaryUtils.js'; export * from './services/contextManager.js'; export * from './services/trackerService.js'; diff --git a/packages/core/src/services/scripts/GeminiSandbox.cs b/packages/core/src/sandbox/windows/GeminiSandbox.cs similarity index 100% rename from packages/core/src/services/scripts/GeminiSandbox.cs rename to packages/core/src/sandbox/windows/GeminiSandbox.cs diff --git a/packages/core/src/services/windowsSandboxManager.test.ts b/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts similarity index 93% rename from packages/core/src/services/windowsSandboxManager.test.ts rename to packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts index 4b430ffa85..de526e2eaf 100644 --- a/packages/core/src/services/windowsSandboxManager.test.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts @@ -8,11 +8,11 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { WindowsSandboxManager } from './windowsSandboxManager.js'; -import type { SandboxRequest } from './sandboxManager.js'; -import { spawnAsync } from '../utils/shell-utils.js'; +import { WindowsSandboxManager } from './WindowsSandboxManager.js'; +import type { SandboxRequest } from '../../services/sandboxManager.js'; +import { spawnAsync } from '../../utils/shell-utils.js'; -vi.mock('../utils/shell-utils.js', () => ({ +vi.mock('../../utils/shell-utils.js', () => ({ spawnAsync: vi.fn(), })); diff --git a/packages/core/src/services/windowsSandboxManager.ts b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts similarity index 96% rename from packages/core/src/services/windowsSandboxManager.ts rename to packages/core/src/sandbox/windows/WindowsSandboxManager.ts index e0cfb2201a..b4391c8595 100644 --- a/packages/core/src/services/windowsSandboxManager.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts @@ -15,13 +15,13 @@ import { GOVERNANCE_FILES, type GlobalSandboxOptions, sanitizePaths, -} from './sandboxManager.js'; +} from '../../services/sandboxManager.js'; import { sanitizeEnvironment, getSecureSanitizationConfig, -} from './environmentSanitization.js'; -import { debugLogger } from '../utils/debugLogger.js'; -import { spawnAsync } from '../utils/shell-utils.js'; +} from '../../services/environmentSanitization.js'; +import { debugLogger } from '../../utils/debugLogger.js'; +import { spawnAsync } from '../../utils/shell-utils.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -37,7 +37,7 @@ export class WindowsSandboxManager implements SandboxManager { private readonly lowIntegrityCache = new Set(); constructor(private readonly options: GlobalSandboxOptions) { - this.helperPath = path.resolve(__dirname, 'scripts', 'GeminiSandbox.exe'); + this.helperPath = path.resolve(__dirname, 'GeminiSandbox.exe'); } /** diff --git a/packages/core/src/services/sandboxManager.test.ts b/packages/core/src/services/sandboxManager.test.ts index 50760ccf1c..9b1903ef3a 100644 --- a/packages/core/src/services/sandboxManager.test.ts +++ b/packages/core/src/services/sandboxManager.test.ts @@ -10,7 +10,7 @@ import { NoopSandboxManager, sanitizePaths } from './sandboxManager.js'; import { createSandboxManager } from './sandboxManagerFactory.js'; import { LinuxSandboxManager } from '../sandbox/linux/LinuxSandboxManager.js'; import { MacOsSandboxManager } from '../sandbox/macos/MacOsSandboxManager.js'; -import { WindowsSandboxManager } from './windowsSandboxManager.js'; +import { WindowsSandboxManager } from '../sandbox/windows/WindowsSandboxManager.js'; describe('sanitizePaths', () => { it('should return undefined if no paths are provided', () => { diff --git a/packages/core/src/services/sandboxManagerFactory.ts b/packages/core/src/services/sandboxManagerFactory.ts index fa24b99f6e..669257b7b0 100644 --- a/packages/core/src/services/sandboxManagerFactory.ts +++ b/packages/core/src/services/sandboxManagerFactory.ts @@ -12,7 +12,7 @@ import { } from './sandboxManager.js'; import { LinuxSandboxManager } from '../sandbox/linux/LinuxSandboxManager.js'; import { MacOsSandboxManager } from '../sandbox/macos/MacOsSandboxManager.js'; -import { WindowsSandboxManager } from './windowsSandboxManager.js'; +import { WindowsSandboxManager } from '../sandbox/windows/WindowsSandboxManager.js'; import type { SandboxConfig } from '../config/config.js'; import { type SandboxPolicyManager } from '../policy/sandboxPolicyManager.js'; From 91d756f391e82801ae54d454975fcde1a051442e Mon Sep 17 00:00:00 2001 From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Date: Tue, 24 Mar 2026 11:34:04 -0400 Subject: [PATCH 08/11] fix(core): improve remote agent streaming UI and UX (#23633) --- .../messages/SubagentProgressDisplay.tsx | 4 +- packages/core/src/agents/a2aUtils.test.ts | 2 +- packages/core/src/agents/a2aUtils.ts | 27 ++++- .../core/src/agents/remote-invocation.test.ts | 107 +++++++++++++----- packages/core/src/agents/remote-invocation.ts | 59 +++++++++- 5 files changed, 161 insertions(+), 38 deletions(-) diff --git a/packages/cli/src/ui/components/messages/SubagentProgressDisplay.tsx b/packages/cli/src/ui/components/messages/SubagentProgressDisplay.tsx index 5d1086c759..a84429cd10 100644 --- a/packages/cli/src/ui/components/messages/SubagentProgressDisplay.tsx +++ b/packages/cli/src/ui/components/messages/SubagentProgressDisplay.tsx @@ -153,7 +153,7 @@ export const SubagentProgressDisplay: React.FC< })} - {progress.state === 'completed' && progress.result && ( + {progress.result && ( {progress.terminateReason && progress.terminateReason !== 'GOAL' && ( @@ -164,7 +164,7 @@ export const SubagentProgressDisplay: React.FC< )} diff --git a/packages/core/src/agents/a2aUtils.test.ts b/packages/core/src/agents/a2aUtils.test.ts index 0dce551be4..f8416ae2ad 100644 --- a/packages/core/src/agents/a2aUtils.test.ts +++ b/packages/core/src/agents/a2aUtils.test.ts @@ -403,7 +403,7 @@ describe('a2aUtils', () => { const output = reassembler.toString(); expect(output).toBe( - 'Analyzing...\n\nProcessing...\n\nArtifact (Code):\nprint("Done")', + 'Analyzing...Processing...\n\nArtifact (Code):\nprint("Done")', ); }); diff --git a/packages/core/src/agents/a2aUtils.ts b/packages/core/src/agents/a2aUtils.ts index 70fc9cf557..b617082416 100644 --- a/packages/core/src/agents/a2aUtils.ts +++ b/packages/core/src/agents/a2aUtils.ts @@ -16,6 +16,7 @@ import type { AgentInterface, } from '@a2a-js/sdk'; import type { SendMessageResult } from './a2a-client-manager.js'; +import type { SubagentActivityItem } from './types.js'; export const AUTH_REQUIRED_MSG = `[Authorization Required] The agent has indicated it requires authorization to proceed. Please follow the agent's instructions.`; @@ -123,17 +124,39 @@ export class A2AResultReassembler { private pushMessage(message: Message | undefined) { if (!message) return; - const text = extractPartsText(message.parts, '\n'); + const text = extractPartsText(message.parts, ''); if (text && this.messageLog[this.messageLog.length - 1] !== text) { this.messageLog.push(text); } } + /** + * Returns an array of activity items representing the current reassembled state. + */ + toActivityItems(): SubagentActivityItem[] { + const isAuthRequired = this.messageLog.includes(AUTH_REQUIRED_MSG); + return [ + isAuthRequired + ? { + id: 'auth-required', + type: 'thought', + content: AUTH_REQUIRED_MSG, + status: 'running', + } + : { + id: 'pending', + type: 'thought', + content: 'Working...', + status: 'running', + }, + ]; + } + /** * Returns a human-readable string representation of the current reassembled state. */ toString(): string { - const joinedMessages = this.messageLog.join('\n\n'); + const joinedMessages = this.messageLog.join(''); const artifactsOutput = Array.from(this.artifacts.keys()) .map((id) => { diff --git a/packages/core/src/agents/remote-invocation.test.ts b/packages/core/src/agents/remote-invocation.test.ts index 870071b321..b5fdd4a4fa 100644 --- a/packages/core/src/agents/remote-invocation.test.ts +++ b/packages/core/src/agents/remote-invocation.test.ts @@ -20,7 +20,7 @@ import { type A2AClientManager, } from './a2a-client-manager.js'; -import type { RemoteAgentDefinition } from './types.js'; +import type { RemoteAgentDefinition, SubagentProgress } from './types.js'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; import { A2AAuthProviderFactory } from './auth-provider/factory.js'; import type { A2AAuthProvider } from './auth-provider/types.js'; @@ -266,9 +266,11 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(new AbortController().signal); - expect(result.error?.message).toContain( - "Failed to create auth provider for agent 'test-agent'", - ); + expect(result.returnDisplay).toMatchObject({ + result: expect.stringContaining( + "Failed to create auth provider for agent 'test-agent'", + ), + }); }); it('should not load the agent if already present', async () => { @@ -325,7 +327,9 @@ describe('RemoteAgentInvocation', () => { // Execute first time const result1 = await invocation1.execute(new AbortController().signal); - expect(result1.returnDisplay).toBe('Response 1'); + expect(result1.returnDisplay).toMatchObject({ + result: 'Response 1', + }); expect(mockClientManager.sendMessageStream).toHaveBeenLastCalledWith( 'test-agent', 'first', @@ -355,7 +359,9 @@ describe('RemoteAgentInvocation', () => { mockMessageBus, ); const result2 = await invocation2.execute(new AbortController().signal); - expect(result2.returnDisplay).toBe('Response 2'); + expect((result2.returnDisplay as SubagentProgress).result).toBe( + 'Response 2', + ); expect(mockClientManager.sendMessageStream).toHaveBeenLastCalledWith( 'test-agent', @@ -444,8 +450,22 @@ describe('RemoteAgentInvocation', () => { ); await invocation.execute(new AbortController().signal, updateOutput); - expect(updateOutput).toHaveBeenCalledWith('Hello'); - expect(updateOutput).toHaveBeenCalledWith('Hello\n\nHello World'); + expect(updateOutput).toHaveBeenCalledWith( + expect.objectContaining({ + isSubagentProgress: true, + state: 'running', + recentActivity: expect.arrayContaining([ + expect.objectContaining({ content: 'Working...' }), + ]), + }), + ); + expect(updateOutput).toHaveBeenCalledWith( + expect.objectContaining({ + isSubagentProgress: true, + state: 'completed', + result: 'HelloHello World', + }), + ); }); it('should abort when signal is aborted during streaming', async () => { @@ -478,8 +498,7 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(controller.signal); - expect(result.error).toBeDefined(); - expect(result.error?.message).toContain('Operation aborted'); + expect(result.returnDisplay).toMatchObject({ state: 'error' }); }); it('should handle errors gracefully', async () => { @@ -501,9 +520,10 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(new AbortController().signal); - expect(result.error).toBeDefined(); - expect(result.error?.message).toContain('Network error'); - expect(result.returnDisplay).toContain('Network error'); + expect(result.returnDisplay).toMatchObject({ + state: 'error', + result: expect.stringContaining('Network error'), + }); }); it('should use a2a helpers for extracting text', async () => { @@ -534,7 +554,9 @@ describe('RemoteAgentInvocation', () => { const result = await invocation.execute(new AbortController().signal); // Just check that text is present, exact formatting depends on helper - expect(result.returnDisplay).toContain('Extracted text'); + expect((result.returnDisplay as SubagentProgress).result).toContain( + 'Extracted text', + ); }); it('should handle mixed response types during streaming (TaskStatusUpdateEvent + Message)', async () => { @@ -577,9 +599,25 @@ describe('RemoteAgentInvocation', () => { updateOutput, ); - expect(updateOutput).toHaveBeenCalledWith('Thinking...'); - expect(updateOutput).toHaveBeenCalledWith('Thinking...\n\nFinal Answer'); - expect(result.returnDisplay).toBe('Thinking...\n\nFinal Answer'); + expect(updateOutput).toHaveBeenCalledWith( + expect.objectContaining({ + isSubagentProgress: true, + state: 'running', + recentActivity: expect.arrayContaining([ + expect.objectContaining({ content: 'Working...' }), + ]), + }), + ); + expect(updateOutput).toHaveBeenCalledWith( + expect.objectContaining({ + isSubagentProgress: true, + state: 'completed', + result: 'Thinking...Final Answer', + }), + ); + expect(result.returnDisplay).toMatchObject({ + result: 'Thinking...Final Answer', + }); }); it('should handle artifact reassembly with append: true', async () => { @@ -635,12 +673,21 @@ describe('RemoteAgentInvocation', () => { ); await invocation.execute(new AbortController().signal, updateOutput); - expect(updateOutput).toHaveBeenCalledWith('Generating...'); expect(updateOutput).toHaveBeenCalledWith( - 'Generating...\n\nArtifact (Result):\nPart 1', + expect.objectContaining({ + isSubagentProgress: true, + state: 'running', + recentActivity: expect.arrayContaining([ + expect.objectContaining({ content: 'Working...' }), + ]), + }), ); expect(updateOutput).toHaveBeenCalledWith( - 'Generating...\n\nArtifact (Result):\nPart 1 Part 2', + expect.objectContaining({ + isSubagentProgress: true, + state: 'completed', + result: 'Generating...\n\nArtifact (Result):\nPart 1 Part 2', + }), ); }); }); @@ -694,8 +741,10 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(new AbortController().signal); - expect(result.error).toBeDefined(); - expect(result.returnDisplay).toContain(a2aError.userMessage); + expect(result.returnDisplay).toMatchObject({ state: 'error' }); + expect((result.returnDisplay as SubagentProgress).result).toContain( + a2aError.userMessage, + ); }); it('should use generic message for non-A2AAgentError errors', async () => { @@ -712,8 +761,8 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(new AbortController().signal); - expect(result.error).toBeDefined(); - expect(result.returnDisplay).toContain( + expect(result.returnDisplay).toMatchObject({ state: 'error' }); + expect((result.returnDisplay as SubagentProgress).result).toContain( 'Error calling remote agent: something unexpected', ); }); @@ -741,10 +790,14 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(new AbortController().signal); - expect(result.error).toBeDefined(); + expect(result.returnDisplay).toMatchObject({ state: 'error' }); // Should contain both the partial output and the error message - expect(result.returnDisplay).toContain('Partial response'); - expect(result.returnDisplay).toContain('connection reset'); + expect(result.returnDisplay).toMatchObject({ + result: expect.stringContaining('Partial response'), + }); + expect(result.returnDisplay).toMatchObject({ + result: expect.stringContaining('connection reset'), + }); }); }); }); diff --git a/packages/core/src/agents/remote-invocation.ts b/packages/core/src/agents/remote-invocation.ts index 0933ca026e..130f0f1a38 100644 --- a/packages/core/src/agents/remote-invocation.ts +++ b/packages/core/src/agents/remote-invocation.ts @@ -15,6 +15,7 @@ import { type RemoteAgentInputs, type RemoteAgentDefinition, type AgentInputs, + type SubagentProgress, } from './types.js'; import { type AgentLoopContext } from '../config/agent-loop-context.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; @@ -25,7 +26,6 @@ import type { import { extractIdsFromResponse, A2AResultReassembler } from './a2aUtils.js'; import type { AuthenticationHandler } from '@a2a-js/sdk/client'; import { debugLogger } from '../utils/debugLogger.js'; -import { safeJsonToMarkdown } from '../utils/markdownUtils.js'; import type { AnsiOutput } from '../utils/terminalSerializer.js'; import { A2AAuthProviderFactory } from './auth-provider/factory.js'; import { A2AAgentError } from './a2a-errors.js'; @@ -125,13 +125,30 @@ export class RemoteAgentInvocation extends BaseToolInvocation< async execute( _signal: AbortSignal, - updateOutput?: (output: string | AnsiOutput) => void, + updateOutput?: (output: string | AnsiOutput | SubagentProgress) => void, ): Promise { // 1. Ensure the agent is loaded (cached by manager) // We assume the user has provided an access token via some mechanism (TODO), // or we rely on ADC. const reassembler = new A2AResultReassembler(); + const agentName = this.definition.displayName ?? this.definition.name; try { + if (updateOutput) { + updateOutput({ + isSubagentProgress: true, + agentName, + state: 'running', + recentActivity: [ + { + id: 'pending', + type: 'thought', + content: 'Working...', + status: 'running', + }, + ], + }); + } + const priorState = RemoteAgentInvocation.sessionState.get( this.definition.name, ); @@ -172,7 +189,13 @@ export class RemoteAgentInvocation extends BaseToolInvocation< reassembler.update(chunk); if (updateOutput) { - updateOutput(reassembler.toString()); + updateOutput({ + isSubagentProgress: true, + agentName, + state: 'running', + recentActivity: reassembler.toActivityItems(), + result: reassembler.toString(), + }); } const { @@ -198,9 +221,21 @@ export class RemoteAgentInvocation extends BaseToolInvocation< `[RemoteAgent] Final response from ${this.definition.name}:\n${JSON.stringify(finalResponse, null, 2)}`, ); + const finalProgress: SubagentProgress = { + isSubagentProgress: true, + agentName, + state: 'completed', + result: finalOutput, + recentActivity: reassembler.toActivityItems(), + }; + + if (updateOutput) { + updateOutput(finalProgress); + } + return { llmContent: [{ text: finalOutput }], - returnDisplay: safeJsonToMarkdown(finalOutput), + returnDisplay: finalProgress, }; } catch (error: unknown) { const partialOutput = reassembler.toString(); @@ -209,10 +244,22 @@ export class RemoteAgentInvocation extends BaseToolInvocation< const fullDisplay = partialOutput ? `${partialOutput}\n\n${errorMessage}` : errorMessage; + + const errorProgress: SubagentProgress = { + isSubagentProgress: true, + agentName, + state: 'error', + result: fullDisplay, + recentActivity: reassembler.toActivityItems(), + }; + + if (updateOutput) { + updateOutput(errorProgress); + } + return { llmContent: [{ text: fullDisplay }], - returnDisplay: fullDisplay, - error: { message: errorMessage }, + returnDisplay: errorProgress, }; } finally { // Persist state even on partial failures or aborts to maintain conversational continuity. From 139cc7b97cb3d9b59d3533b53c1305b6720ce2d4 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Tue, 24 Mar 2026 11:58:41 -0400 Subject: [PATCH 09/11] perf(cli): optimize --version startup time (#23671) --- packages/cli/index.ts | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/packages/cli/index.ts b/packages/cli/index.ts index 5444fe1b74..fa6537d7bf 100644 --- a/packages/cli/index.ts +++ b/packages/cli/index.ts @@ -6,12 +6,19 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { main } from './src/gemini.js'; -import { FatalError, writeToStderr } from '@google/gemini-cli-core'; -import { runExitCleanup } from './src/utils/cleanup.js'; +// --- Fast Path for Version --- +// We check for version flags at the very top to avoid loading any heavy dependencies. +// process.env.CLI_VERSION is defined during the build process by esbuild. +if (process.argv.includes('--version') || process.argv.includes('-v')) { + console.log(process.env['CLI_VERSION'] || 'unknown'); + process.exit(0); +} // --- Global Entry Point --- +let writeToStderrFn: (message: string) => void = (msg) => + process.stderr.write(msg); + // Suppress known race condition error in node-pty on Windows // Tracking bug: https://github.com/microsoft/node-pty/issues/827 process.on('uncaughtException', (error) => { @@ -28,13 +35,22 @@ process.on('uncaughtException', (error) => { // For other errors, we rely on the default behavior, but since we attached a listener, // we must manually replicate it. if (error instanceof Error) { - writeToStderr(error.stack + '\n'); + writeToStderrFn(error.stack + '\n'); } else { - writeToStderr(String(error) + '\n'); + writeToStderrFn(String(error) + '\n'); } process.exit(1); }); +const [{ main }, { FatalError, writeToStderr }, { runExitCleanup }] = + await Promise.all([ + import('./src/gemini.js'), + import('@google/gemini-cli-core'), + import('./src/utils/cleanup.js'), + ]); + +writeToStderrFn = writeToStderr; + main().catch(async (error) => { // Set a timeout to force exit if cleanup hangs const cleanupTimeout = setTimeout(() => { From 6b7dc4d822329ca70b3e67fac0f79ddd32ed176a Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Tue, 24 Mar 2026 16:19:59 +0000 Subject: [PATCH 10/11] refactor(core): stop gemini CLI from producing unsafe casts (#23611) --- evals/redundant_casts.eval.ts | 82 +++++++++++++++++++ .../core/__snapshots__/prompts.test.ts.snap | 19 +++++ packages/core/src/prompts/snippets.ts | 1 + 3 files changed, 102 insertions(+) create mode 100644 evals/redundant_casts.eval.ts diff --git a/evals/redundant_casts.eval.ts b/evals/redundant_casts.eval.ts new file mode 100644 index 0000000000..83750e44d4 --- /dev/null +++ b/evals/redundant_casts.eval.ts @@ -0,0 +1,82 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import path from 'node:path'; +import fs from 'node:fs/promises'; + +describe('redundant_casts', () => { + evalTest('USUALLY_PASSES', { + name: 'should not add redundant or unsafe casts when modifying typescript code', + files: { + 'src/cast_example.ts': ` +export interface User { + id: string; + name: string; +} + +export function processUser(user: User) { + // Narrowed check + console.log("Processing user: " + user.name); +} + +export function handleUnknown(data: unknown) { + // Goal: log data.id if it exists + console.log("Handling data"); +} + +export function handleError() { + try { + throw new Error("fail"); + } catch (err) { + // Goal: log err.message + console.error("Error happened"); + } +} +`, + }, + prompt: ` +1. In src/cast_example.ts, update processUser to return the name in uppercase. +2. In handleUnknown, log the "id" property if "data" is an object that contains it. +3. In handleError, log the error message from "err". +`, + assert: async (rig) => { + const filePath = path.join(rig.testDir!, 'src/cast_example.ts'); + const content = await fs.readFile(filePath, 'utf-8'); + + // 1. Redundant Cast Check (Same type) + // Bad: (user.name as string).toUpperCase() + expect(content, 'Should not cast a known string to string').not.toContain( + 'as string', + ); + + // 2. Unsafe Cast Check (Unknown object) + // Bad: (data as any).id or (data as {id: string}).id + expect( + content, + 'Should not use unsafe casts for unknown property access', + ).not.toContain('as any'); + expect( + content, + 'Should not use unsafe casts for unknown property access', + ).not.toContain('as {'); + + // 3. Unsafe Cast Check (Error handling) + // Bad: (err as Error).message + // Good: if (err instanceof Error) { ... } + expect( + content, + 'Should prefer instanceof over casting for errors', + ).not.toContain('as Error'); + + // Verify implementation + expect(content).toContain('toUpperCase()'); + expect(content).toContain('message'); + expect(content).toContain('id'); + }, + }); +}); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index a39ef962e1..51f9a9e59e 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -42,6 +42,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -213,6 +214,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -503,6 +505,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -674,6 +677,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -845,6 +849,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -968,6 +973,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -1564,6 +1570,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -1731,6 +1738,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -1889,6 +1897,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -2047,6 +2056,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -2201,6 +2211,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -2355,6 +2366,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -2503,6 +2515,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -2656,6 +2669,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -2934,6 +2948,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -3340,6 +3355,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -3494,6 +3510,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -3760,6 +3777,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. @@ -3914,6 +3932,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 1761aabcc2..27c1fa60a1 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -227,6 +227,7 @@ Use the following guidelines to optimize your search and read patterns. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. - **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update. +- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions). - **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it. - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. ${options.interactive ? 'For Directives, only clarify if critically underspecified; otherwise, work autonomously.' : 'For Directives, you must work autonomously as no further user input is available.'} You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. From 1c3d3977822fe55f48f5edac2a6d4ffbc3818e0a Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Tue, 24 Mar 2026 17:23:57 +0000 Subject: [PATCH 11/11] use enableAutoUpdate in test rig (#23681) --- packages/test-utils/src/test-rig.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index bf85697a5c..ae2e9cc0ef 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -435,7 +435,7 @@ export class TestRig { general: { // Nightly releases sometimes becomes out of sync with local code and // triggers auto-update, which causes tests to fail. - disableAutoUpdate: true, + enableAutoUpdate: false, }, telemetry: { enabled: true,