Merge remote-tracking branch 'origin/main' into fix/agent-loader-error-formatting

This commit is contained in:
Adam Weidman
2026-03-24 14:45:13 -04:00
80 changed files with 3216 additions and 396 deletions
+2 -2
View File
@@ -250,8 +250,8 @@ Slash commands provide meta-level control over the CLI itself.
- **`list`** or **`ls`**:
- **Description:** List configured MCP servers and tools. This is the
default action if no subcommand is specified.
- **`refresh`**:
- **Description:** Restarts all MCP servers and re-discovers their available
- **`reload`**:
- **Description:** Reloads all MCP servers and re-discovers their available
tools.
- **`schema`**:
- **Description:** List configured MCP servers and tools with descriptions
+50
View File
@@ -136,6 +136,32 @@ describe('plan_mode', () => {
expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe(
true,
);
const toolLogs = rig.readToolLogs();
const exitPlanCall = toolLogs.find(
(log) => log.toolRequest.name === 'exit_plan_mode',
);
expect(
exitPlanCall,
'Expected to find exit_plan_mode in tool logs',
).toBeDefined();
const args = JSON.parse(exitPlanCall!.toolRequest.args);
expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf(
'string',
);
expect(args.plan_filename, 'plan_filename should end with .md').toMatch(
/\.md$/,
);
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('/');
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('\\');
assertModelHasOutput(result);
},
});
@@ -199,6 +225,30 @@ describe('plan_mode', () => {
await rig.waitForTelemetryReady();
const toolLogs = rig.readToolLogs();
const exitPlanCall = toolLogs.find(
(log) => log.toolRequest.name === 'exit_plan_mode',
);
expect(
exitPlanCall,
'Expected to find exit_plan_mode in tool logs',
).toBeDefined();
const args = JSON.parse(exitPlanCall!.toolRequest.args);
expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf(
'string',
);
expect(args.plan_filename, 'plan_filename should end with .md').toMatch(
/\.md$/,
);
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('/');
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('\\');
// Check if plan was written
const planWrite = toolLogs.find(
(log) =>
+82
View File
@@ -0,0 +1,82 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
import path from 'node:path';
import fs from 'node:fs/promises';
describe('redundant_casts', () => {
evalTest('USUALLY_PASSES', {
name: 'should not add redundant or unsafe casts when modifying typescript code',
files: {
'src/cast_example.ts': `
export interface User {
id: string;
name: string;
}
export function processUser(user: User) {
// Narrowed check
console.log("Processing user: " + user.name);
}
export function handleUnknown(data: unknown) {
// Goal: log data.id if it exists
console.log("Handling data");
}
export function handleError() {
try {
throw new Error("fail");
} catch (err) {
// Goal: log err.message
console.error("Error happened");
}
}
`,
},
prompt: `
1. In src/cast_example.ts, update processUser to return the name in uppercase.
2. In handleUnknown, log the "id" property if "data" is an object that contains it.
3. In handleError, log the error message from "err".
`,
assert: async (rig) => {
const filePath = path.join(rig.testDir!, 'src/cast_example.ts');
const content = await fs.readFile(filePath, 'utf-8');
// 1. Redundant Cast Check (Same type)
// Bad: (user.name as string).toUpperCase()
expect(content, 'Should not cast a known string to string').not.toContain(
'as string',
);
// 2. Unsafe Cast Check (Unknown object)
// Bad: (data as any).id or (data as {id: string}).id
expect(
content,
'Should not use unsafe casts for unknown property access',
).not.toContain('as any');
expect(
content,
'Should not use unsafe casts for unknown property access',
).not.toContain('as {');
// 3. Unsafe Cast Check (Error handling)
// Bad: (err as Error).message
// Good: if (err instanceof Error) { ... }
expect(
content,
'Should prefer instanceof over casting for errors',
).not.toContain('as Error');
// Verify implementation
expect(content).toContain('toUpperCase()');
expect(content).toContain('message');
expect(content).toContain('id');
},
});
});
+42
View File
@@ -0,0 +1,42 @@
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
describe('Sandbox recovery', () => {
evalTest('USUALLY_PASSES', {
name: 'attempts to use additional_permissions when operation not permitted',
prompt:
'Run ./script.sh. It will fail with "Operation not permitted". When it does, you must retry running it by passing the appropriate additional_permissions.',
files: {
'script.sh':
'#!/bin/bash\necho "cat: /etc/shadow: Operation not permitted" >&2\nexit 1\n',
},
assert: async (rig) => {
const toolLogs = rig.readToolLogs();
const shellCalls = toolLogs.filter(
(log) =>
log.toolRequest?.name === 'run_shell_command' &&
log.toolRequest?.args?.includes('script.sh'),
);
// The agent should have tried running the command.
expect(
shellCalls.length,
'Agent should have called run_shell_command',
).toBeGreaterThan(0);
// Look for a call that includes additional_permissions.
const hasAdditionalPermissions = shellCalls.some((call) => {
const args =
typeof call.toolRequest.args === 'string'
? JSON.parse(call.toolRequest.args)
: call.toolRequest.args;
return args.additional_permissions !== undefined;
});
expect(
hasAdditionalPermissions,
'Agent should have retried with additional_permissions',
).toBe(true);
},
});
});
+7 -1
View File
@@ -183,11 +183,17 @@ describe('Policy Engine Headless Mode', () => {
responsesFile: 'policy-headless-shell-denied.responses',
promptCommand: ECHO_PROMPT,
policyContent: `
[[rule]]
toolName = "run_shell_command"
commandPrefix = "echo"
decision = "deny"
priority = 100
[[rule]]
toolName = "run_shell_command"
commandPrefix = "node"
decision = "allow"
priority = 100
priority = 90
`,
expectAllowed: false,
expectedDenialString: 'Tool execution denied by policy',
+9 -3
View File
@@ -58,12 +58,18 @@ function getDisallowedFileReadCommand(testFile: string): {
const quotedPath = `"${testFile}"`;
switch (shell) {
case 'powershell':
return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' };
return {
command: `powershell -Command "Get-Content ${quotedPath}"`,
tool: 'powershell',
};
case 'cmd':
return { command: `type ${quotedPath}`, tool: 'type' };
return { command: `cmd /c type ${quotedPath}`, tool: 'cmd' };
case 'bash':
default:
return { command: `cat ${quotedPath}`, tool: 'cat' };
return {
command: `node -e "console.log(require('fs').readFileSync('${testFile}', 'utf8'))"`,
tool: 'node',
};
}
}
+31 -3
View File
@@ -486,7 +486,8 @@
"version": "2.11.0",
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
"integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
"license": "(Apache-2.0 AND BSD-3-Clause)",
"peer": true
},
"node_modules/@bundled-es-modules/cookie": {
"version": "2.0.1",
@@ -1489,6 +1490,7 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.7.13",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -2195,6 +2197,7 @@
"integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@octokit/auth-token": "^6.0.0",
"@octokit/graphql": "^9.0.2",
@@ -2375,6 +2378,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"license": "Apache-2.0",
"peer": true,
"engines": {
"node": ">=8.0.0"
}
@@ -2424,6 +2428,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
"integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/semantic-conventions": "^1.29.0"
},
@@ -2798,6 +2803,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
"integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/semantic-conventions": "^1.29.0"
@@ -2831,6 +2837,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
"integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0"
@@ -2885,6 +2892,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
"integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0",
@@ -4121,6 +4129,7 @@
"integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"csstype": "^3.0.2"
}
@@ -4395,6 +4404,7 @@
"integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.35.0",
"@typescript-eslint/types": "8.35.0",
@@ -5268,6 +5278,7 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -7402,7 +7413,8 @@
"version": "0.0.1581282",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
"integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/dezalgo": {
"version": "1.0.4",
@@ -7986,6 +7998,7 @@
"integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -8503,6 +8516,7 @@
"resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
"integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
"license": "MIT",
"peer": true,
"dependencies": {
"accepts": "^2.0.0",
"body-parser": "^2.2.1",
@@ -9815,6 +9829,7 @@
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz",
"integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=16.9.0"
}
@@ -10093,6 +10108,7 @@
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.4.11.tgz",
"integrity": "sha512-93LQlzT7vvZ1XJcmOMwN4s+6W334QegendeHOMnEJBlhnpIzr8bws6/aOEHG8ZCuVD/vNeeea5m1msHIdAY6ig==",
"license": "MIT",
"peer": true,
"dependencies": {
"@alcalzone/ansi-tokenize": "^0.2.1",
"ansi-escapes": "^7.0.0",
@@ -13850,6 +13866,7 @@
"resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
"integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -13860,6 +13877,7 @@
"integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"shell-quote": "^1.6.1",
"ws": "^7"
@@ -16009,6 +16027,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -16231,7 +16250,8 @@
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
"license": "0BSD",
"peer": true
},
"node_modules/tsx": {
"version": "4.20.3",
@@ -16239,6 +16259,7 @@
"integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
@@ -16404,6 +16425,7 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"devOptional": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -16626,6 +16648,7 @@
"resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.5.0",
@@ -16739,6 +16762,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -16751,6 +16775,7 @@
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/chai": "^5.2.2",
"@vitest/expect": "3.2.4",
@@ -17398,6 +17423,7 @@
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
@@ -17841,6 +17867,7 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
"integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.8.0",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -17944,6 +17971,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
+21 -5
View File
@@ -6,12 +6,19 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { main } from './src/gemini.js';
import { FatalError, writeToStderr } from '@google/gemini-cli-core';
import { runExitCleanup } from './src/utils/cleanup.js';
// --- Fast Path for Version ---
// We check for version flags at the very top to avoid loading any heavy dependencies.
// process.env.CLI_VERSION is defined during the build process by esbuild.
if (process.argv.includes('--version') || process.argv.includes('-v')) {
console.log(process.env['CLI_VERSION'] || 'unknown');
process.exit(0);
}
// --- Global Entry Point ---
let writeToStderrFn: (message: string) => void = (msg) =>
process.stderr.write(msg);
// Suppress known race condition error in node-pty on Windows
// Tracking bug: https://github.com/microsoft/node-pty/issues/827
process.on('uncaughtException', (error) => {
@@ -28,13 +35,22 @@ process.on('uncaughtException', (error) => {
// For other errors, we rely on the default behavior, but since we attached a listener,
// we must manually replicate it.
if (error instanceof Error) {
writeToStderr(error.stack + '\n');
writeToStderrFn(error.stack + '\n');
} else {
writeToStderr(String(error) + '\n');
writeToStderrFn(String(error) + '\n');
}
process.exit(1);
});
const [{ main }, { FatalError, writeToStderr }, { runExitCleanup }] =
await Promise.all([
import('./src/gemini.js'),
import('@google/gemini-cli-core'),
import('./src/utils/cleanup.js'),
]);
writeToStderrFn = writeToStderr;
main().catch(async (error) => {
// Set a timeout to force exit if cleanup hangs
const cleanupTimeout = setTimeout(() => {
+15 -11
View File
@@ -300,6 +300,7 @@ export class GeminiAgent {
sessionId,
this.clientCapabilities.fs,
config.getFileSystemService(),
cwd,
);
config.setFileSystemService(acpFileSystemService);
}
@@ -357,16 +358,6 @@ export class GeminiAgent {
const { sessionData, sessionPath } =
await sessionSelector.resolveSession(sessionId);
if (this.clientCapabilities?.fs) {
const acpFileSystemService = new AcpFileSystemService(
this.connection,
sessionId,
this.clientCapabilities.fs,
config.getFileSystemService(),
);
config.setFileSystemService(acpFileSystemService);
}
const clientHistory = convertSessionToClientHistory(sessionData.messages);
const geminiClient = config.getGeminiClient();
@@ -440,7 +431,19 @@ export class GeminiAgent {
throw acp.RequestError.authRequired();
}
// 3. Now that we are authenticated, it is safe to initialize the config
// 3. Set the ACP FileSystemService (if supported) before config initialization
if (this.clientCapabilities?.fs) {
const acpFileSystemService = new AcpFileSystemService(
this.connection,
sessionId,
this.clientCapabilities.fs,
config.getFileSystemService(),
cwd,
);
config.setFileSystemService(acpFileSystemService);
}
// 4. Now that we are authenticated, it is safe to initialize the config
// which starts the MCP servers and other heavy resources.
await config.initialize();
startupProfiler.flush(config);
@@ -1625,6 +1628,7 @@ function toPermissionOptions(
case 'info':
case 'ask_user':
case 'exit_plan_mode':
case 'sandbox_expansion':
break;
default: {
const unreachable: never = confirmation;
+135 -12
View File
@@ -4,10 +4,25 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, type Mocked } from 'vitest';
import {
describe,
it,
expect,
vi,
beforeEach,
afterEach,
type Mocked,
} from 'vitest';
import { AcpFileSystemService } from './fileSystemService.js';
import type { AgentSideConnection } from '@agentclientprotocol/sdk';
import type { FileSystemService } from '@google/gemini-cli-core';
import os from 'node:os';
vi.mock('node:os', () => ({
default: {
homedir: vi.fn(),
},
}));
describe('AcpFileSystemService', () => {
let mockConnection: Mocked<AgentSideConnection>;
@@ -25,13 +40,19 @@ describe('AcpFileSystemService', () => {
readTextFile: vi.fn(),
writeTextFile: vi.fn(),
};
vi.mocked(os.homedir).mockReturnValue('/home/user');
});
afterEach(() => {
vi.restoreAllMocks();
});
describe('readTextFile', () => {
it.each([
{
capability: true,
desc: 'connection if capability exists',
path: '/path/to/file',
desc: 'connection if capability exists and file is inside root',
setup: () => {
mockConnection.readTextFile.mockResolvedValue({ content: 'content' });
},
@@ -45,6 +66,7 @@ describe('AcpFileSystemService', () => {
},
{
capability: false,
path: '/path/to/file',
desc: 'fallback if capability missing',
setup: () => {
mockFallback.readTextFile.mockResolvedValue('content');
@@ -56,19 +78,72 @@ describe('AcpFileSystemService', () => {
expect(mockConnection.readTextFile).not.toHaveBeenCalled();
},
},
])('should use $desc', async ({ capability, setup, verify }) => {
{
capability: true,
path: '/outside/file',
desc: 'fallback if capability exists but file is outside root',
setup: () => {
mockFallback.readTextFile.mockResolvedValue('content');
},
verify: () => {
expect(mockFallback.readTextFile).toHaveBeenCalledWith(
'/outside/file',
);
expect(mockConnection.readTextFile).not.toHaveBeenCalled();
},
},
{
capability: true,
path: '/home/user/.gemini/tmp/file.md',
root: '/home/user',
desc: 'fallback if file is inside global gemini dir, even if root overlaps',
setup: () => {
mockFallback.readTextFile.mockResolvedValue('content');
},
verify: () => {
expect(mockFallback.readTextFile).toHaveBeenCalledWith(
'/home/user/.gemini/tmp/file.md',
);
expect(mockConnection.readTextFile).not.toHaveBeenCalled();
},
},
])(
'should use $desc',
async ({ capability, path, root, setup, verify }) => {
service = new AcpFileSystemService(
mockConnection,
'session-1',
{ readTextFile: capability, writeTextFile: true },
mockFallback,
root || '/path/to',
);
setup();
const result = await service.readTextFile(path);
expect(result).toBe('content');
verify();
},
);
it('should throw normalized ENOENT error when readTextFile encounters "Resource not found"', async () => {
service = new AcpFileSystemService(
mockConnection,
'session-1',
{ readTextFile: capability, writeTextFile: true },
{ readTextFile: true, writeTextFile: true },
mockFallback,
'/path/to',
);
mockConnection.readTextFile.mockRejectedValue(
new Error('Resource not found for document'),
);
setup();
const result = await service.readTextFile('/path/to/file');
expect(result).toBe('content');
verify();
await expect(
service.readTextFile('/path/to/missing'),
).rejects.toMatchObject({
code: 'ENOENT',
message: 'Resource not found for document',
});
});
});
@@ -76,7 +151,8 @@ describe('AcpFileSystemService', () => {
it.each([
{
capability: true,
desc: 'connection if capability exists',
path: '/path/to/file',
desc: 'connection if capability exists and file is inside root',
verify: () => {
expect(mockConnection.writeTextFile).toHaveBeenCalledWith({
path: '/path/to/file',
@@ -88,6 +164,7 @@ describe('AcpFileSystemService', () => {
},
{
capability: false,
path: '/path/to/file',
desc: 'fallback if capability missing',
verify: () => {
expect(mockFallback.writeTextFile).toHaveBeenCalledWith(
@@ -97,17 +174,63 @@ describe('AcpFileSystemService', () => {
expect(mockConnection.writeTextFile).not.toHaveBeenCalled();
},
},
])('should use $desc', async ({ capability, verify }) => {
{
capability: true,
path: '/outside/file',
desc: 'fallback if capability exists but file is outside root',
verify: () => {
expect(mockFallback.writeTextFile).toHaveBeenCalledWith(
'/outside/file',
'content',
);
expect(mockConnection.writeTextFile).not.toHaveBeenCalled();
},
},
{
capability: true,
path: '/home/user/.gemini/tmp/file.md',
root: '/home/user',
desc: 'fallback if file is inside global gemini dir, even if root overlaps',
verify: () => {
expect(mockFallback.writeTextFile).toHaveBeenCalledWith(
'/home/user/.gemini/tmp/file.md',
'content',
);
expect(mockConnection.writeTextFile).not.toHaveBeenCalled();
},
},
])('should use $desc', async ({ capability, path, root, verify }) => {
service = new AcpFileSystemService(
mockConnection,
'session-1',
{ writeTextFile: capability, readTextFile: true },
mockFallback,
root || '/path/to',
);
await service.writeTextFile('/path/to/file', 'content');
await service.writeTextFile(path, 'content');
verify();
});
it('should throw normalized ENOENT error when writeTextFile encounters "Resource not found"', async () => {
service = new AcpFileSystemService(
mockConnection,
'session-1',
{ readTextFile: true, writeTextFile: true },
mockFallback,
'/path/to',
);
mockConnection.writeTextFile.mockRejectedValue(
new Error('Resource not found for directory'),
);
await expect(
service.writeTextFile('/path/to/missing', 'content'),
).rejects.toMatchObject({
code: 'ENOENT',
message: 'Resource not found for directory',
});
});
});
});
+53 -15
View File
@@ -4,44 +4,82 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { FileSystemService } from '@google/gemini-cli-core';
import { isWithinRoot, type FileSystemService } from '@google/gemini-cli-core';
import type * as acp from '@agentclientprotocol/sdk';
import os from 'node:os';
import path from 'node:path';
/**
* ACP client-based implementation of FileSystemService
*/
export class AcpFileSystemService implements FileSystemService {
private readonly geminiDir = path.join(os.homedir(), '.gemini');
constructor(
private readonly connection: acp.AgentSideConnection,
private readonly sessionId: string,
private readonly capabilities: acp.FileSystemCapabilities,
private readonly fallback: FileSystemService,
private readonly root: string,
) {}
private shouldUseFallback(filePath: string): boolean {
// Files inside the global CLI directory must always use the native file system,
// even if the user runs the CLI directly from their home directory (which
// would make the IDE's project root overlap with the global directory).
return (
!isWithinRoot(filePath, this.root) ||
isWithinRoot(filePath, this.geminiDir)
);
}
private normalizeFileSystemError(err: unknown): never {
const errorMessage = err instanceof Error ? err.message : String(err);
if (
errorMessage.includes('Resource not found') ||
errorMessage.includes('ENOENT') ||
errorMessage.includes('does not exist') ||
errorMessage.includes('No such file')
) {
const newErr = new Error(errorMessage) as NodeJS.ErrnoException;
newErr.code = 'ENOENT';
throw newErr;
}
throw err;
}
async readTextFile(filePath: string): Promise<string> {
if (!this.capabilities.readTextFile) {
if (!this.capabilities.readTextFile || this.shouldUseFallback(filePath)) {
return this.fallback.readTextFile(filePath);
}
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
const response = await this.connection.readTextFile({
path: filePath,
sessionId: this.sessionId,
});
try {
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
const response = await this.connection.readTextFile({
path: filePath,
sessionId: this.sessionId,
});
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
return response.content;
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
return response.content;
} catch (err: unknown) {
this.normalizeFileSystemError(err);
}
}
async writeTextFile(filePath: string, content: string): Promise<void> {
if (!this.capabilities.writeTextFile) {
if (!this.capabilities.writeTextFile || this.shouldUseFallback(filePath)) {
return this.fallback.writeTextFile(filePath, content);
}
await this.connection.writeTextFile({
path: filePath,
content,
sessionId: this.sessionId,
});
try {
await this.connection.writeTextFile({
path: filePath,
content,
sessionId: this.sessionId,
});
} catch (err: unknown) {
this.normalizeFileSystemError(err);
}
}
}
+11 -6
View File
@@ -700,7 +700,10 @@ export const AppContainer = (props: AppContainerProps) => {
// Derive auth state variables for backward compatibility with UIStateContext
const isAuthDialogOpen = authState === AuthState.Updating;
const isAuthenticating = authState === AuthState.Unauthenticated;
// TODO: Consider handling other auth types that should also skip the blocking screen
const isAuthenticating =
authState === AuthState.Unauthenticated &&
settings.merged.security.auth.selectedType !== AuthType.USE_GEMINI;
// Session browser and resume functionality
const isGeminiClientInitialized = config.getGeminiClient()?.isInitialized();
@@ -1300,7 +1303,8 @@ Logging in with Google... Restarting Gemini CLI to continue.
return;
}
if (isSlash || (isIdle && isMcpReady)) {
const isMcpOrConfigReady = isConfigInitialized && isMcpReady;
if ((isSlash && isConfigInitialized) || (isIdle && isMcpOrConfigReady)) {
if (!isSlash) {
const permissions = await checkPermissions(submittedValue, config);
if (permissions.length > 0) {
@@ -1323,10 +1327,12 @@ Logging in with Google... Restarting Gemini CLI to continue.
void submitQuery(submittedValue);
} else {
// Check messageQueue.length === 0 to only notify on the first queued item
if (isIdle && !isMcpReady && messageQueue.length === 0) {
if (isIdle && !isMcpOrConfigReady && messageQueue.length === 0) {
coreEvents.emitFeedback(
'info',
'Waiting for MCP servers to initialize... Slash commands are still available and prompts will be queued.',
!isConfigInitialized
? 'Initializing... Prompts will be queued.'
: 'Waiting for MCP servers to initialize... Slash commands are still available and prompts will be queued.',
);
}
addMessage(submittedValue);
@@ -1350,6 +1356,7 @@ Logging in with Google... Restarting Gemini CLI to continue.
refreshStatic,
reset,
handleHintSubmit,
isConfigInitialized,
triggerExpandHint,
],
);
@@ -1380,11 +1387,9 @@ Logging in with Google... Restarting Gemini CLI to continue.
* - Any future streaming states not explicitly allowed
*/
const isInputActive =
isConfigInitialized &&
!initError &&
!isProcessing &&
!isResuming &&
!!slashCommands &&
(streamingState === StreamingState.Idle ||
streamingState === StreamingState.Responding ||
streamingState === StreamingState.WaitingForConfirmation) &&
+2 -6
View File
@@ -518,12 +518,8 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => {
flexGrow={0}
flexShrink={0}
>
{(!uiState.slashCommands ||
!uiState.isConfigInitialized ||
uiState.isResuming) && (
<ConfigInitDisplay
message={uiState.isResuming ? 'Resuming session...' : undefined}
/>
{uiState.isResuming && (
<ConfigInitDisplay message="Resuming session..." />
)}
{showUiDetails && (
@@ -80,7 +80,6 @@ function usePlanContent(planPath: string, config: Config): PlanContentState {
const pathError = await validatePlanPath(
planPath,
config.storage.getPlansDir(),
config.getTargetDir(),
);
if (ignore) return;
if (pathError) {
@@ -47,6 +47,7 @@ describe('ToolConfirmationQueue', () => {
const mockConfig = {
isTrustedFolder: () => true,
getIdeMode: () => false,
getApprovalMode: () => 'default',
getDisableAlwaysAllow: () => false,
getModel: () => 'gemini-pro',
getDebugMode: () => false,
@@ -22,6 +22,7 @@ describe('ToolConfirmationMessage Redirection', () => {
isTrustedFolder: () => true,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
it('should display redirection warning and tip for redirected commands', async () => {
@@ -153,7 +153,7 @@ export const SubagentProgressDisplay: React.FC<
})}
</Box>
{progress.state === 'completed' && progress.result && (
{progress.result && (
<Box flexDirection="column" marginTop={1}>
{progress.terminateReason && progress.terminateReason !== 'GOAL' && (
<Box marginBottom={1}>
@@ -164,7 +164,7 @@ export const SubagentProgressDisplay: React.FC<
)}
<MarkdownDisplay
text={safeJsonToMarkdown(progress.result)}
isPending={false}
isPending={progress.state !== 'completed'}
terminalWidth={terminalWidth}
/>
</Box>
@@ -40,6 +40,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
it('should not display urls if prompt and url are the same', async () => {
@@ -324,6 +325,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
const { lastFrame, unmount } = await renderWithProviders(
<ToolConfirmationMessage
@@ -345,6 +347,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => false,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
const { lastFrame, unmount } = await renderWithProviders(
@@ -380,6 +383,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
const { lastFrame, unmount } = await renderWithProviders(
<ToolConfirmationMessage
@@ -406,6 +410,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
const { lastFrame, unmount } = await renderWithProviders(
<ToolConfirmationMessage
@@ -447,6 +452,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => false,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
vi.mocked(useToolActions).mockReturnValue({
confirm: vi.fn(),
@@ -473,6 +479,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => true,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
vi.mocked(useToolActions).mockReturnValue({
confirm: vi.fn(),
@@ -499,6 +506,7 @@ describe('ToolConfirmationMessage', () => {
isTrustedFolder: () => true,
getIdeMode: () => true,
getDisableAlwaysAllow: () => false,
getApprovalMode: () => 'default',
} as unknown as Config;
vi.mocked(useToolActions).mockReturnValue({
confirm: vi.fn(),
@@ -15,6 +15,7 @@ import {
type ToolConfirmationPayload,
ToolConfirmationOutcome,
type EditorType,
ApprovalMode,
hasRedirection,
debugLogger,
} from '@google/gemini-cli-core';
@@ -314,6 +315,31 @@ export const ToolConfirmationMessage: React.FC<
key: 'No, suggest changes (esc)',
});
}
} else if (confirmationDetails.type === 'sandbox_expansion') {
options.push({
label: 'Allow once',
value: ToolConfirmationOutcome.ProceedOnce,
key: 'Allow once',
});
if (isTrustedFolder) {
options.push({
label: 'Allow for this session',
value: ToolConfirmationOutcome.ProceedAlways,
key: 'Allow for this session',
});
if (allowPermanentApproval) {
options.push({
label: 'Allow for all future sessions',
value: ToolConfirmationOutcome.ProceedAlwaysAndSave,
key: 'Allow for all future sessions',
});
}
}
options.push({
label: 'No, suggest changes (esc)',
value: ToolConfirmationOutcome.Cancel,
key: 'No, suggest changes (esc)',
});
} else if (confirmationDetails.type === 'exec') {
options.push({
label: 'Allow once',
@@ -546,6 +572,8 @@ export const ToolConfirmationMessage: React.FC<
if (!confirmationDetails.isModifying) {
question = `Apply this change?`;
}
} else if (confirmationDetails.type === 'sandbox_expansion') {
question = `Allow sandbox expansion for: '${sanitizeForDisplay(confirmationDetails.rootCommand)}'?`;
} else if (confirmationDetails.type === 'exec') {
const executionProps = confirmationDetails;
@@ -573,6 +601,52 @@ export const ToolConfirmationMessage: React.FC<
/>
);
}
} else if (confirmationDetails.type === 'sandbox_expansion') {
const { additionalPermissions } = confirmationDetails;
const readPaths = additionalPermissions?.fileSystem?.read || [];
const writePaths = additionalPermissions?.fileSystem?.write || [];
const network = additionalPermissions?.network;
bodyContent = (
<Box flexDirection="column" padding={1}>
<Text color={theme.text.secondary} italic>
The agent is requesting additional sandbox permissions to execute
this command:
</Text>
<Box paddingY={1}>
<Text color={theme.text.secondary}>
{sanitizeForDisplay(confirmationDetails.command)}
</Text>
</Box>
{network && (
<Box>
<Text color={theme.status.warning}> Network Access</Text>
</Box>
)}
{readPaths.length > 0 && (
<Box flexDirection="column">
<Text color={theme.status.success}> Read Access:</Text>
{readPaths.map((p, i) => (
<Text key={i} color={theme.text.secondary}>
{' '}
{sanitizeForDisplay(p)}
</Text>
))}
</Box>
)}
{writePaths.length > 0 && (
<Box flexDirection="column">
<Text color={theme.status.error}> Write Access:</Text>
{writePaths.map((p, i) => (
<Text key={i} color={theme.text.secondary}>
{' '}
{sanitizeForDisplay(p)}
</Text>
))}
</Box>
)}
</Box>
);
} else if (confirmationDetails.type === 'exec') {
const executionProps = confirmationDetails;
@@ -587,7 +661,8 @@ export const ToolConfirmationMessage: React.FC<
let bodyContentHeight = availableBodyContentHeight();
let warnings: React.ReactNode = null;
if (containsRedirection) {
const isAutoEdit = config.getApprovalMode() === ApprovalMode.AUTO_EDIT;
if (containsRedirection && !isAutoEdit) {
// Calculate lines needed for Note and Tip
const safeWidth = Math.max(terminalWidth, 1);
const noteLength =
@@ -737,6 +812,7 @@ export const ToolConfirmationMessage: React.FC<
isTrustedFolder,
allowPermanentApproval,
settings,
config,
]);
const bodyOverflowDirection: 'top' | 'bottom' =
@@ -26,15 +26,15 @@ function compileWindowsSandbox() {
const srcHelperPath = path.resolve(
__dirname,
'../src/services/scripts/GeminiSandbox.exe',
'../src/sandbox/windows/GeminiSandbox.exe',
);
const distHelperPath = path.resolve(
__dirname,
'../dist/src/services/scripts/GeminiSandbox.exe',
'../dist/src/sandbox/windows/GeminiSandbox.exe',
);
const sourcePath = path.resolve(
__dirname,
'../src/services/scripts/GeminiSandbox.cs',
'../src/sandbox/windows/GeminiSandbox.cs',
);
if (!fs.existsSync(sourcePath)) {
+1 -1
View File
@@ -403,7 +403,7 @@ describe('a2aUtils', () => {
const output = reassembler.toString();
expect(output).toBe(
'Analyzing...\n\nProcessing...\n\nArtifact (Code):\nprint("Done")',
'Analyzing...Processing...\n\nArtifact (Code):\nprint("Done")',
);
});
+25 -2
View File
@@ -16,6 +16,7 @@ import type {
AgentInterface,
} from '@a2a-js/sdk';
import type { SendMessageResult } from './a2a-client-manager.js';
import type { SubagentActivityItem } from './types.js';
export const AUTH_REQUIRED_MSG = `[Authorization Required] The agent has indicated it requires authorization to proceed. Please follow the agent's instructions.`;
@@ -123,17 +124,39 @@ export class A2AResultReassembler {
private pushMessage(message: Message | undefined) {
if (!message) return;
const text = extractPartsText(message.parts, '\n');
const text = extractPartsText(message.parts, '');
if (text && this.messageLog[this.messageLog.length - 1] !== text) {
this.messageLog.push(text);
}
}
/**
* Returns an array of activity items representing the current reassembled state.
*/
toActivityItems(): SubagentActivityItem[] {
const isAuthRequired = this.messageLog.includes(AUTH_REQUIRED_MSG);
return [
isAuthRequired
? {
id: 'auth-required',
type: 'thought',
content: AUTH_REQUIRED_MSG,
status: 'running',
}
: {
id: 'pending',
type: 'thought',
content: 'Working...',
status: 'running',
},
];
}
/**
* Returns a human-readable string representation of the current reassembled state.
*/
toString(): string {
const joinedMessages = this.messageLog.join('\n\n');
const joinedMessages = this.messageLog.join('');
const artifactsOutput = Array.from(this.artifacts.keys())
.map((id) => {
@@ -20,7 +20,7 @@ import {
type A2AClientManager,
} from './a2a-client-manager.js';
import type { RemoteAgentDefinition } from './types.js';
import type { RemoteAgentDefinition, SubagentProgress } from './types.js';
import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
import { A2AAuthProviderFactory } from './auth-provider/factory.js';
import type { A2AAuthProvider } from './auth-provider/types.js';
@@ -266,9 +266,11 @@ describe('RemoteAgentInvocation', () => {
);
const result = await invocation.execute(new AbortController().signal);
expect(result.error?.message).toContain(
"Failed to create auth provider for agent 'test-agent'",
);
expect(result.returnDisplay).toMatchObject({
result: expect.stringContaining(
"Failed to create auth provider for agent 'test-agent'",
),
});
});
it('should not load the agent if already present', async () => {
@@ -325,7 +327,9 @@ describe('RemoteAgentInvocation', () => {
// Execute first time
const result1 = await invocation1.execute(new AbortController().signal);
expect(result1.returnDisplay).toBe('Response 1');
expect(result1.returnDisplay).toMatchObject({
result: 'Response 1',
});
expect(mockClientManager.sendMessageStream).toHaveBeenLastCalledWith(
'test-agent',
'first',
@@ -355,7 +359,9 @@ describe('RemoteAgentInvocation', () => {
mockMessageBus,
);
const result2 = await invocation2.execute(new AbortController().signal);
expect(result2.returnDisplay).toBe('Response 2');
expect((result2.returnDisplay as SubagentProgress).result).toBe(
'Response 2',
);
expect(mockClientManager.sendMessageStream).toHaveBeenLastCalledWith(
'test-agent',
@@ -444,8 +450,22 @@ describe('RemoteAgentInvocation', () => {
);
await invocation.execute(new AbortController().signal, updateOutput);
expect(updateOutput).toHaveBeenCalledWith('Hello');
expect(updateOutput).toHaveBeenCalledWith('Hello\n\nHello World');
expect(updateOutput).toHaveBeenCalledWith(
expect.objectContaining({
isSubagentProgress: true,
state: 'running',
recentActivity: expect.arrayContaining([
expect.objectContaining({ content: 'Working...' }),
]),
}),
);
expect(updateOutput).toHaveBeenCalledWith(
expect.objectContaining({
isSubagentProgress: true,
state: 'completed',
result: 'HelloHello World',
}),
);
});
it('should abort when signal is aborted during streaming', async () => {
@@ -478,8 +498,7 @@ describe('RemoteAgentInvocation', () => {
);
const result = await invocation.execute(controller.signal);
expect(result.error).toBeDefined();
expect(result.error?.message).toContain('Operation aborted');
expect(result.returnDisplay).toMatchObject({ state: 'error' });
});
it('should handle errors gracefully', async () => {
@@ -501,9 +520,10 @@ describe('RemoteAgentInvocation', () => {
);
const result = await invocation.execute(new AbortController().signal);
expect(result.error).toBeDefined();
expect(result.error?.message).toContain('Network error');
expect(result.returnDisplay).toContain('Network error');
expect(result.returnDisplay).toMatchObject({
state: 'error',
result: expect.stringContaining('Network error'),
});
});
it('should use a2a helpers for extracting text', async () => {
@@ -534,7 +554,9 @@ describe('RemoteAgentInvocation', () => {
const result = await invocation.execute(new AbortController().signal);
// Just check that text is present, exact formatting depends on helper
expect(result.returnDisplay).toContain('Extracted text');
expect((result.returnDisplay as SubagentProgress).result).toContain(
'Extracted text',
);
});
it('should handle mixed response types during streaming (TaskStatusUpdateEvent + Message)', async () => {
@@ -577,9 +599,25 @@ describe('RemoteAgentInvocation', () => {
updateOutput,
);
expect(updateOutput).toHaveBeenCalledWith('Thinking...');
expect(updateOutput).toHaveBeenCalledWith('Thinking...\n\nFinal Answer');
expect(result.returnDisplay).toBe('Thinking...\n\nFinal Answer');
expect(updateOutput).toHaveBeenCalledWith(
expect.objectContaining({
isSubagentProgress: true,
state: 'running',
recentActivity: expect.arrayContaining([
expect.objectContaining({ content: 'Working...' }),
]),
}),
);
expect(updateOutput).toHaveBeenCalledWith(
expect.objectContaining({
isSubagentProgress: true,
state: 'completed',
result: 'Thinking...Final Answer',
}),
);
expect(result.returnDisplay).toMatchObject({
result: 'Thinking...Final Answer',
});
});
it('should handle artifact reassembly with append: true', async () => {
@@ -635,12 +673,21 @@ describe('RemoteAgentInvocation', () => {
);
await invocation.execute(new AbortController().signal, updateOutput);
expect(updateOutput).toHaveBeenCalledWith('Generating...');
expect(updateOutput).toHaveBeenCalledWith(
'Generating...\n\nArtifact (Result):\nPart 1',
expect.objectContaining({
isSubagentProgress: true,
state: 'running',
recentActivity: expect.arrayContaining([
expect.objectContaining({ content: 'Working...' }),
]),
}),
);
expect(updateOutput).toHaveBeenCalledWith(
'Generating...\n\nArtifact (Result):\nPart 1 Part 2',
expect.objectContaining({
isSubagentProgress: true,
state: 'completed',
result: 'Generating...\n\nArtifact (Result):\nPart 1 Part 2',
}),
);
});
});
@@ -694,8 +741,10 @@ describe('RemoteAgentInvocation', () => {
);
const result = await invocation.execute(new AbortController().signal);
expect(result.error).toBeDefined();
expect(result.returnDisplay).toContain(a2aError.userMessage);
expect(result.returnDisplay).toMatchObject({ state: 'error' });
expect((result.returnDisplay as SubagentProgress).result).toContain(
a2aError.userMessage,
);
});
it('should use generic message for non-A2AAgentError errors', async () => {
@@ -712,8 +761,8 @@ describe('RemoteAgentInvocation', () => {
);
const result = await invocation.execute(new AbortController().signal);
expect(result.error).toBeDefined();
expect(result.returnDisplay).toContain(
expect(result.returnDisplay).toMatchObject({ state: 'error' });
expect((result.returnDisplay as SubagentProgress).result).toContain(
'Error calling remote agent: something unexpected',
);
});
@@ -741,10 +790,14 @@ describe('RemoteAgentInvocation', () => {
);
const result = await invocation.execute(new AbortController().signal);
expect(result.error).toBeDefined();
expect(result.returnDisplay).toMatchObject({ state: 'error' });
// Should contain both the partial output and the error message
expect(result.returnDisplay).toContain('Partial response');
expect(result.returnDisplay).toContain('connection reset');
expect(result.returnDisplay).toMatchObject({
result: expect.stringContaining('Partial response'),
});
expect(result.returnDisplay).toMatchObject({
result: expect.stringContaining('connection reset'),
});
});
});
});
+53 -6
View File
@@ -15,6 +15,7 @@ import {
type RemoteAgentInputs,
type RemoteAgentDefinition,
type AgentInputs,
type SubagentProgress,
} from './types.js';
import { type AgentLoopContext } from '../config/agent-loop-context.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
@@ -25,7 +26,6 @@ import type {
import { extractIdsFromResponse, A2AResultReassembler } from './a2aUtils.js';
import type { AuthenticationHandler } from '@a2a-js/sdk/client';
import { debugLogger } from '../utils/debugLogger.js';
import { safeJsonToMarkdown } from '../utils/markdownUtils.js';
import type { AnsiOutput } from '../utils/terminalSerializer.js';
import { A2AAuthProviderFactory } from './auth-provider/factory.js';
import { A2AAgentError } from './a2a-errors.js';
@@ -125,13 +125,30 @@ export class RemoteAgentInvocation extends BaseToolInvocation<
async execute(
_signal: AbortSignal,
updateOutput?: (output: string | AnsiOutput) => void,
updateOutput?: (output: string | AnsiOutput | SubagentProgress) => void,
): Promise<ToolResult> {
// 1. Ensure the agent is loaded (cached by manager)
// We assume the user has provided an access token via some mechanism (TODO),
// or we rely on ADC.
const reassembler = new A2AResultReassembler();
const agentName = this.definition.displayName ?? this.definition.name;
try {
if (updateOutput) {
updateOutput({
isSubagentProgress: true,
agentName,
state: 'running',
recentActivity: [
{
id: 'pending',
type: 'thought',
content: 'Working...',
status: 'running',
},
],
});
}
const priorState = RemoteAgentInvocation.sessionState.get(
this.definition.name,
);
@@ -172,7 +189,13 @@ export class RemoteAgentInvocation extends BaseToolInvocation<
reassembler.update(chunk);
if (updateOutput) {
updateOutput(reassembler.toString());
updateOutput({
isSubagentProgress: true,
agentName,
state: 'running',
recentActivity: reassembler.toActivityItems(),
result: reassembler.toString(),
});
}
const {
@@ -198,9 +221,21 @@ export class RemoteAgentInvocation extends BaseToolInvocation<
`[RemoteAgent] Final response from ${this.definition.name}:\n${JSON.stringify(finalResponse, null, 2)}`,
);
const finalProgress: SubagentProgress = {
isSubagentProgress: true,
agentName,
state: 'completed',
result: finalOutput,
recentActivity: reassembler.toActivityItems(),
};
if (updateOutput) {
updateOutput(finalProgress);
}
return {
llmContent: [{ text: finalOutput }],
returnDisplay: safeJsonToMarkdown(finalOutput),
returnDisplay: finalProgress,
};
} catch (error: unknown) {
const partialOutput = reassembler.toString();
@@ -209,10 +244,22 @@ export class RemoteAgentInvocation extends BaseToolInvocation<
const fullDisplay = partialOutput
? `${partialOutput}\n\n${errorMessage}`
: errorMessage;
const errorProgress: SubagentProgress = {
isSubagentProgress: true,
agentName,
state: 'error',
result: fullDisplay,
recentActivity: reassembler.toActivityItems(),
};
if (updateOutput) {
updateOutput(errorProgress);
}
return {
llmContent: [{ text: fullDisplay }],
returnDisplay: fullDisplay,
error: { message: errorMessage },
returnDisplay: errorProgress,
};
} finally {
// Persist state even on partial failures or aborts to maintain conversational continuity.
+61 -6
View File
@@ -6,6 +6,7 @@
import * as fs from 'node:fs';
import * as path from 'node:path';
import { SandboxPolicyManager } from '../policy/sandboxPolicyManager.js';
import { inspect } from 'node:util';
import process from 'node:process';
import { z } from 'zod';
@@ -730,7 +731,8 @@ export class Config implements McpContext, AgentLoopContext {
private readonly telemetrySettings: TelemetrySettings;
private readonly usageStatisticsEnabled: boolean;
private _geminiClient!: GeminiClient;
private readonly _sandboxManager: SandboxManager;
private _sandboxManager: SandboxManager;
private readonly _sandboxPolicyManager: SandboxPolicyManager;
private baseLlmClient!: BaseLlmClient;
private localLiteRtLmClient?: LocalLiteRtLmClient;
private modelRouterService: ModelRouterService;
@@ -905,14 +907,14 @@ export class Config implements McpContext, AgentLoopContext {
params.embeddingModel ?? DEFAULT_GEMINI_EMBEDDING_MODEL;
this.sandbox = params.sandbox
? {
enabled: params.sandbox.enabled ?? false,
enabled: params.sandbox.enabled || params.toolSandboxing || false,
allowedPaths: params.sandbox.allowedPaths ?? [],
networkAccess: params.sandbox.networkAccess ?? false,
command: params.sandbox.command,
image: params.sandbox.image,
}
: {
enabled: false,
enabled: params.toolSandboxing || false,
allowedPaths: [],
networkAccess: false,
};
@@ -931,6 +933,30 @@ export class Config implements McpContext, AgentLoopContext {
this.fileSystemService = new StandardFileSystemService();
}
this._sandboxPolicyManager = new SandboxPolicyManager();
const initialApprovalMode =
params.approvalMode ??
params.policyEngineConfig?.approvalMode ??
'default';
this._sandboxManager = createSandboxManager(
this.sandbox,
params.targetDir,
this._sandboxPolicyManager,
initialApprovalMode,
);
if (
!(this._sandboxManager instanceof NoopSandboxManager) &&
this.sandbox?.enabled
) {
this.fileSystemService = new SandboxedFileSystemService(
this._sandboxManager,
params.targetDir,
);
} else {
this.fileSystemService = new StandardFileSystemService();
}
this.targetDir = path.resolve(params.targetDir);
this.folderTrust = params.folderTrust ?? false;
this.workspaceContext = new WorkspaceContext(this.targetDir, []);
@@ -1160,12 +1186,19 @@ export class Config implements McpContext, AgentLoopContext {
params.policyUpdateConfirmationRequest;
this.disableAlwaysAllow = params.disableAlwaysAllow ?? false;
const engineApprovalMode =
params.approvalMode ??
params.policyEngineConfig?.approvalMode ??
ApprovalMode.DEFAULT;
this.policyEngine = new PolicyEngine(
{
...params.policyEngineConfig,
approvalMode:
params.approvalMode ?? params.policyEngineConfig?.approvalMode,
approvalMode: engineApprovalMode,
disableAlwaysAllow: this.disableAlwaysAllow,
toolSandboxEnabled: this.getSandboxEnabled(),
sandboxApprovedTools:
this.sandboxPolicyManager?.getModeConfig(engineApprovalMode)
?.approvedTools ?? [],
},
checkerRunner,
);
@@ -1560,6 +1593,20 @@ export class Config implements McpContext, AgentLoopContext {
return this._geminiClient;
}
private refreshSandboxManager(): void {
this._sandboxManager = createSandboxManager(
this.sandbox,
this.targetDir,
this._sandboxPolicyManager,
this.getApprovalMode(),
);
this.shellExecutionConfig.sandboxManager = this._sandboxManager;
}
get sandboxPolicyManager() {
return this._sandboxPolicyManager;
}
get sandboxManager(): SandboxManager {
return this._sandboxManager;
}
@@ -2288,6 +2335,10 @@ export class Config implements McpContext, AgentLoopContext {
return this.policyEngine.getApprovalMode();
}
isPlanMode(): boolean {
return this.getApprovalMode() === ApprovalMode.PLAN;
}
getPolicyUpdateConfirmationRequest():
| PolicyUpdateConfirmationRequest
| undefined {
@@ -2339,7 +2390,11 @@ export class Config implements McpContext, AgentLoopContext {
);
}
this.policyEngine.setApprovalMode(mode);
this.policyEngine.setApprovalMode(
mode,
this.sandboxPolicyManager?.getModeConfig(mode)?.approvedTools ?? [],
);
this.refreshSandboxManager();
const isPlanModeTransition =
currentMode !== mode &&
@@ -22,6 +22,7 @@ vi.mock('../confirmation-bus/message-bus.js', () => ({
vi.mock('../policy/policy-engine.js', () => ({
PolicyEngine: vi.fn().mockImplementation(() => ({
getExcludedTools: vi.fn().mockReturnValue(new Set()),
getApprovalMode: vi.fn().mockReturnValue('yolo'),
})),
}));
vi.mock('../skills/skillManager.js', () => ({
@@ -11,6 +11,7 @@ import type {
DiffStat,
} from '../tools/tools.js';
import type { ToolCall } from '../scheduler/types.js';
import type { SandboxPermissions } from '../services/sandboxManager.js';
export enum MessageBusType {
TOOL_CONFIRMATION_REQUEST = 'tool-confirmation-request',
@@ -78,6 +79,14 @@ export interface ToolConfirmationResponse {
* Data-only versions of ToolCallConfirmationDetails for bus transmission.
*/
export type SerializableConfirmationDetails =
| {
type: 'sandbox_expansion';
title: string;
command: string;
rootCommand: string;
additionalPermissions: SandboxPermissions;
systemMessage?: string;
}
| {
type: 'info';
title: string;
@@ -42,6 +42,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -213,6 +214,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -503,6 +505,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -674,6 +677,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -845,6 +849,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -968,6 +973,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -1564,6 +1570,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -1731,6 +1738,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -1889,6 +1897,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -2047,6 +2056,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -2201,6 +2211,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -2355,6 +2366,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -2503,6 +2515,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -2656,6 +2669,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -2934,6 +2948,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -3340,6 +3355,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -3494,6 +3510,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -3760,6 +3777,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -3914,6 +3932,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
+2
View File
@@ -89,6 +89,7 @@ describe('Core System Prompt (prompts.ts)', () => {
mockConfig = {
getToolRegistry: vi.fn().mockReturnValue(mockRegistry),
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
getSandboxEnabled: vi.fn().mockReturnValue(false),
storage: {
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project-temp'),
getPlansDir: vi.fn().mockReturnValue('/tmp/project-temp/plans'),
@@ -418,6 +419,7 @@ describe('Core System Prompt (prompts.ts)', () => {
const testConfig = {
getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry),
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
getSandboxEnabled: vi.fn().mockReturnValue(false),
storage: {
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project-temp'),
},
+1 -1
View File
@@ -126,7 +126,7 @@ export * from './services/FolderTrustDiscoveryService.js';
export * from './services/chatRecordingService.js';
export * from './services/fileSystemService.js';
export * from './services/sandboxedFileSystemService.js';
export * from './services/windowsSandboxManager.js';
export * from './sandbox/windows/WindowsSandboxManager.js';
export * from './services/sessionSummaryUtils.js';
export * from './services/contextManager.js';
export * from './services/trackerService.js';
@@ -0,0 +1,19 @@
[modes.plan]
network = false
readonly = true
approvedTools = []
allowOverrides = false
[modes.default]
network = false
readonly = true
approvedTools = []
allowOverrides = true
[modes.accepting_edits]
network = false
readonly = false
approvedTools = ['sed', 'grep', 'awk', 'perl', 'cat', 'echo']
allowOverrides = true
[commands]
@@ -329,7 +329,11 @@ describe('PolicyEngine', () => {
);
// Switch to autoEdit mode
engine.setApprovalMode(ApprovalMode.AUTO_EDIT);
engine = new PolicyEngine({
rules,
approvalMode: ApprovalMode.AUTO_EDIT,
toolSandboxEnabled: true,
});
expect((await engine.check({ name: 'edit' }, undefined)).decision).toBe(
PolicyDecision.ALLOW,
);
@@ -1427,14 +1431,14 @@ describe('PolicyEngine', () => {
engine = new PolicyEngine({ rules });
// Atomic command "whoami" matches the wildcard rule (ASK_USER).
// Atomic command "unknown_command" matches the wildcard rule (ASK_USER).
// It should NOT be upgraded to ALLOW.
expect(
(
await engine.check(
{
name: 'run_shell_command',
args: { command: 'whoami' },
args: { command: 'unknown_command' },
},
undefined,
)
@@ -1572,7 +1576,7 @@ describe('PolicyEngine', () => {
},
];
engine = new PolicyEngine({ rules });
engine = new PolicyEngine({ rules, toolSandboxEnabled: true });
engine.setApprovalMode(ApprovalMode.AUTO_EDIT);
const result = await engine.check(
+89 -9
View File
@@ -5,6 +5,11 @@
*/
import { type FunctionCall } from '@google/genai';
import {
isDangerousCommand,
isKnownSafeCommand,
} from '../sandbox/macos/commandSafety.js';
import { parse as shellParse } from 'shell-quote';
import {
PolicyDecision,
type PolicyEngineConfig,
@@ -192,6 +197,8 @@ export class PolicyEngine {
private readonly disableAlwaysAllow: boolean;
private readonly checkerRunner?: CheckerRunner;
private approvalMode: ApprovalMode;
private toolSandboxEnabled: boolean;
private sandboxApprovedTools: string[];
constructor(config: PolicyEngineConfig = {}, checkerRunner?: CheckerRunner) {
this.rules = (config.rules ?? []).sort(
@@ -242,13 +249,18 @@ export class PolicyEngine {
this.disableAlwaysAllow = config.disableAlwaysAllow ?? false;
this.checkerRunner = checkerRunner;
this.approvalMode = config.approvalMode ?? ApprovalMode.DEFAULT;
this.toolSandboxEnabled = config.toolSandboxEnabled ?? false;
this.sandboxApprovedTools = config.sandboxApprovedTools ?? [];
}
/**
* Update the current approval mode.
*/
setApprovalMode(mode: ApprovalMode): void {
setApprovalMode(mode: ApprovalMode, sandboxApprovedTools?: string[]): void {
this.approvalMode = mode;
if (sandboxApprovedTools !== undefined) {
this.sandboxApprovedTools = sandboxApprovedTools;
}
}
/**
@@ -269,17 +281,58 @@ export class PolicyEngine {
command: string,
allowRedirection?: boolean,
): boolean {
return (
!allowRedirection &&
hasRedirection(command) &&
this.approvalMode !== ApprovalMode.AUTO_EDIT &&
this.approvalMode !== ApprovalMode.YOLO
);
if (allowRedirection) return false;
if (!hasRedirection(command)) return false;
// Do not downgrade (do not ask user) if sandboxing is enabled and in AUTO_EDIT or YOLO
if (
this.toolSandboxEnabled &&
(this.approvalMode === ApprovalMode.AUTO_EDIT ||
this.approvalMode === ApprovalMode.YOLO)
) {
return false;
}
return true;
}
/**
* Check if a shell command is allowed.
*/
private async applyShellHeuristics(
command: string,
decision: PolicyDecision,
): Promise<PolicyDecision> {
await initializeShellParsers();
try {
const parsedObjArgs = shellParse(command);
if (parsedObjArgs.some((arg) => typeof arg === 'object')) return decision;
const parsedArgs = parsedObjArgs.map(String);
if (isDangerousCommand(parsedArgs)) {
debugLogger.debug(
`[PolicyEngine.check] Command evaluated as dangerous, forcing ASK_USER: ${command}`,
);
return PolicyDecision.ASK_USER;
}
const isApprovedBySandbox =
this.toolSandboxEnabled &&
this.sandboxApprovedTools.includes(parsedArgs[0]);
if (
(isKnownSafeCommand(parsedArgs) || isApprovedBySandbox) &&
decision === PolicyDecision.ASK_USER
) {
debugLogger.debug(
`[PolicyEngine.check] Command evaluated as known safe, overriding ASK_USER to ALLOW: ${command}`,
);
return PolicyDecision.ALLOW;
}
} catch {
// Ignore parsing errors
}
return decision;
}
private async checkShellCommand(
toolName: string,
command: string | undefined,
@@ -522,11 +575,21 @@ export class PolicyEngine {
`[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
);
let ruleDecision = rule.decision;
if (
isShellCommand &&
command &&
!('commandPrefix' in rule) &&
!rule.argsPattern
) {
ruleDecision = await this.applyShellHeuristics(command, ruleDecision);
}
if (isShellCommand && toolName) {
const shellResult = await this.checkShellCommand(
toolName,
command,
rule.decision,
ruleDecision,
serverName,
shellDirPath,
rule.allowRedirection,
@@ -562,10 +625,18 @@ export class PolicyEngine {
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
);
if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
let heuristicDecision = this.defaultDecision;
if (command) {
heuristicDecision = await this.applyShellHeuristics(
command,
heuristicDecision,
);
}
const shellResult = await this.checkShellCommand(
toolName,
command,
this.defaultDecision,
heuristicDecision,
serverName,
shellDirPath,
false,
@@ -631,6 +702,15 @@ export class PolicyEngine {
}
}
// Sandbox Expansion requests MUST always be confirmed by the user,
// even if the base command is otherwise ALLOWED by the policy engine.
if (
decision === PolicyDecision.ALLOW &&
toolCall.args?.['additional_permissions']
) {
decision = PolicyDecision.ASK_USER;
}
return {
decision: this.applyNonInteractiveMode(decision),
rule: matchedRule,
@@ -0,0 +1,216 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import toml from '@iarna/toml';
import { z } from 'zod';
import { fileURLToPath } from 'node:url';
import { debugLogger } from '../utils/debugLogger.js';
import { type SandboxPermissions } from '../services/sandboxManager.js';
import { sanitizePaths } from '../services/sandboxManager.js';
export const SandboxModeConfigSchema = z.object({
network: z.boolean(),
readonly: z.boolean(),
approvedTools: z.array(z.string()),
allowOverrides: z.boolean().optional(),
});
export const PersistentCommandConfigSchema = z.object({
allowed_paths: z.array(z.string()).optional(),
allow_network: z.boolean().optional(),
});
export const SandboxTomlSchema = z.object({
modes: z.object({
plan: SandboxModeConfigSchema,
default: SandboxModeConfigSchema,
accepting_edits: SandboxModeConfigSchema,
}),
commands: z.record(z.string(), PersistentCommandConfigSchema).default({}),
});
export type SandboxModeConfig = z.infer<typeof SandboxModeConfigSchema>;
export type PersistentCommandConfig = z.infer<
typeof PersistentCommandConfigSchema
>;
export type SandboxTomlSchemaType = z.infer<typeof SandboxTomlSchema>;
export class SandboxPolicyManager {
private static _DEFAULT_CONFIG: SandboxTomlSchemaType | null = null;
private static get DEFAULT_CONFIG(): SandboxTomlSchemaType {
if (!SandboxPolicyManager._DEFAULT_CONFIG) {
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const defaultPath = path.join(
__dirname,
'policies',
'sandbox-default.toml',
);
try {
const content = fs.readFileSync(defaultPath, 'utf8');
if (typeof content !== 'string') {
SandboxPolicyManager._DEFAULT_CONFIG = {
modes: {
plan: {
network: false,
readonly: true,
approvedTools: [],
allowOverrides: false,
},
default: {
network: false,
readonly: true,
approvedTools: [],
allowOverrides: true,
},
accepting_edits: {
network: false,
readonly: false,
approvedTools: ['sed', 'grep', 'awk', 'perl', 'cat', 'echo'],
allowOverrides: true,
},
},
commands: {},
};
return SandboxPolicyManager._DEFAULT_CONFIG;
}
SandboxPolicyManager._DEFAULT_CONFIG = SandboxTomlSchema.parse(
toml.parse(content),
);
} catch (e) {
debugLogger.error(`Failed to parse default sandbox policy: ${e}`);
throw new Error(`Failed to parse default sandbox policy: ${e}`);
}
}
return SandboxPolicyManager._DEFAULT_CONFIG;
}
private config: SandboxTomlSchemaType;
private readonly configPath: string;
private sessionApprovals: Record<string, SandboxPermissions> = {};
constructor(customConfigPath?: string) {
this.configPath =
customConfigPath ??
path.join(os.homedir(), '.gemini', 'policies', 'sandbox.toml');
this.config = this.loadConfig();
}
private loadConfig(): SandboxTomlSchemaType {
if (!fs.existsSync(this.configPath)) {
return SandboxPolicyManager.DEFAULT_CONFIG;
}
try {
const content = fs.readFileSync(this.configPath, 'utf8');
return SandboxTomlSchema.parse(toml.parse(content));
} catch (e) {
debugLogger.error(`Failed to parse sandbox.toml: ${e}`);
return SandboxPolicyManager.DEFAULT_CONFIG;
}
}
private saveConfig(): void {
try {
const dir = path.dirname(this.configPath);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const content = toml.stringify(this.config as unknown as toml.JsonMap);
fs.writeFileSync(this.configPath, content);
} catch (e) {
debugLogger.error(`Failed to save sandbox.toml: ${e}`);
}
}
getModeConfig(
mode: 'plan' | 'accepting_edits' | 'default' | string,
): SandboxModeConfig {
if (mode === 'plan') return this.config.modes.plan;
if (mode === 'accepting_edits' || mode === 'autoEdit')
return this.config.modes.accepting_edits;
if (mode === 'default') return this.config.modes.default;
// Default fallback
return this.config.modes.default ?? this.config.modes.plan;
}
getCommandPermissions(commandName: string): SandboxPermissions {
const persistent = this.config.commands[commandName];
const session = this.sessionApprovals[commandName];
return {
fileSystem: {
read: [
...(persistent?.allowed_paths ?? []),
...(session?.fileSystem?.read ?? []),
],
write: [
...(persistent?.allowed_paths ?? []),
...(session?.fileSystem?.write ?? []),
],
},
network: persistent?.allow_network || session?.network || false,
};
}
addSessionApproval(
commandName: string,
permissions: SandboxPermissions,
): void {
const existing = this.sessionApprovals[commandName] || {
fileSystem: { read: [], write: [] },
network: false,
};
this.sessionApprovals[commandName] = {
fileSystem: {
read: Array.from(
new Set([
...(existing.fileSystem?.read ?? []),
...(permissions.fileSystem?.read ?? []),
]),
),
write: Array.from(
new Set([
...(existing.fileSystem?.write ?? []),
...(permissions.fileSystem?.write ?? []),
]),
),
},
network: existing.network || permissions.network || false,
};
}
addPersistentApproval(
commandName: string,
permissions: SandboxPermissions,
): void {
const existing = this.config.commands[commandName] || {
allowed_paths: [],
allow_network: false,
};
const newPathsArray: string[] = [
...(existing.allowed_paths ?? []),
...(permissions.fileSystem?.read ?? []),
...(permissions.fileSystem?.write ?? []),
];
const newPaths = new Set(sanitizePaths(newPathsArray));
this.config.commands[commandName] = {
allowed_paths: Array.from(newPaths),
allow_network: existing.allow_network || permissions.network || false,
};
this.saveConfig();
}
}
+9
View File
@@ -309,6 +309,15 @@ export interface PolicyEngineConfig {
* Used to filter rules that have specific 'modes' defined.
*/
approvalMode?: ApprovalMode;
/**
* Whether tool sandboxing is enabled.
*/
toolSandboxEnabled?: boolean;
/**
* List of tools approved by the sandbox policy for the current mode.
*/
sandboxApprovedTools?: string[];
}
export interface PolicySettings {
@@ -54,6 +54,7 @@ describe('PromptProvider', () => {
},
getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry),
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
getSandboxEnabled: vi.fn().mockReturnValue(false),
storage: {
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project-temp'),
getPlansDir: vi.fn().mockReturnValue('/tmp/project-temp/plans'),
+4 -1
View File
@@ -195,7 +195,10 @@ export class PromptProvider {
memoryManagerEnabled: context.config.isMemoryManagerEnabled(),
}),
),
sandbox: this.withSection('sandbox', () => getSandboxMode()),
sandbox: this.withSection('sandbox', () => ({
mode: getSandboxMode(),
toolSandboxingEnabled: context.config.getSandboxEnabled(),
})),
interactiveYoloMode: this.withSection(
'interactiveYoloMode',
() => true,
+11 -4
View File
@@ -36,7 +36,7 @@ export interface SystemPromptOptions {
planningWorkflow?: PlanningWorkflowOptions;
taskTracker?: boolean;
operationalGuidelines?: OperationalGuidelinesOptions;
sandbox?: SandboxMode;
sandbox?: SandboxOptions;
interactiveYoloMode?: boolean;
gitRepo?: GitRepoOptions;
finalReminder?: FinalReminderOptions;
@@ -72,6 +72,11 @@ export interface OperationalGuidelinesOptions {
export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside';
export interface SandboxOptions {
mode: SandboxMode;
toolSandboxingEnabled: boolean;
}
export interface GitRepoOptions {
interactive: boolean;
}
@@ -290,8 +295,9 @@ ${shellEfficiencyGuidelines(options.enableShellEfficiency)}
`.trim();
}
export function renderSandbox(mode?: SandboxMode): string {
if (!mode) return '';
export function renderSandbox(options?: SandboxOptions): string {
if (!options || !options.mode) return '';
const mode = options.mode;
if (mode === 'macos-seatbelt') {
return `
# macOS Seatbelt
@@ -300,11 +306,12 @@ You are running under macos seatbelt with limited access to files outside the pr
return `
# Sandbox
You are running in a sandbox container with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. If you encounter failures that could be due to sandboxing (e.g. if a command fails with 'Operation not permitted' or similar error), when you report the error to the user, also explain why you think it could be due to sandboxing, and how the user may need to adjust their sandbox configuration.`.trim();
} else {
} else if (mode === 'outside') {
return `
# Outside of Sandbox
You are running outside of a sandbox container, directly on the user's system. For critical commands that are particularly likely to modify the user's system outside of the project directory or system temp directory, as you explain the command to the user (per the Explain Critical Commands rule above), also remind the user to consider enabling sandboxing.`.trim();
}
return '';
}
export function renderInteractiveYoloMode(enabled?: boolean): string {
+37 -5
View File
@@ -46,7 +46,7 @@ export interface SystemPromptOptions {
planningWorkflow?: PlanningWorkflowOptions;
taskTracker?: boolean;
operationalGuidelines?: OperationalGuidelinesOptions;
sandbox?: SandboxMode;
sandbox?: SandboxOptions;
interactiveYoloMode?: boolean;
gitRepo?: GitRepoOptions;
}
@@ -84,6 +84,11 @@ export interface OperationalGuidelinesOptions {
export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside';
export interface SandboxOptions {
mode: SandboxMode;
toolSandboxingEnabled: boolean;
}
export interface GitRepoOptions {
interactive: boolean;
}
@@ -222,6 +227,7 @@ Use the following guidelines to optimize your search and read patterns.
## Engineering Standards
- **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
- **Conventions & Style:** Rigorously adhere to existing workspace conventions, architectural patterns, and style (naming, formatting, typing, commenting). During the research phase, analyze surrounding files, tests, and configuration to ensure your changes are seamless, idiomatic, and consistent with the local context. Never compromise idiomatic quality or completeness (e.g., proper declarations, type safety, documentation) to minimize tool calls; all supporting changes required by local conventions are part of a surgical update.
- **Types, warnings and linters:** NEVER use hacks like disabling or suppressing warnings or bypassing the type system (i.e.: casts in TypeScript) unless explicitly instructed to by the user. Instead, use idiomatic language features (e.g.: type guard functions).
- **Libraries/Frameworks:** NEVER assume a library/framework is available. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', etc.) before employing it.
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your changebehavioral, structural, and stylisticis correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. ${options.interactive ? 'For Directives, only clarify if critically underspecified; otherwise, work autonomously.' : 'For Directives, you must work autonomously as no further user input is available.'} You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
@@ -389,18 +395,44 @@ export function renderOperationalGuidelines(
`.trim();
}
export function renderSandbox(mode?: SandboxMode): string {
if (!mode) return '';
export function renderSandbox(options?: SandboxOptions): string {
if (!options || !options.mode) return '';
const { mode, toolSandboxingEnabled } = options;
if (mode === 'macos-seatbelt') {
return `
if (toolSandboxingEnabled) {
return `
# macOS Seatbelt
You are running under macos seatbelt with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports.
**Sandbox Failure Recovery:**
If an interactive command (like nvim, nano, git) fails to load plugins, crashes, or hangs waiting for user input (e.g. at a '-- More --' prompt) with errors indicating missing files or 'Operation not permitted', it is likely blocked by the sandbox.
If you see these errors (e.g. Vim:E303, 'should be read/write') in the command output (even if the command timed out or was cancelled by the user):
1. IMMEDIATELY analyze the output to identify the required filesystem paths or network access.
2. WITHOUT asking the user or explaining the failure first, AUTOMATICALLY retry the command using the 'run_shell_command' tool, explicitly providing the missing permissions in the 'additional_permissions' parameter.
3. This will immediately present the user with a modal to approve the expansion for the command so they don't have to reprompt you.`.trim();
} else {
return `
# macOS Seatbelt
You are running under macos seatbelt with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. If you encounter failures that could be due to macOS Seatbelt (e.g. if a command fails with 'Operation not permitted' or similar error), as you report the error to the user, also explain why you think it could be due to macOS Seatbelt, and how the user may need to adjust their Seatbelt profile.`.trim();
}
} else if (mode === 'generic') {
return `
if (toolSandboxingEnabled) {
return `
# Sandbox
You are running in a sandbox container with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports.
**Sandbox Failure Recovery:**
If a command fails with 'Operation not permitted' or similar sandbox errors, do NOT ask the user to adjust settings manually. Instead:
1. Analyze the command and error to identify the required filesystem paths or network access.
2. Retry the command using the 'run_shell_command' tool, providing the missing permissions in the 'additional_permissions' parameter.
3. The user will be presented with a modal to approve this expansion for the current command.`.trim();
} else {
return `
# Sandbox
You are running in a sandbox container with limited access to files outside the project directory or system temp directory, and with limited access to host system resources such as ports. If you encounter failures that could be due to sandboxing (e.g. if a command fails with 'Operation not permitted' or similar error), when you report the error to the user, also explain why you think it could be due to sandboxing, and how the user may need to adjust their sandbox configuration.`.trim();
}
}
return '';
}
@@ -4,15 +4,42 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach } from 'vitest';
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LinuxSandboxManager } from './LinuxSandboxManager.js';
import type { SandboxRequest } from '../../services/sandboxManager.js';
import fs from 'node:fs';
vi.mock('node:fs', async () => {
const actual = await vi.importActual<typeof import('node:fs')>('node:fs');
return {
...actual,
default: {
// @ts-expect-error - Property 'default' does not exist on type 'typeof import("node:fs")'
...actual.default,
existsSync: vi.fn(() => true),
realpathSync: vi.fn((p: string | Buffer) => p.toString()),
mkdirSync: vi.fn(),
openSync: vi.fn(),
closeSync: vi.fn(),
writeFileSync: vi.fn(),
},
existsSync: vi.fn(() => true),
realpathSync: vi.fn((p: string | Buffer) => p.toString()),
mkdirSync: vi.fn(),
openSync: vi.fn(),
closeSync: vi.fn(),
writeFileSync: vi.fn(),
};
});
describe('LinuxSandboxManager', () => {
const workspace = '/home/user/workspace';
let manager: LinuxSandboxManager;
beforeEach(() => {
vi.clearAllMocks();
vi.mocked(fs.existsSync).mockReturnValue(true);
vi.mocked(fs.realpathSync).mockImplementation((p) => p.toString());
manager = new LinuxSandboxManager({ workspace });
});
@@ -52,6 +79,15 @@ describe('LinuxSandboxManager', () => {
'--bind',
workspace,
workspace,
'--ro-bind',
`${workspace}/.gitignore`,
`${workspace}/.gitignore`,
'--ro-bind',
`${workspace}/.geminiignore`,
`${workspace}/.geminiignore`,
'--ro-bind',
`${workspace}/.git`,
`${workspace}/.git`,
'--seccomp',
'9',
'--',
@@ -79,6 +115,15 @@ describe('LinuxSandboxManager', () => {
'--bind',
workspace,
workspace,
'--ro-bind',
`${workspace}/.gitignore`,
`${workspace}/.gitignore`,
'--ro-bind',
`${workspace}/.geminiignore`,
`${workspace}/.geminiignore`,
'--ro-bind',
`${workspace}/.git`,
`${workspace}/.git`,
'--bind-try',
'/tmp/cache',
'/tmp/cache',
@@ -88,6 +133,48 @@ describe('LinuxSandboxManager', () => {
]);
});
it('protects real paths of governance files if they are symlinks', async () => {
vi.mocked(fs.realpathSync).mockImplementation((p) => {
if (p.toString() === `${workspace}/.gitignore`)
return '/shared/global.gitignore';
return p.toString();
});
const bwrapArgs = await getBwrapArgs({
command: 'ls',
args: [],
cwd: workspace,
env: {},
});
expect(bwrapArgs).toContain('--ro-bind');
expect(bwrapArgs).toContain(`${workspace}/.gitignore`);
expect(bwrapArgs).toContain('/shared/global.gitignore');
// Check that both are bound
const gitignoreIndex = bwrapArgs.indexOf(`${workspace}/.gitignore`);
expect(bwrapArgs[gitignoreIndex - 1]).toBe('--ro-bind');
expect(bwrapArgs[gitignoreIndex + 1]).toBe(`${workspace}/.gitignore`);
const realGitignoreIndex = bwrapArgs.indexOf('/shared/global.gitignore');
expect(bwrapArgs[realGitignoreIndex - 1]).toBe('--ro-bind');
expect(bwrapArgs[realGitignoreIndex + 1]).toBe('/shared/global.gitignore');
});
it('touches governance files if they do not exist', async () => {
vi.mocked(fs.existsSync).mockReturnValue(false);
await getBwrapArgs({
command: 'ls',
args: [],
cwd: workspace,
env: {},
});
expect(fs.mkdirSync).toHaveBeenCalled();
expect(fs.openSync).toHaveBeenCalled();
});
it('should not bind the workspace twice even if it has a trailing slash in allowedPaths', async () => {
const bwrapArgs = await getBwrapArgs({
command: 'ls',
@@ -102,7 +189,20 @@ describe('LinuxSandboxManager', () => {
const bindsIndex = bwrapArgs.indexOf('--seccomp');
const binds = bwrapArgs.slice(bwrapArgs.indexOf('--bind'), bindsIndex);
// Should only contain the primary workspace bind, not the second one with a trailing slash
expect(binds).toEqual(['--bind', workspace, workspace]);
// Should only contain the primary workspace bind and governance files, not the second workspace bind with a trailing slash
expect(binds).toEqual([
'--bind',
workspace,
workspace,
'--ro-bind',
`${workspace}/.gitignore`,
`${workspace}/.gitignore`,
'--ro-bind',
`${workspace}/.geminiignore`,
`${workspace}/.geminiignore`,
'--ro-bind',
`${workspace}/.git`,
`${workspace}/.git`,
]);
});
});
@@ -4,14 +4,15 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { join, normalize } from 'node:path';
import { writeFileSync } from 'node:fs';
import fs from 'node:fs';
import { join, dirname, normalize } from 'node:path';
import os from 'node:os';
import {
type SandboxManager,
type GlobalSandboxOptions,
type SandboxRequest,
type SandboxedCommand,
GOVERNANCE_FILES,
sanitizePaths,
} from '../../services/sandboxManager.js';
import {
@@ -72,11 +73,30 @@ function getSeccompBpfPath(): string {
}
const bpfPath = join(os.tmpdir(), `gemini-cli-seccomp-${process.pid}.bpf`);
writeFileSync(bpfPath, buf);
fs.writeFileSync(bpfPath, buf);
cachedBpfPath = bpfPath;
return bpfPath;
}
/**
* Ensures a file or directory exists.
*/
function touch(filePath: string, isDirectory: boolean) {
try {
// If it exists (even as a broken symlink), do nothing
if (fs.lstatSync(filePath)) return;
} catch {
// Ignore ENOENT
}
if (isDirectory) {
fs.mkdirSync(filePath, { recursive: true });
} else {
fs.mkdirSync(dirname(filePath), { recursive: true });
fs.closeSync(fs.openSync(filePath, 'a'));
}
}
/**
* A SandboxManager implementation for Linux that uses Bubblewrap (bwrap).
*/
@@ -109,6 +129,21 @@ export class LinuxSandboxManager implements SandboxManager {
this.options.workspace,
];
// Protected governance files are bind-mounted as read-only, even if the workspace is RW.
// We ensure they exist on the host and resolve real paths to prevent symlink bypasses.
// In bwrap, later binds override earlier ones for the same path.
for (const file of GOVERNANCE_FILES) {
const filePath = join(this.options.workspace, file.path);
touch(filePath, file.isDirectory);
const realPath = fs.realpathSync(filePath);
bwrapArgs.push('--ro-bind', filePath, filePath);
if (realPath !== filePath) {
bwrapArgs.push('--ro-bind', realPath, realPath);
}
}
const allowedPaths = sanitizePaths(req.policy?.allowedPaths) || [];
const normalizedWorkspace = normalize(this.options.workspace).replace(
/\/$/,
@@ -8,20 +8,32 @@ import { MacOsSandboxManager } from './MacOsSandboxManager.js';
import type { ExecutionPolicy } from '../../services/sandboxManager.js';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
describe('MacOsSandboxManager', () => {
const mockWorkspace = '/test/workspace';
const mockAllowedPaths = ['/test/allowed'];
let mockWorkspace: string;
let mockAllowedPaths: string[];
const mockNetworkAccess = true;
const mockPolicy: ExecutionPolicy = {
allowedPaths: mockAllowedPaths,
networkAccess: mockNetworkAccess,
};
let mockPolicy: ExecutionPolicy;
let manager: MacOsSandboxManager;
beforeEach(() => {
mockWorkspace = fs.mkdtempSync(
path.join(os.tmpdir(), 'gemini-cli-macos-test-'),
);
mockAllowedPaths = [
path.join(os.tmpdir(), 'gemini-cli-macos-test-allowed'),
];
if (!fs.existsSync(mockAllowedPaths[0])) {
fs.mkdirSync(mockAllowedPaths[0]);
}
mockPolicy = {
allowedPaths: mockAllowedPaths,
networkAccess: mockNetworkAccess,
};
manager = new MacOsSandboxManager({ workspace: mockWorkspace });
// Mock realpathSync to just return the path for testing
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p as string);
@@ -29,6 +41,10 @@ describe('MacOsSandboxManager', () => {
afterEach(() => {
vi.restoreAllMocks();
fs.rmSync(mockWorkspace, { recursive: true, force: true });
if (mockAllowedPaths && mockAllowedPaths[0]) {
fs.rmSync(mockAllowedPaths[0], { recursive: true, force: true });
}
});
describe('prepareCommand', () => {
@@ -47,11 +63,22 @@ describe('MacOsSandboxManager', () => {
expect(profile).toContain('(deny default)');
expect(profile).toContain('(allow process-exec)');
expect(profile).toContain('(subpath (param "WORKSPACE"))');
expect(profile).not.toContain('(allow network*)');
expect(profile).not.toContain('(allow network-outbound)');
expect(result.args).toContain('-D');
expect(result.args).toContain('WORKSPACE=/test/workspace');
expect(result.args).toContain(`WORKSPACE=${mockWorkspace}`);
expect(result.args).toContain(`TMPDIR=${os.tmpdir()}`);
// Governance files should be protected
expect(profile).toContain(
'(deny file-write* (literal (param "GOVERNANCE_FILE_0")))',
); // .gitignore
expect(profile).toContain(
'(deny file-write* (literal (param "GOVERNANCE_FILE_1")))',
); // .geminiignore
expect(profile).toContain(
'(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))',
); // .git
});
it('should allow network when networkAccess is true in policy', async () => {
@@ -64,7 +91,7 @@ describe('MacOsSandboxManager', () => {
});
const profile = result.args[1];
expect(profile).toContain('(allow network*)');
expect(profile).toContain('(allow network-outbound)');
});
it('should parameterize allowed paths and normalize them', async () => {
@@ -134,31 +161,41 @@ describe('MacOsSandboxManager', () => {
});
it('should resolve parent directories if a file does not exist', async () => {
const baseTmpDir = fs.mkdtempSync(
path.join(os.tmpdir(), 'gemini-cli-macos-realpath-test-'),
);
const realPath = path.join(baseTmpDir, 'real_path');
const nonexistentFile = path.join(realPath, 'nonexistent.txt');
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => {
if (p === '/test/symlink/nonexistent.txt') {
if (p === nonexistentFile) {
const error = new Error('ENOENT');
Object.assign(error, { code: 'ENOENT' });
throw error;
}
if (p === '/test/symlink') {
return '/test/real_path';
if (p === realPath) {
return path.join(baseTmpDir, 'resolved_path');
}
return p as string;
});
const dynamicManager = new MacOsSandboxManager({
workspace: '/test/symlink/nonexistent.txt',
});
const dynamicResult = await dynamicManager.prepareCommand({
command: 'echo',
args: ['hello'],
cwd: '/test/symlink/nonexistent.txt',
env: {},
});
try {
const dynamicManager = new MacOsSandboxManager({
workspace: nonexistentFile,
});
const dynamicResult = await dynamicManager.prepareCommand({
command: 'echo',
args: ['hello'],
cwd: nonexistentFile,
env: {},
});
expect(dynamicResult.args).toContain(
'WORKSPACE=/test/real_path/nonexistent.txt',
);
expect(dynamicResult.args).toContain(
`WORKSPACE=${path.join(baseTmpDir, 'resolved_path', 'nonexistent.txt')}`,
);
} finally {
fs.rmSync(baseTmpDir, { recursive: true, force: true });
}
});
it('should throw if realpathSync throws a non-ENOENT error', async () => {
@@ -169,7 +206,7 @@ describe('MacOsSandboxManager', () => {
});
const errorManager = new MacOsSandboxManager({
workspace: '/test/workspace',
workspace: mockWorkspace,
});
await expect(
errorManager.prepareCommand({
@@ -4,40 +4,164 @@
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import {
type SandboxManager,
type GlobalSandboxOptions,
type SandboxRequest,
type SandboxedCommand,
type ExecutionPolicy,
sanitizePaths,
type SandboxPermissions,
type GlobalSandboxOptions,
} from '../../services/sandboxManager.js';
import {
sanitizeEnvironment,
getSecureSanitizationConfig,
type EnvironmentSanitizationConfig,
} from '../../services/environmentSanitization.js';
import { buildSeatbeltArgs } from './seatbeltArgsBuilder.js';
import {
BASE_SEATBELT_PROFILE,
NETWORK_SEATBELT_PROFILE,
} from './baseProfile.js';
getCommandRoots,
initializeShellParsers,
splitCommands,
stripShellWrapper,
} from '../../utils/shell-utils.js';
import { isKnownSafeCommand } from './commandSafety.js';
import { parse as shellParse } from 'shell-quote';
import { type SandboxPolicyManager } from '../../policy/sandboxPolicyManager.js';
import path from 'node:path';
export interface MacOsSandboxOptions extends GlobalSandboxOptions {
/** Optional base sanitization config. */
sanitizationConfig?: EnvironmentSanitizationConfig;
/** The current sandbox mode behavior from config. */
modeConfig?: {
readonly?: boolean;
network?: boolean;
approvedTools?: string[];
allowOverrides?: boolean;
};
/** The policy manager for persistent approvals. */
policyManager?: SandboxPolicyManager;
}
/**
* A SandboxManager implementation for macOS that uses Seatbelt.
*/
export class MacOsSandboxManager implements SandboxManager {
constructor(private readonly options: GlobalSandboxOptions) {}
constructor(private readonly options: MacOsSandboxOptions) {}
private async isStrictlyApproved(req: SandboxRequest): Promise<boolean> {
const approvedTools = this.options.modeConfig?.approvedTools;
if (!approvedTools || approvedTools.length === 0) {
return false;
}
await initializeShellParsers();
const fullCmd = [req.command, ...req.args].join(' ');
const stripped = stripShellWrapper(fullCmd);
const roots = getCommandRoots(stripped);
if (roots.length === 0) return false;
const allRootsApproved = roots.every((root) =>
approvedTools.includes(root),
);
if (allRootsApproved) {
return true;
}
const pipelineCommands = splitCommands(stripped);
if (pipelineCommands.length === 0) return false;
// For safety, every command in the pipeline must be considered safe.
for (const cmdString of pipelineCommands) {
const parsedArgs = shellParse(cmdString).map(String);
if (!isKnownSafeCommand(parsedArgs)) {
return false;
}
}
return true;
}
private async getCommandName(req: SandboxRequest): Promise<string> {
await initializeShellParsers();
const fullCmd = [req.command, ...req.args].join(' ');
const stripped = stripShellWrapper(fullCmd);
const roots = getCommandRoots(stripped).filter(
(r) => r !== 'shopt' && r !== 'set',
);
if (roots.length > 0) {
return roots[0];
}
return path.basename(req.command);
}
async prepareCommand(req: SandboxRequest): Promise<SandboxedCommand> {
await initializeShellParsers();
const sanitizationConfig = getSecureSanitizationConfig(
req.policy?.sanitizationConfig,
);
const sanitizedEnv = sanitizeEnvironment(req.env, sanitizationConfig);
const sandboxArgs = this.buildSeatbeltArgs(this.options, req.policy);
const isReadonlyMode = this.options.modeConfig?.readonly ?? true;
const allowOverrides = this.options.modeConfig?.allowOverrides ?? true;
// Reject override attempts in plan mode
if (!allowOverrides && req.policy?.additionalPermissions) {
const perms = req.policy.additionalPermissions;
if (
perms.network ||
(perms.fileSystem?.write && perms.fileSystem.write.length > 0)
) {
throw new Error(
'Sandbox request rejected: Cannot override readonly/network restrictions in Plan mode.',
);
}
}
// If not in readonly mode OR it's a strictly approved pipeline, allow workspace writes
const isApproved = allowOverrides
? await this.isStrictlyApproved(req)
: false;
const workspaceWrite = !isReadonlyMode || isApproved;
const networkAccess =
this.options.modeConfig?.network ?? req.policy?.networkAccess ?? false;
// Fetch persistent approvals for this command
const commandName = await this.getCommandName(req);
const persistentPermissions = allowOverrides
? this.options.policyManager?.getCommandPermissions(commandName)
: undefined;
// Merge all permissions
const mergedAdditional: SandboxPermissions = {
fileSystem: {
read: [
...(persistentPermissions?.fileSystem?.read ?? []),
...(req.policy?.additionalPermissions?.fileSystem?.read ?? []),
],
write: [
...(persistentPermissions?.fileSystem?.write ?? []),
...(req.policy?.additionalPermissions?.fileSystem?.write ?? []),
],
},
network:
networkAccess ||
persistentPermissions?.network ||
req.policy?.additionalPermissions?.network ||
false,
};
const sandboxArgs = buildSeatbeltArgs({
workspace: this.options.workspace,
allowedPaths: [...(req.policy?.allowedPaths || [])],
forbiddenPaths: req.policy?.forbiddenPaths,
networkAccess: mergedAdditional.network,
workspaceWrite,
additionalPermissions: mergedAdditional,
});
return {
program: '/usr/bin/sandbox-exec',
@@ -46,65 +170,4 @@ export class MacOsSandboxManager implements SandboxManager {
cwd: req.cwd,
};
}
/**
* Builds the arguments array for sandbox-exec using a strict allowlist profile.
* It relies on parameters passed to sandbox-exec via the -D flag to avoid
* string interpolation vulnerabilities, and normalizes paths against symlink escapes.
*
* Returns arguments up to the end of sandbox-exec configuration (e.g. ['-p', '<profile>', '-D', ...])
* Does not include the final '--' separator or the command to run.
*/
private buildSeatbeltArgs(
options: GlobalSandboxOptions,
policy?: ExecutionPolicy,
): string[] {
const profileLines = [BASE_SEATBELT_PROFILE];
const args: string[] = [];
const workspacePath = this.tryRealpath(options.workspace);
args.push('-D', `WORKSPACE=${workspacePath}`);
const tmpPath = this.tryRealpath(os.tmpdir());
args.push('-D', `TMPDIR=${tmpPath}`);
const allowedPaths = sanitizePaths(policy?.allowedPaths) || [];
for (let i = 0; i < allowedPaths.length; i++) {
const allowedPath = this.tryRealpath(allowedPaths[i]);
args.push('-D', `ALLOWED_PATH_${i}=${allowedPath}`);
profileLines.push(
`(allow file-read* file-write* (subpath (param "ALLOWED_PATH_${i}")))`,
);
}
// TODO: handle forbidden paths
if (policy?.networkAccess) {
profileLines.push(NETWORK_SEATBELT_PROFILE);
}
args.unshift('-p', profileLines.join('\n'));
return args;
}
/**
* Resolves symlinks for a given path to prevent sandbox escapes.
* If a file does not exist (ENOENT), it recursively resolves the parent directory.
* Other errors (e.g. EACCES) are re-thrown.
*/
private tryRealpath(p: string): string {
try {
return fs.realpathSync(p);
} catch (e) {
if (e instanceof Error && 'code' in e && e.code === 'ENOENT') {
const parentDir = path.dirname(p);
if (parentDir === p) {
return p;
}
return path.join(this.tryRealpath(parentDir), path.basename(p));
}
throw e;
}
}
}
+96 -8
View File
@@ -16,11 +16,101 @@ export const BASE_SEATBELT_PROFILE = `(version 1)
(import "system.sb")
; Core execution requirements
(allow process-exec)
(allow process-fork)
(allow signal (target same-sandbox))
(allow process-info* (target same-sandbox))
(allow process-info*)
(allow file-write-data
(require-all
(path "/dev/null")
(vnode-type CHARACTER-DEVICE)))
; sysctls permitted.
(allow sysctl-read
(sysctl-name "hw.activecpu")
(sysctl-name "hw.busfrequency_compat")
(sysctl-name "hw.byteorder")
(sysctl-name "hw.cacheconfig")
(sysctl-name "hw.cachelinesize_compat")
(sysctl-name "hw.cpufamily")
(sysctl-name "hw.cpufrequency_compat")
(sysctl-name "hw.cputype")
(sysctl-name "hw.l1dcachesize_compat")
(sysctl-name "hw.l1icachesize_compat")
(sysctl-name "hw.l2cachesize_compat")
(sysctl-name "hw.l3cachesize_compat")
(sysctl-name "hw.logicalcpu_max")
(sysctl-name "hw.machine")
(sysctl-name "hw.model")
(sysctl-name "hw.memsize")
(sysctl-name "hw.ncpu")
(sysctl-name "hw.nperflevels")
(sysctl-name-prefix "hw.optional.arm.")
(sysctl-name-prefix "hw.optional.armv8_")
(sysctl-name "hw.packages")
(sysctl-name "hw.pagesize_compat")
(sysctl-name "hw.pagesize")
(sysctl-name "hw.physicalcpu")
(sysctl-name "hw.physicalcpu_max")
(sysctl-name "hw.logicalcpu")
(sysctl-name "hw.cpufrequency")
(sysctl-name "hw.tbfrequency_compat")
(sysctl-name "hw.vectorunit")
(sysctl-name "machdep.cpu.brand_string")
(sysctl-name "kern.argmax")
(sysctl-name "kern.hostname")
(sysctl-name "kern.maxfilesperproc")
(sysctl-name "kern.maxproc")
(sysctl-name "kern.osproductversion")
(sysctl-name "kern.osrelease")
(sysctl-name "kern.ostype")
(sysctl-name "kern.osvariant_status")
(sysctl-name "kern.osversion")
(sysctl-name "kern.secure_kernel")
(sysctl-name "kern.usrstack64")
(sysctl-name "kern.version")
(sysctl-name "sysctl.proc_cputype")
(sysctl-name "vm.loadavg")
(sysctl-name-prefix "hw.perflevel")
(sysctl-name-prefix "kern.proc.pgrp.")
(sysctl-name-prefix "kern.proc.pid.")
(sysctl-name-prefix "net.routetable.")
)
(allow sysctl-write
(sysctl-name "kern.grade_cputype"))
(allow mach-lookup
(global-name "com.apple.sysmond")
)
\n; IOKit
(allow iokit-open
(iokit-registry-entry-class "RootDomainUserClient")
)
(allow mach-lookup
(global-name "com.apple.system.opendirectoryd.libinfo")
)
; Needed for python multiprocessing on MacOS for the SemLock
(allow ipc-posix-sem)
(allow mach-lookup
(global-name "com.apple.PowerManagement.control")
)
; PTY and Terminal support
(allow pseudo-tty)
(allow file-read* file-write* file-ioctl (literal "/dev/ptmx"))
(allow file-read* file-write*
(require-all
(regex #"^/dev/ttys[0-9]+")
(extension "com.apple.sandbox.pty")))
(allow file-ioctl (regex #"^/dev/ttys[0-9]+"))
; Allow basic read access to system frameworks and libraries required to run
(allow file-read*
@@ -38,11 +128,6 @@ export const BASE_SEATBELT_PROFILE = `(version 1)
(subpath "/private/etc")
)
; PTY and Terminal support
(allow pseudo-tty)
(allow file-read* file-write* file-ioctl (literal "/dev/ptmx"))
(allow file-read* file-write* file-ioctl (regex #"^/dev/ttys[0-9]+"))
; Allow read/write access to temporary directories and common device nodes
(allow file-read* file-write*
(literal "/dev/null")
@@ -53,9 +138,10 @@ export const BASE_SEATBELT_PROFILE = `(version 1)
)
; Workspace access using parameterized paths
(allow file-read* file-write*
(allow file-read*
(subpath (param "WORKSPACE"))
)
`;
/**
@@ -66,7 +152,9 @@ export const BASE_SEATBELT_PROFILE = `(version 1)
*/
export const NETWORK_SEATBELT_PROFILE = `
; Network Access
(allow network*)
(allow network-outbound)
(allow network-inbound)
(allow network-bind)
(allow system-socket
(require-all
@@ -0,0 +1,469 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { parse as shellParse } from 'shell-quote';
/**
* Checks if a command with its arguments is known to be safe to execute
* without requiring user confirmation. This is primarily used to allow
* harmless, read-only commands to run silently in the macOS sandbox.
*
* It handles raw command execution as well as wrapped commands like `bash -c "..."` or `bash -lc "..."`.
* For wrapped commands, it parses the script and ensures all individual
* sub-commands are in the known-safe list and no dangerous shell operators
* (like subshells or redirection) are used.
*
* @param args - The command and its arguments (e.g., ['ls', '-la'])
* @returns true if the command is considered safe, false otherwise.
*/
export function isKnownSafeCommand(args: string[]): boolean {
if (!args || args.length === 0) {
return false;
}
// Normalize zsh to bash
const normalizedArgs = args.map((a) => (a === 'zsh' ? 'bash' : a));
if (isSafeToCallWithExec(normalizedArgs)) {
return true;
}
// Support `bash -lc "..."`
if (
normalizedArgs.length === 3 &&
normalizedArgs[0] === 'bash' &&
(normalizedArgs[1] === '-lc' || normalizedArgs[1] === '-c')
) {
try {
const script = normalizedArgs[2];
// Basic check for dangerous operators that could spawn subshells or redirect output
// We allow &&, ||, |, ; but explicitly block subshells () and redirection >, >>, <
if (/[()<>]/g.test(script)) {
return false;
}
const commands = script.split(/&&|\|\||\||;/);
let allSafe = true;
for (const cmd of commands) {
const trimmed = cmd.trim();
if (!trimmed) continue;
const parsed = shellParse(trimmed).map(String);
if (parsed.length === 0) continue;
if (!isSafeToCallWithExec(parsed)) {
allSafe = false;
break;
}
}
if (allSafe && commands.length > 0) {
return true;
}
} catch {
return false;
}
}
return false;
}
/**
* Core validation logic that checks a single command and its arguments
* against an allowlist of known safe operations. It performs deep validation
* for specific tools like `base64`, `find`, `rg`, `git`, and `sed` to ensure
* unsafe flags (like `--output`, `-exec`, or mutating options) are not used.
*
* @param args - The command and its arguments.
* @returns true if the command is strictly read-only and safe.
*/
function isSafeToCallWithExec(args: string[]): boolean {
if (!args || args.length === 0) return false;
const cmd = args[0];
const safeCommands = new Set([
'cat',
'cd',
'cut',
'echo',
'expr',
'false',
'grep',
'head',
'id',
'ls',
'nl',
'paste',
'pwd',
'rev',
'seq',
'stat',
'tail',
'tr',
'true',
'uname',
'uniq',
'wc',
'which',
'whoami',
'numfmt',
'tac',
]);
if (safeCommands.has(cmd)) {
return true;
}
if (cmd === 'base64') {
const unsafeOptions = new Set(['-o', '--output']);
return !args
.slice(1)
.some(
(arg) =>
unsafeOptions.has(arg) ||
arg.startsWith('--output=') ||
(arg.startsWith('-o') && arg !== '-o'),
);
}
if (cmd === 'find') {
const unsafeOptions = new Set([
'-exec',
'-execdir',
'-ok',
'-okdir',
'-delete',
'-fls',
'-fprint',
'-fprint0',
'-fprintf',
]);
return !args.some((arg) => unsafeOptions.has(arg));
}
if (cmd === 'rg') {
const unsafeWithArgs = new Set(['--pre', '--hostname-bin']);
const unsafeWithoutArgs = new Set(['--search-zip', '-z']);
return !args.some((arg) => {
if (unsafeWithoutArgs.has(arg)) return true;
for (const opt of unsafeWithArgs) {
if (arg === opt || arg.startsWith(opt + '=')) return true;
}
return false;
});
}
if (cmd === 'git') {
if (gitHasConfigOverrideGlobalOption(args)) {
return false;
}
const { idx, subcommand } = findGitSubcommand(args, [
'status',
'log',
'diff',
'show',
'branch',
]);
if (!subcommand) {
return false;
}
const subcommandArgs = args.slice(idx + 1);
if (['status', 'log', 'diff', 'show'].includes(subcommand)) {
return gitSubcommandArgsAreReadOnly(subcommandArgs);
}
if (subcommand === 'branch') {
return (
gitSubcommandArgsAreReadOnly(subcommandArgs) &&
gitBranchIsReadOnly(subcommandArgs)
);
}
return false;
}
if (cmd === 'sed') {
// Special-case sed -n {N|M,N}p
if (args.length <= 4 && args[1] === '-n' && isValidSedNArg(args[2])) {
return true;
}
return false;
}
return false;
}
/**
* Helper to identify which git subcommand is being executed, skipping over
* global git options like `-c` or `--git-dir`.
*
* @param args - The full git command arguments.
* @param subcommands - A list of subcommands to look for.
* @returns An object containing the index of the subcommand and its name.
*/
function findGitSubcommand(
args: string[],
subcommands: string[],
): { idx: number; subcommand: string | null } {
let skipNext = false;
for (let idx = 1; idx < args.length; idx++) {
if (skipNext) {
skipNext = false;
continue;
}
const arg = args[idx];
if (
arg.startsWith('--config-env=') ||
arg.startsWith('--exec-path=') ||
arg.startsWith('--git-dir=') ||
arg.startsWith('--namespace=') ||
arg.startsWith('--super-prefix=') ||
arg.startsWith('--work-tree=') ||
((arg.startsWith('-C') || arg.startsWith('-c')) && arg.length > 2)
) {
continue;
}
if (
arg === '-C' ||
arg === '-c' ||
arg === '--config-env' ||
arg === '--exec-path' ||
arg === '--git-dir' ||
arg === '--namespace' ||
arg === '--super-prefix' ||
arg === '--work-tree'
) {
skipNext = true;
continue;
}
if (arg === '--' || arg.startsWith('-')) {
continue;
}
if (subcommands.includes(arg)) {
return { idx, subcommand: arg };
}
return { idx: -1, subcommand: null };
}
return { idx: -1, subcommand: null };
}
/**
* Checks if a git command contains global configuration override flags
* (e.g., `-c` or `--config-env`) which could be used maliciously to
* execute arbitrary code via git config.
*
* @param args - The git command arguments.
* @returns true if config overrides are present.
*/
function gitHasConfigOverrideGlobalOption(args: string[]): boolean {
return args.some(
(arg) =>
arg === '-c' ||
arg === '--config-env' ||
(arg.startsWith('-c') && arg.length > 2) ||
arg.startsWith('--config-env='),
);
}
/**
* Validates that the arguments for safe git subcommands (like `status`, `log`,
* `diff`, `show`) do not contain flags that could cause mutations or execute
* arbitrary commands (e.g., `--output`, `--exec`).
*
* @param args - Arguments passed to the git subcommand.
* @returns true if the arguments only represent read-only operations.
*/
function gitSubcommandArgsAreReadOnly(args: string[]): boolean {
const unsafeFlags = new Set([
'--output',
'--ext-diff',
'--textconv',
'--exec',
'--paginate',
]);
return !args.some(
(arg) =>
unsafeFlags.has(arg) ||
arg.startsWith('--output=') ||
arg.startsWith('--exec='),
);
}
/**
* Validates that `git branch` is only used for read operations
* (e.g., listing branches) rather than creating, deleting, or renaming branches.
*
* @param args - Arguments passed to `git branch`.
* @returns true if it's purely a listing/read-only branch command.
*/
function gitBranchIsReadOnly(args: string[]): boolean {
if (args.length === 0) return true;
let sawReadOnlyFlag = false;
for (const arg of args) {
if (
[
'--list',
'-l',
'--show-current',
'-a',
'--all',
'-r',
'--remotes',
'-v',
'-vv',
'--verbose',
].includes(arg)
) {
sawReadOnlyFlag = true;
} else if (arg.startsWith('--format=')) {
sawReadOnlyFlag = true;
} else {
return false;
}
}
return sawReadOnlyFlag;
}
/**
* Ensures that a `sed` command argument is a valid line-printing instruction
* (e.g., `10p` or `5,10p`), preventing unsafe script execution in `sed`.
*
* @param arg - The script argument passed to `sed -n`.
* @returns true if it's a valid, safe print command.
*/
function isValidSedNArg(arg: string | undefined): boolean {
if (!arg) return false;
if (!arg.endsWith('p')) return false;
const core = arg.slice(0, -1);
const parts = core.split(',');
if (parts.length === 1) {
const num = parts[0];
return num.length > 0 && /^\d+$/.test(num);
} else if (parts.length === 2) {
const a = parts[0];
const b = parts[1];
return a.length > 0 && b.length > 0 && /^\d+$/.test(a) && /^\d+$/.test(b);
}
return false;
}
/**
* Checks if a command with its arguments is explicitly known to be dangerous
* and should be blocked or require strict user confirmation. This catches
* destructive commands like `rm -rf`, `sudo`, and commands with execution
* flags like `find -exec`.
*
* @param args - The command and its arguments.
* @returns true if the command is identified as dangerous, false otherwise.
*/
export function isDangerousCommand(args: string[]): boolean {
if (!args || args.length === 0) {
return false;
}
const cmd = args[0];
if (cmd === 'rm') {
return args[1] === '-f' || args[1] === '-rf' || args[1] === '-fr';
}
if (cmd === 'sudo') {
return isDangerousCommand(args.slice(1));
}
if (cmd === 'find') {
const unsafeOptions = new Set([
'-exec',
'-execdir',
'-ok',
'-okdir',
'-delete',
'-fls',
'-fprint',
'-fprint0',
'-fprintf',
]);
return args.some((arg) => unsafeOptions.has(arg));
}
if (cmd === 'rg') {
const unsafeWithArgs = new Set(['--pre', '--hostname-bin']);
const unsafeWithoutArgs = new Set(['--search-zip', '-z']);
return args.some((arg) => {
if (unsafeWithoutArgs.has(arg)) return true;
for (const opt of unsafeWithArgs) {
if (arg === opt || arg.startsWith(opt + '=')) return true;
}
return false;
});
}
if (cmd === 'git') {
if (gitHasConfigOverrideGlobalOption(args)) {
return true;
}
const { idx, subcommand } = findGitSubcommand(args, [
'status',
'log',
'diff',
'show',
'branch',
]);
if (!subcommand) {
// It's a git command we don't recognize as explicitly safe.
return false;
}
const subcommandArgs = args.slice(idx + 1);
if (['status', 'log', 'diff', 'show'].includes(subcommand)) {
return !gitSubcommandArgsAreReadOnly(subcommandArgs);
}
if (subcommand === 'branch') {
return !(
gitSubcommandArgsAreReadOnly(subcommandArgs) &&
gitBranchIsReadOnly(subcommandArgs)
);
}
return false;
}
if (cmd === 'base64') {
const unsafeOptions = new Set(['-o', '--output']);
return args
.slice(1)
.some(
(arg) =>
unsafeOptions.has(arg) ||
arg.startsWith('--output=') ||
(arg.startsWith('-o') && arg !== '-o'),
);
}
return false;
}
@@ -0,0 +1,160 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import { buildSeatbeltArgs } from './seatbeltArgsBuilder.js';
import fs from 'node:fs';
import os from 'node:os';
describe('seatbeltArgsBuilder', () => {
it('should build a strict allowlist profile allowing the workspace via param', () => {
// Mock realpathSync to just return the path for testing
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p as string);
const args = buildSeatbeltArgs({ workspace: '/Users/test/workspace' });
expect(args[0]).toBe('-p');
const profile = args[1];
expect(profile).toContain('(version 1)');
expect(profile).toContain('(deny default)');
expect(profile).toContain('(allow process-exec)');
expect(profile).toContain('(subpath (param "WORKSPACE"))');
expect(profile).not.toContain('(allow network*)');
expect(args).toContain('-D');
expect(args).toContain('WORKSPACE=/Users/test/workspace');
expect(args).toContain(`TMPDIR=${os.tmpdir()}`);
vi.restoreAllMocks();
});
it('should allow network when networkAccess is true', () => {
const args = buildSeatbeltArgs({ workspace: '/test', networkAccess: true });
const profile = args[1];
expect(profile).toContain('(allow network-outbound)');
});
it('should parameterize allowed paths and normalize them', () => {
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => {
if (p === '/test/symlink') return '/test/real_path';
return p as string;
});
const args = buildSeatbeltArgs({
workspace: '/test',
allowedPaths: ['/custom/path1', '/test/symlink'],
});
const profile = args[1];
expect(profile).toContain('(subpath (param "ALLOWED_PATH_0"))');
expect(profile).toContain('(subpath (param "ALLOWED_PATH_1"))');
expect(args).toContain('-D');
expect(args).toContain('ALLOWED_PATH_0=/custom/path1');
expect(args).toContain('ALLOWED_PATH_1=/test/real_path');
vi.restoreAllMocks();
});
it('should resolve parent directories if a file does not exist', () => {
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => {
if (p === '/test/symlink/nonexistent.txt') {
const error = new Error('ENOENT');
Object.assign(error, { code: 'ENOENT' });
throw error;
}
if (p === '/test/symlink') {
return '/test/real_path';
}
return p as string;
});
const args = buildSeatbeltArgs({
workspace: '/test/symlink/nonexistent.txt',
});
expect(args).toContain('WORKSPACE=/test/real_path/nonexistent.txt');
vi.restoreAllMocks();
});
it('should throw if realpathSync throws a non-ENOENT error', () => {
vi.spyOn(fs, 'realpathSync').mockImplementation(() => {
const error = new Error('Permission denied');
Object.assign(error, { code: 'EACCES' });
throw error;
});
expect(() =>
buildSeatbeltArgs({
workspace: '/test/workspace',
}),
).toThrow('Permission denied');
vi.restoreAllMocks();
});
describe('governance files', () => {
it('should inject explicit deny rules for governance files', () => {
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p.toString());
vi.spyOn(fs, 'existsSync').mockReturnValue(true);
vi.spyOn(fs, 'lstatSync').mockImplementation(
(p) =>
({
isDirectory: () => p.toString().endsWith('.git'),
isFile: () => !p.toString().endsWith('.git'),
}) as unknown as fs.Stats,
);
const args = buildSeatbeltArgs({ workspace: '/Users/test/workspace' });
const profile = args[1];
// .gitignore should be a literal deny
expect(args).toContain('-D');
expect(args).toContain(
'GOVERNANCE_FILE_0=/Users/test/workspace/.gitignore',
);
expect(profile).toContain(
'(deny file-write* (literal (param "GOVERNANCE_FILE_0")))',
);
// .git should be a subpath deny
expect(args).toContain('GOVERNANCE_FILE_2=/Users/test/workspace/.git');
expect(profile).toContain(
'(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))',
);
vi.restoreAllMocks();
});
it('should protect both the symlink and the real path if they differ', () => {
vi.spyOn(fs, 'realpathSync').mockImplementation((p) => {
if (p === '/test/workspace/.gitignore') return '/test/real/.gitignore';
return p.toString();
});
vi.spyOn(fs, 'existsSync').mockReturnValue(true);
vi.spyOn(fs, 'lstatSync').mockImplementation(
() =>
({
isDirectory: () => false,
isFile: () => true,
}) as unknown as fs.Stats,
);
const args = buildSeatbeltArgs({ workspace: '/test/workspace' });
const profile = args[1];
expect(args).toContain('GOVERNANCE_FILE_0=/test/workspace/.gitignore');
expect(args).toContain('REAL_GOVERNANCE_FILE_0=/test/real/.gitignore');
expect(profile).toContain(
'(deny file-write* (literal (param "GOVERNANCE_FILE_0")))',
);
expect(profile).toContain(
'(deny file-write* (literal (param "REAL_GOVERNANCE_FILE_0")))',
);
vi.restoreAllMocks();
});
});
});
@@ -0,0 +1,247 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import {
BASE_SEATBELT_PROFILE,
NETWORK_SEATBELT_PROFILE,
} from './baseProfile.js';
import {
type SandboxPermissions,
sanitizePaths,
GOVERNANCE_FILES,
} from '../../services/sandboxManager.js';
/**
* Options for building macOS Seatbelt arguments.
*/
export interface SeatbeltArgsOptions {
/** The primary workspace path to allow access to. */
workspace: string;
/** Additional paths to allow access to. */
allowedPaths?: string[];
/** Absolute paths to explicitly deny read/write access to (overrides allowlists). */
forbiddenPaths?: string[];
/** Whether to allow network access. */
networkAccess?: boolean;
/** Granular additional permissions. */
additionalPermissions?: SandboxPermissions;
/** Whether to allow write access to the workspace. */
workspaceWrite?: boolean;
}
/**
* Resolves symlinks for a given path to prevent sandbox escapes.
* If a file does not exist (ENOENT), it recursively resolves the parent directory.
* Other errors (e.g. EACCES) are re-thrown.
*/
function tryRealpath(p: string): string {
try {
return fs.realpathSync(p);
} catch (e) {
if (e instanceof Error && 'code' in e && e.code === 'ENOENT') {
const parentDir = path.dirname(p);
if (parentDir === p) {
return p;
}
return path.join(tryRealpath(parentDir), path.basename(p));
}
throw e;
}
}
/**
* Builds the arguments array for sandbox-exec using a strict allowlist profile.
* It relies on parameters passed to sandbox-exec via the -D flag to avoid
* string interpolation vulnerabilities, and normalizes paths against symlink escapes.
*
* Returns arguments up to the end of sandbox-exec configuration (e.g. ['-p', '<profile>', '-D', ...])
* Does not include the final '--' separator or the command to run.
*/
export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] {
let profile = BASE_SEATBELT_PROFILE + '\n';
const args: string[] = [];
const workspacePath = tryRealpath(options.workspace);
args.push('-D', `WORKSPACE=${workspacePath}`);
args.push('-D', `WORKSPACE_RAW=${options.workspace}`);
profile += `(allow file-read* (subpath (param "WORKSPACE_RAW")))\n`;
if (options.workspaceWrite) {
profile += `(allow file-write* (subpath (param "WORKSPACE_RAW")))\n`;
}
if (options.workspaceWrite) {
profile += `(allow file-write* (subpath (param "WORKSPACE")))\n`;
}
// Add explicit deny rules for governance files in the workspace.
// These are added after the workspace allow rule to ensure they take precedence
// (Seatbelt evaluates rules in order, later rules win for same path).
for (let i = 0; i < GOVERNANCE_FILES.length; i++) {
const governanceFile = path.join(workspacePath, GOVERNANCE_FILES[i].path);
const realGovernanceFile = tryRealpath(governanceFile);
// Determine if it should be treated as a directory (subpath) or a file (literal).
// .git is generally a directory, while ignore files are literals.
let isDirectory = GOVERNANCE_FILES[i].isDirectory;
try {
if (fs.existsSync(realGovernanceFile)) {
isDirectory = fs.lstatSync(realGovernanceFile).isDirectory();
}
} catch {
// Ignore errors, use default guess
}
const ruleType = isDirectory ? 'subpath' : 'literal';
args.push('-D', `GOVERNANCE_FILE_${i}=${governanceFile}`);
profile += `(deny file-write* (${ruleType} (param "GOVERNANCE_FILE_${i}")))\n`;
if (realGovernanceFile !== governanceFile) {
args.push('-D', `REAL_GOVERNANCE_FILE_${i}=${realGovernanceFile}`);
profile += `(deny file-write* (${ruleType} (param "REAL_GOVERNANCE_FILE_${i}")))\n`;
}
}
// Auto-detect and support git worktrees by granting read and write access to the underlying git directory
try {
const gitPath = path.join(workspacePath, '.git');
const gitStat = fs.lstatSync(gitPath);
if (gitStat.isFile()) {
const gitContent = fs.readFileSync(gitPath, 'utf8');
const match = gitContent.match(/^gitdir:\s*(.+)$/m);
if (match && match[1]) {
let worktreeGitDir = match[1].trim();
if (!path.isAbsolute(worktreeGitDir)) {
worktreeGitDir = path.resolve(workspacePath, worktreeGitDir);
}
const resolvedWorktreeGitDir = tryRealpath(worktreeGitDir);
// Grant write access to the worktree's specific .git directory
args.push('-D', `WORKTREE_GIT_DIR=${resolvedWorktreeGitDir}`);
profile += `(allow file-read* file-write* (subpath (param "WORKTREE_GIT_DIR")))\n`;
// Grant write access to the main repository's .git directory (objects, refs, etc. are shared)
// resolvedWorktreeGitDir is usually like: /path/to/main-repo/.git/worktrees/worktree-name
const mainGitDir = tryRealpath(
path.dirname(path.dirname(resolvedWorktreeGitDir)),
);
if (mainGitDir && mainGitDir.endsWith('.git')) {
args.push('-D', `MAIN_GIT_DIR=${mainGitDir}`);
profile += `(allow file-read* file-write* (subpath (param "MAIN_GIT_DIR")))\n`;
}
}
}
} catch (_e) {
// Ignore if .git doesn't exist, isn't readable, etc.
}
const tmpPath = tryRealpath(os.tmpdir());
args.push('-D', `TMPDIR=${tmpPath}`);
const nodeRootPath = tryRealpath(
path.dirname(path.dirname(process.execPath)),
);
args.push('-D', `NODE_ROOT=${nodeRootPath}`);
profile += `(allow file-read* (subpath (param "NODE_ROOT")))\n`;
// Add PATH directories as read-only to support nvm, homebrew, etc.
if (process.env['PATH']) {
const paths = process.env['PATH'].split(':');
let pathIndex = 0;
const addedPaths = new Set();
for (const p of paths) {
if (!p.trim()) continue;
try {
let resolved = tryRealpath(p);
// If this is a 'bin' directory (like /usr/local/bin or homebrew/bin),
// also grant read access to its parent directory so that symlinked
// assets (like Cellar or libexec) can be read.
if (resolved.endsWith('/bin')) {
resolved = path.dirname(resolved);
}
if (!addedPaths.has(resolved)) {
addedPaths.add(resolved);
args.push('-D', `SYS_PATH_${pathIndex}=${resolved}`);
profile += `(allow file-read* (subpath (param "SYS_PATH_${pathIndex}")))\n`;
pathIndex++;
}
} catch (_e) {
// Ignore paths that do not exist or are inaccessible
}
}
}
// Handle allowedPaths
const allowedPaths = sanitizePaths(options.allowedPaths) || [];
for (let i = 0; i < allowedPaths.length; i++) {
const allowedPath = tryRealpath(allowedPaths[i]);
args.push('-D', `ALLOWED_PATH_${i}=${allowedPath}`);
profile += `(allow file-read* file-write* (subpath (param "ALLOWED_PATH_${i}")))\n`;
}
// Handle granular additional permissions
if (options.additionalPermissions?.fileSystem) {
const { read, write } = options.additionalPermissions.fileSystem;
if (read) {
read.forEach((p, i) => {
const resolved = tryRealpath(p);
const paramName = `ADDITIONAL_READ_${i}`;
args.push('-D', `${paramName}=${resolved}`);
let isFile = false;
try {
isFile = fs.statSync(resolved).isFile();
} catch {
// Ignore error
}
if (isFile) {
profile += `(allow file-read* (literal (param "${paramName}")))\n`;
} else {
profile += `(allow file-read* (subpath (param "${paramName}")))\n`;
}
});
}
if (write) {
write.forEach((p, i) => {
const resolved = tryRealpath(p);
const paramName = `ADDITIONAL_WRITE_${i}`;
args.push('-D', `${paramName}=${resolved}`);
let isFile = false;
try {
isFile = fs.statSync(resolved).isFile();
} catch {
// Ignore error
}
if (isFile) {
profile += `(allow file-read* file-write* (literal (param "${paramName}")))\n`;
} else {
profile += `(allow file-read* file-write* (subpath (param "${paramName}")))\n`;
}
});
}
}
// Handle forbiddenPaths
const forbiddenPaths = sanitizePaths(options.forbiddenPaths) || [];
for (let i = 0; i < forbiddenPaths.length; i++) {
const forbiddenPath = tryRealpath(forbiddenPaths[i]);
args.push('-D', `FORBIDDEN_PATH_${i}=${forbiddenPath}`);
profile += `(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_${i}")))\n`;
}
if (options.networkAccess || options.additionalPermissions?.network) {
profile += NETWORK_SEATBELT_PROFILE;
}
args.unshift('-p', profile);
return args;
}
@@ -5,33 +5,37 @@
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { WindowsSandboxManager } from './windowsSandboxManager.js';
import type { SandboxRequest } from './sandboxManager.js';
import { spawnAsync } from '../utils/shell-utils.js';
import { WindowsSandboxManager } from './WindowsSandboxManager.js';
import type { SandboxRequest } from '../../services/sandboxManager.js';
import { spawnAsync } from '../../utils/shell-utils.js';
vi.mock('../utils/shell-utils.js', () => ({
vi.mock('../../utils/shell-utils.js', () => ({
spawnAsync: vi.fn(),
}));
describe('WindowsSandboxManager', () => {
let manager: WindowsSandboxManager;
let testCwd: string;
beforeEach(() => {
vi.spyOn(os, 'platform').mockReturnValue('win32');
manager = new WindowsSandboxManager({ workspace: '/test/workspace' });
testCwd = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-cli-test-'));
manager = new WindowsSandboxManager({ workspace: testCwd });
});
afterEach(() => {
vi.restoreAllMocks();
fs.rmSync(testCwd, { recursive: true, force: true });
});
it('should prepare a GeminiSandbox.exe command', async () => {
const req: SandboxRequest = {
command: 'whoami',
args: ['/groups'],
cwd: '/test/cwd',
cwd: testCwd,
env: { TEST_VAR: 'test_value' },
policy: {
networkAccess: false,
@@ -41,14 +45,14 @@ describe('WindowsSandboxManager', () => {
const result = await manager.prepareCommand(req);
expect(result.program).toContain('GeminiSandbox.exe');
expect(result.args).toEqual(['0', '/test/cwd', 'whoami', '/groups']);
expect(result.args).toEqual(['0', testCwd, 'whoami', '/groups']);
});
it('should handle networkAccess from config', async () => {
const req: SandboxRequest = {
command: 'whoami',
args: [],
cwd: '/test/cwd',
cwd: testCwd,
env: {},
policy: {
networkAccess: true,
@@ -63,7 +67,7 @@ describe('WindowsSandboxManager', () => {
const req: SandboxRequest = {
command: 'test',
args: [],
cwd: '/test/cwd',
cwd: testCwd,
env: {
API_KEY: 'secret',
PATH: '/usr/bin',
@@ -82,29 +86,53 @@ describe('WindowsSandboxManager', () => {
expect(result.env['API_KEY']).toBeUndefined();
});
it('should grant Low Integrity access to the workspace and allowed paths', async () => {
it('should ensure governance files exist', async () => {
const req: SandboxRequest = {
command: 'test',
args: [],
cwd: '/test/cwd',
cwd: testCwd,
env: {},
policy: {
allowedPaths: ['/test/allowed1'],
},
};
await manager.prepareCommand(req);
expect(spawnAsync).toHaveBeenCalledWith('icacls', [
path.resolve('/test/workspace'),
'/setintegritylevel',
'Low',
]);
expect(fs.existsSync(path.join(testCwd, '.gitignore'))).toBe(true);
expect(fs.existsSync(path.join(testCwd, '.geminiignore'))).toBe(true);
expect(fs.existsSync(path.join(testCwd, '.git'))).toBe(true);
expect(fs.lstatSync(path.join(testCwd, '.git')).isDirectory()).toBe(true);
});
expect(spawnAsync).toHaveBeenCalledWith('icacls', [
path.resolve('/test/allowed1'),
'/setintegritylevel',
'Low',
]);
it('should grant Low Integrity access to the workspace and allowed paths', async () => {
const allowedPath = path.join(os.tmpdir(), 'gemini-cli-test-allowed');
if (!fs.existsSync(allowedPath)) {
fs.mkdirSync(allowedPath);
}
try {
const req: SandboxRequest = {
command: 'test',
args: [],
cwd: testCwd,
env: {},
policy: {
allowedPaths: [allowedPath],
},
};
await manager.prepareCommand(req);
expect(spawnAsync).toHaveBeenCalledWith('icacls', [
path.resolve(testCwd),
'/setintegritylevel',
'Low',
]);
expect(spawnAsync).toHaveBeenCalledWith('icacls', [
path.resolve(allowedPath),
'/setintegritylevel',
'Low',
]);
} finally {
fs.rmSync(allowedPath, { recursive: true, force: true });
}
});
});
@@ -12,15 +12,16 @@ import {
type SandboxManager,
type SandboxRequest,
type SandboxedCommand,
GOVERNANCE_FILES,
type GlobalSandboxOptions,
sanitizePaths,
} from './sandboxManager.js';
} from '../../services/sandboxManager.js';
import {
sanitizeEnvironment,
getSecureSanitizationConfig,
} from './environmentSanitization.js';
import { debugLogger } from '../utils/debugLogger.js';
import { spawnAsync } from '../utils/shell-utils.js';
} from '../../services/environmentSanitization.js';
import { debugLogger } from '../../utils/debugLogger.js';
import { spawnAsync } from '../../utils/shell-utils.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@@ -36,7 +37,29 @@ export class WindowsSandboxManager implements SandboxManager {
private readonly lowIntegrityCache = new Set<string>();
constructor(private readonly options: GlobalSandboxOptions) {
this.helperPath = path.resolve(__dirname, 'scripts', 'GeminiSandbox.exe');
this.helperPath = path.resolve(__dirname, 'GeminiSandbox.exe');
}
/**
* Ensures a file or directory exists.
*/
private touch(filePath: string, isDirectory: boolean): void {
try {
// If it exists (even as a broken symlink), do nothing
if (fs.lstatSync(filePath)) return;
} catch {
// Ignore ENOENT
}
if (isDirectory) {
fs.mkdirSync(filePath, { recursive: true });
} else {
const dir = path.dirname(filePath);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
fs.closeSync(fs.openSync(filePath, 'a'));
}
}
private async ensureInitialized(): Promise<void> {
@@ -164,7 +187,28 @@ export class WindowsSandboxManager implements SandboxManager {
// TODO: handle forbidden paths
// 2. Construct the helper command
// 2. Protected governance files
// These must exist on the host before running the sandbox to prevent
// the sandboxed process from creating them with Low integrity.
// By being created as Medium integrity, they are write-protected from Low processes.
for (const file of GOVERNANCE_FILES) {
const filePath = path.join(this.options.workspace, file.path);
this.touch(filePath, file.isDirectory);
// We resolve real paths to ensure protection for both the symlink and its target.
try {
const realPath = fs.realpathSync(filePath);
if (realPath !== filePath) {
// If it's a symlink, the target is already implicitly protected
// if it's outside the Low integrity workspace (likely Medium).
// If it's inside, we ensure it's not accidentally Low.
}
} catch {
// Ignore realpath errors
}
}
// 3. Construct the helper command
// GeminiSandbox.exe <network:0|1> <cwd> <command> [args...]
const program = this.helperPath;
+2 -1
View File
@@ -77,7 +77,8 @@ export async function checkPolicy(
// confirmation prompt if the policy engine's decision is 'ASK_USER'.
if (
decision === PolicyDecision.ASK_USER &&
toolCall.request.isClientInitiated
toolCall.request.isClientInitiated &&
!toolCall.request.args?.['additional_permissions']
) {
return {
decision: PolicyDecision.ALLOW,
+104
View File
@@ -792,6 +792,110 @@ export class Scheduler {
return true;
}
let isSandboxError = false;
let sandboxDetailsStr = '';
if (
result.status === CoreToolCallStatus.Error &&
result.response.errorType === 'sandbox_expansion_required'
) {
isSandboxError = true;
sandboxDetailsStr = result.response.error?.message || '';
}
if (isSandboxError) {
try {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const parsedError = JSON.parse(sandboxDetailsStr) as {
rootCommand: string;
additionalPermissions: import('../services/sandboxManager.js').SandboxPermissions;
};
const confirmationDetails: SerializableConfirmationDetails = {
type: 'sandbox_expansion',
title: 'Sandbox Expansion Request',
command: String(
activeCall.request.args['command'] ?? parsedError.rootCommand,
),
rootCommand: parsedError.rootCommand,
additionalPermissions: parsedError.additionalPermissions,
};
const correlationId = crypto.randomUUID();
// Mutate the active call so resolveConfirmation generates the correct Sandbox Expansion details
activeCall.request.args['additional_permissions'] =
parsedError.additionalPermissions;
activeCall.invocation = activeCall.tool.build(activeCall.request.args);
// CRITICAL: We must push the new args and invocation into the state manager
// before calling resolveConfirmation, because resolveConfirmation fetches
// the tool call directly from the state manager!
this.state.updateArgs(
callId,
activeCall.request.args,
activeCall.invocation,
);
this.state.updateStatus(callId, CoreToolCallStatus.AwaitingApproval, {
confirmationDetails,
correlationId,
});
const validatingCall = {
...activeCall,
status: CoreToolCallStatus.Validating,
} as ValidatingToolCall;
const confResult = await resolveConfirmation(validatingCall, signal, {
config: this.config,
messageBus: this.messageBus,
state: this.state,
modifier: this.modifier,
getPreferredEditor: this.getPreferredEditor,
schedulerId: this.schedulerId,
onWaitingForConfirmation: this.onWaitingForConfirmation,
});
if (confResult.outcome === ToolConfirmationOutcome.Cancel) {
type LegacyHack = ToolCallResponseInfo & {
llmContent?: string;
returnDisplay?: string;
};
const errorResponse = { ...result.response } as LegacyHack;
errorResponse.llmContent =
'User cancelled sandbox expansion. The command failed with a sandbox denial. Shell output:\n' +
String(errorResponse.returnDisplay);
this.state.updateStatus(
callId,
CoreToolCallStatus.Error,
errorResponse,
);
return false;
}
activeCall.request.args['additional_permissions'] =
parsedError.additionalPermissions;
// Reset the output stream visual so it replaces the error text
this.state.updateStatus(callId, CoreToolCallStatus.Executing, {
liveOutput: undefined,
});
// Call _execute synchronously and properly return its promise to loop internally!
return await this._execute(
{
...activeCall,
status: CoreToolCallStatus.Scheduled,
} as ScheduledToolCall,
signal,
);
} catch (_e) {
// Fallback to normal error handling if parsing/looping fails
}
}
if (result.status === CoreToolCallStatus.Success) {
this.state.updateStatus(
callId,
@@ -10,7 +10,7 @@ import { NoopSandboxManager, sanitizePaths } from './sandboxManager.js';
import { createSandboxManager } from './sandboxManagerFactory.js';
import { LinuxSandboxManager } from '../sandbox/linux/LinuxSandboxManager.js';
import { MacOsSandboxManager } from '../sandbox/macos/MacOsSandboxManager.js';
import { WindowsSandboxManager } from './windowsSandboxManager.js';
import { WindowsSandboxManager } from '../sandbox/windows/WindowsSandboxManager.js';
describe('sanitizePaths', () => {
it('should return undefined if no paths are provided', () => {
@@ -11,6 +11,18 @@ import {
getSecureSanitizationConfig,
type EnvironmentSanitizationConfig,
} from './environmentSanitization.js';
export interface SandboxPermissions {
/** Filesystem permissions. */
fileSystem?: {
/** Paths that should be readable by the command. */
read?: string[];
/** Paths that should be writable by the command. */
write?: string[];
};
/** Whether the command should have network access. */
network?: boolean;
}
/**
* Security boundaries and permissions applied to a specific sandboxed execution.
*/
@@ -23,6 +35,8 @@ export interface ExecutionPolicy {
networkAccess?: boolean;
/** Rules for scrubbing sensitive environment variables. */
sanitizationConfig?: Partial<EnvironmentSanitizationConfig>;
/** Additional granular permissions to grant to this command. */
additionalPermissions?: SandboxPermissions;
}
/**
@@ -76,6 +90,16 @@ export interface SandboxManager {
prepareCommand(req: SandboxRequest): Promise<SandboxedCommand>;
}
/**
* Files that represent the governance or "constitution" of the repository
* and should be write-protected in any sandbox.
*/
export const GOVERNANCE_FILES = [
{ path: '.gitignore', isDirectory: false },
{ path: '.geminiignore', isDirectory: false },
{ path: '.git', isDirectory: true },
] as const;
/**
* A no-op implementation of SandboxManager that silently passes commands
* through while applying environment sanitization.
@@ -12,8 +12,9 @@ import {
} from './sandboxManager.js';
import { LinuxSandboxManager } from '../sandbox/linux/LinuxSandboxManager.js';
import { MacOsSandboxManager } from '../sandbox/macos/MacOsSandboxManager.js';
import { WindowsSandboxManager } from './windowsSandboxManager.js';
import { WindowsSandboxManager } from '../sandbox/windows/WindowsSandboxManager.js';
import type { SandboxConfig } from '../config/config.js';
import { type SandboxPolicyManager } from '../policy/sandboxPolicyManager.js';
/**
* Creates a sandbox manager based on the provided settings.
@@ -21,7 +22,13 @@ import type { SandboxConfig } from '../config/config.js';
export function createSandboxManager(
sandbox: SandboxConfig | undefined,
workspace: string,
policyManager?: SandboxPolicyManager,
approvalMode?: string,
): SandboxManager {
if (approvalMode === 'yolo') {
return new NoopSandboxManager();
}
const isWindows = os.platform() === 'win32';
if (
@@ -36,7 +43,15 @@ export function createSandboxManager(
return new LinuxSandboxManager({ workspace });
}
if (os.platform() === 'darwin') {
return new MacOsSandboxManager({ workspace });
const modeConfig =
policyManager && approvalMode
? policyManager.getModeConfig(approvalMode)
: undefined;
return new MacOsSandboxManager({
workspace,
modeConfig,
policyManager,
});
}
return new LocalSandboxManager();
}
@@ -31,7 +31,11 @@ import {
sanitizeEnvironment,
type EnvironmentSanitizationConfig,
} from './environmentSanitization.js';
import { NoopSandboxManager, type SandboxManager } from './sandboxManager.js';
import {
NoopSandboxManager,
type SandboxManager,
type SandboxPermissions,
} from './sandboxManager.js';
import type { SandboxConfig } from '../config/config.js';
import { killProcessGroup } from '../utils/process-utils.js';
import {
@@ -84,6 +88,7 @@ export type ShellExecutionResult = ExecutionResult;
export type ShellExecutionHandle = ExecutionHandle;
export interface ShellExecutionConfig {
additionalPermissions?: SandboxPermissions;
terminalWidth?: number;
terminalHeight?: number;
pager?: string;
@@ -441,6 +446,7 @@ export class ShellExecutionService {
...shellExecutionConfig,
...(shellExecutionConfig.sandboxConfig || {}),
sanitizationConfig,
additionalPermissions: shellExecutionConfig.additionalPermissions,
},
});
@@ -71,6 +71,7 @@ describe('Tool Confirmation Policy Updates', () => {
getDisableLLMCorrection: () => true,
getIdeMode: () => false,
getActiveModel: () => 'test-model',
isPlanMode: () => false,
getWorkspaceContext: () => ({
isPathWithinWorkspace: () => true,
getDirectories: () => [rootDir],
@@ -169,13 +169,13 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
"name": "exit_plan_mode",
"parametersJsonSchema": {
"properties": {
"plan_path": {
"description": "The file path to the finalized plan (e.g., "/mock/plans/feature-x.md"). This path MUST be within the designated plans directory: /mock/plans/",
"plan_filename": {
"description": "The filename of the finalized plan (e.g., "feature-x.md"). Do not provide an absolute path.",
"type": "string",
},
},
"required": [
"plan_path",
"plan_filename",
],
"type": "object",
},
@@ -583,6 +583,35 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
"name": "run_shell_command",
"parametersJsonSchema": {
"properties": {
"additional_permissions": {
"description": "Sandbox permissions for the command. Use this to request additional sandboxed filesystem or network permissions if a previous command failed with "Operation not permitted".",
"properties": {
"fileSystem": {
"properties": {
"read": {
"description": "List of additional absolute paths to allow reading.",
"items": {
"type": "string",
},
"type": "array",
},
"write": {
"description": "List of additional absolute paths to allow writing.",
"items": {
"type": "string",
},
"type": "array",
},
},
"type": "object",
},
"network": {
"description": "Set to true to enable network access for this command.",
"type": "boolean",
},
},
"type": "object",
},
"command": {
"description": "Exact bash command to execute as \`bash -c <command>\`",
"type": "string",
@@ -958,13 +987,13 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
"name": "exit_plan_mode",
"parametersJsonSchema": {
"properties": {
"plan_path": {
"description": "The file path to the finalized plan (e.g., "/mock/plans/feature-x.md"). This path MUST be within the designated plans directory: /mock/plans/",
"plan_filename": {
"description": "The filename of the finalized plan (e.g., "feature-x.md"). Do not provide an absolute path.",
"type": "string",
},
},
"required": [
"plan_path",
"plan_filename",
],
"type": "object",
},
@@ -1348,6 +1377,35 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
"name": "run_shell_command",
"parametersJsonSchema": {
"properties": {
"additional_permissions": {
"description": "Sandbox permissions for the command. Use this to request additional sandboxed filesystem or network permissions if a previous command failed with "Operation not permitted".",
"properties": {
"fileSystem": {
"properties": {
"read": {
"description": "List of additional absolute paths to allow reading.",
"items": {
"type": "string",
},
"type": "array",
},
"write": {
"description": "List of additional absolute paths to allow writing.",
"items": {
"type": "string",
},
"type": "array",
},
},
"type": "object",
},
"network": {
"description": "Set to true to enable network access for this command.",
"type": "boolean",
},
},
"type": "object",
},
"command": {
"description": "Exact bash command to execute as \`bash -c <command>\`",
"type": "string",
@@ -117,8 +117,11 @@ export const ASK_USER_OPTION_PARAM_DESCRIPTION = 'description';
// -- exit_plan_mode --
export const EXIT_PLAN_MODE_TOOL_NAME = 'exit_plan_mode';
export const EXIT_PLAN_PARAM_PLAN_PATH = 'plan_path';
export const EXIT_PLAN_PARAM_PLAN_FILENAME = 'plan_filename';
// -- enter_plan_mode --
export const ENTER_PLAN_MODE_TOOL_NAME = 'enter_plan_mode';
export const PLAN_MODE_PARAM_REASON = 'reason';
// -- sandbox --
export const PARAM_ADDITIONAL_PERMISSIONS = 'additional_permissions';
@@ -89,7 +89,7 @@ export {
ASK_USER_OPTION_PARAM_LABEL,
ASK_USER_OPTION_PARAM_DESCRIPTION,
PLAN_MODE_PARAM_REASON,
EXIT_PLAN_PARAM_PLAN_PATH,
EXIT_PLAN_PARAM_PLAN_FILENAME,
SKILL_PARAM_NAME,
} from './base-declarations.js';
@@ -244,10 +244,10 @@ export function getShellDefinition(
};
}
export function getExitPlanModeDefinition(plansDir: string): ToolDefinition {
export function getExitPlanModeDefinition(): ToolDefinition {
return {
base: getExitPlanModeDeclaration(plansDir),
overrides: (modelId) => getToolSet(modelId).exit_plan_mode(plansDir),
base: getExitPlanModeDeclaration(),
overrides: (modelId) => getToolSet(modelId).exit_plan_mode(),
};
}
@@ -82,7 +82,7 @@ describe('coreTools snapshots for specific models', () => {
{ name: 'enter_plan_mode', definition: ENTER_PLAN_MODE_DEFINITION },
{
name: 'exit_plan_mode',
definition: getExitPlanModeDefinition('/mock/plans'),
definition: getExitPlanModeDefinition(),
},
{
name: 'activate_skill',
@@ -21,8 +21,9 @@ import {
PARAM_DESCRIPTION,
PARAM_DIR_PATH,
SHELL_PARAM_IS_BACKGROUND,
EXIT_PLAN_PARAM_PLAN_PATH,
EXIT_PLAN_PARAM_PLAN_FILENAME,
SKILL_PARAM_NAME,
PARAM_ADDITIONAL_PERMISSIONS,
} from './base-declarations.js';
/**
@@ -109,6 +110,35 @@ export function getShellDeclaration(
description:
'Set to true if this command should be run in the background (e.g. for long-running servers or watchers). The command will be started, allowed to run for a brief moment to check for immediate errors, and then moved to the background.',
},
[PARAM_ADDITIONAL_PERMISSIONS]: {
type: 'object',
description:
'Sandbox permissions for the command. Use this to request additional sandboxed filesystem or network permissions if a previous command failed with "Operation not permitted".',
properties: {
network: {
type: 'boolean',
description:
'Set to true to enable network access for this command.',
},
fileSystem: {
type: 'object',
properties: {
read: {
type: 'array',
items: { type: 'string' },
description:
'List of additional absolute paths to allow reading.',
},
write: {
type: 'array',
items: { type: 'string' },
description:
'List of additional absolute paths to allow writing.',
},
},
},
},
},
},
required: [SHELL_PARAM_COMMAND],
},
@@ -118,20 +148,18 @@ export function getShellDeclaration(
/**
* Returns the FunctionDeclaration for exiting plan mode.
*/
export function getExitPlanModeDeclaration(
plansDir: string,
): FunctionDeclaration {
export function getExitPlanModeDeclaration(): FunctionDeclaration {
return {
name: EXIT_PLAN_MODE_TOOL_NAME,
description:
'Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.',
parametersJsonSchema: {
type: 'object',
required: [EXIT_PLAN_PARAM_PLAN_PATH],
required: [EXIT_PLAN_PARAM_PLAN_FILENAME],
properties: {
[EXIT_PLAN_PARAM_PLAN_PATH]: {
[EXIT_PLAN_PARAM_PLAN_FILENAME]: {
type: 'string',
description: `The file path to the finalized plan (e.g., "${plansDir}/feature-x.md"). This path MUST be within the designated plans directory: ${plansDir}/`,
description: `The filename of the finalized plan (e.g., "feature-x.md"). Do not provide an absolute path.`,
},
},
},
@@ -739,6 +739,6 @@ The agent did not use the todo list because this task could be completed by a ti
},
},
exit_plan_mode: (plansDir) => getExitPlanModeDeclaration(plansDir),
exit_plan_mode: () => getExitPlanModeDeclaration(),
activate_skill: (skillNames) => getActivateSkillDeclaration(skillNames),
};
@@ -714,6 +714,6 @@ The agent did not use the todo list because this task could be completed by a ti
},
},
exit_plan_mode: (plansDir) => getExitPlanModeDeclaration(plansDir),
exit_plan_mode: () => getExitPlanModeDeclaration(),
activate_skill: (skillNames) => getActivateSkillDeclaration(skillNames),
};
+1 -1
View File
@@ -47,6 +47,6 @@ export interface CoreToolSet {
get_internal_docs: FunctionDeclaration;
ask_user: FunctionDeclaration;
enter_plan_mode: FunctionDeclaration;
exit_plan_mode: (plansDir: string) => FunctionDeclaration;
exit_plan_mode: () => FunctionDeclaration;
activate_skill: (skillNames: string[]) => FunctionDeclaration;
}
+40
View File
@@ -131,8 +131,10 @@ describe('EditTool', () => {
isInteractive: () => false,
getDisableLLMCorrection: vi.fn(() => true),
getExperiments: () => {},
isPlanMode: vi.fn(() => false),
storage: {
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'),
getPlansDir: vi.fn().mockReturnValue('/tmp/plans'),
},
isPathAllowed(this: Config, absolutePath: string): boolean {
const workspaceContext = this.getWorkspaceContext();
@@ -1299,4 +1301,42 @@ function doIt() {
);
});
});
describe('plan mode', () => {
it('should allow edits to plans directory when isPlanMode is true', async () => {
const mockProjectTempDir = path.join(tempDir, 'project');
fs.mkdirSync(mockProjectTempDir);
vi.mocked(mockConfig.storage.getProjectTempDir).mockReturnValue(
mockProjectTempDir,
);
const plansDir = path.join(mockProjectTempDir, 'plans');
fs.mkdirSync(plansDir);
vi.mocked(mockConfig.isPlanMode).mockReturnValue(true);
vi.mocked(mockConfig.storage.getPlansDir).mockReturnValue(plansDir);
const filePath = path.join(rootDir, 'test-file.txt');
const planFilePath = path.join(plansDir, 'test-file.txt');
const initialContent = 'some initial content';
fs.writeFileSync(planFilePath, initialContent, 'utf8');
const params: EditToolParams = {
file_path: filePath,
instruction: 'Replace initial with new',
old_string: 'initial',
new_string: 'new',
};
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toMatch(/Successfully modified file/);
// Verify plan file is written with new content
expect(fs.readFileSync(planFilePath, 'utf8')).toBe('some new content');
fs.rmSync(plansDir, { recursive: true, force: true });
});
});
});
+7 -1
View File
@@ -463,7 +463,13 @@ class EditToolInvocation
true,
() => this.config.getApprovalMode(),
);
if (!path.isAbsolute(this.params.file_path)) {
if (this.config.isPlanMode()) {
const safeFilename = path.basename(this.params.file_path);
this.resolvedPath = path.join(
this.config.storage.getPlansDir(),
safeFilename,
);
} else if (!path.isAbsolute(this.params.file_path)) {
const result = correctPath(this.params.file_path, this.config);
if (result.success) {
this.resolvedPath = result.correctedPath;
+29 -46
View File
@@ -79,7 +79,7 @@ describe('ExitPlanModeTool', () => {
describe('shouldConfirmExecute', () => {
it('should return plan approval confirmation details when plan has content', async () => {
const planRelativePath = createPlanFile('test-plan.md', '# My Plan');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const result = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -98,7 +98,7 @@ describe('ExitPlanModeTool', () => {
it('should return false when plan file is empty', async () => {
const planRelativePath = createPlanFile('empty.md', ' ');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const result = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -109,7 +109,7 @@ describe('ExitPlanModeTool', () => {
it('should return false when plan file cannot be read', async () => {
const planRelativePath = path.join('plans', 'non-existent.md');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const result = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -120,7 +120,7 @@ describe('ExitPlanModeTool', () => {
it('should auto-approve when policy decision is ALLOW', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
vi.spyOn(
invocation as unknown as {
@@ -143,7 +143,7 @@ describe('ExitPlanModeTool', () => {
it('should throw error when policy decision is DENY', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
vi.spyOn(
invocation as unknown as {
@@ -161,7 +161,7 @@ describe('ExitPlanModeTool', () => {
describe('execute with invalid plan', () => {
it('should return error when plan file is empty', async () => {
const planRelativePath = createPlanFile('empty.md', '');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
await invocation.shouldConfirmExecute(new AbortController().signal);
const result = await invocation.execute(new AbortController().signal);
@@ -171,8 +171,8 @@ describe('ExitPlanModeTool', () => {
});
it('should return error when plan file cannot be read', async () => {
const planRelativePath = 'plans/ghost.md';
const invocation = tool.build({ plan_path: planRelativePath });
const planRelativePath = 'ghost.md';
const invocation = tool.build({ plan_filename: planRelativePath });
await invocation.shouldConfirmExecute(new AbortController().signal);
const result = await invocation.execute(new AbortController().signal);
@@ -184,7 +184,7 @@ describe('ExitPlanModeTool', () => {
describe('execute', () => {
it('should return approval message when plan is approved with DEFAULT mode', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const confirmDetails = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -212,7 +212,7 @@ Read and follow the plan strictly during implementation.`,
it('should return approval message when plan is approved with AUTO_EDIT mode', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const confirmDetails = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -243,7 +243,7 @@ Read and follow the plan strictly during implementation.`,
it('should return feedback message when plan is rejected with feedback', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const confirmDetails = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -270,7 +270,7 @@ Revise the plan based on the feedback.`,
it('should handle rejection without feedback gracefully', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const confirmDetails = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -296,7 +296,7 @@ Ask the user for specific feedback on how to improve the plan.`,
it('should log plan execution event when plan is approved', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const confirmDetails = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -320,7 +320,7 @@ Ask the user for specific feedback on how to improve the plan.`,
it('should return cancellation message when cancelled', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const confirmDetails = await invocation.shouldConfirmExecute(
new AbortController().signal,
@@ -343,7 +343,7 @@ Ask the user for specific feedback on how to improve the plan.`,
describe('execute when shouldConfirmExecute is never called', () => {
it('should approve with DEFAULT mode when approvalPayload is null (policy ALLOW skips confirmation)', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
// Simulate the scheduler's policy ALLOW path: execute() is called
// directly without ever calling shouldConfirmExecute(), leaving
@@ -364,7 +364,7 @@ Ask the user for specific feedback on how to improve the plan.`,
it('should return YOLO when config.isInteractive() is false', async () => {
mockConfig.isInteractive = vi.fn().mockReturnValue(false);
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
// Directly call execute to trigger the internal getAllowApprovalMode
const result = await invocation.execute(new AbortController().signal);
@@ -378,7 +378,7 @@ Ask the user for specific feedback on how to improve the plan.`,
it('should return DEFAULT when config.isInteractive() is true', async () => {
mockConfig.isInteractive = vi.fn().mockReturnValue(true);
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
// Directly call execute to trigger the internal getAllowApprovalMode
const result = await invocation.execute(new AbortController().signal);
@@ -393,7 +393,7 @@ Ask the user for specific feedback on how to improve the plan.`,
describe('getApprovalModeDescription (internal)', () => {
it('should handle all valid approval modes', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const testMode = async (mode: ApprovalMode, expected: string) => {
const confirmDetails = await invocation.shouldConfirmExecute(
@@ -426,7 +426,7 @@ Ask the user for specific feedback on how to improve the plan.`,
it('should throw for invalid post-planning modes', async () => {
const planRelativePath = createPlanFile('test.md', '# Content');
const invocation = tool.build({ plan_path: planRelativePath });
const invocation = tool.build({ plan_filename: planRelativePath });
const testInvalidMode = async (mode: ApprovalMode) => {
const confirmDetails = await invocation.shouldConfirmExecute(
@@ -448,36 +448,19 @@ Ask the user for specific feedback on how to improve the plan.`,
});
});
it('should throw error during build if plan path is outside plans directory', () => {
expect(() => tool.build({ plan_path: '../../../etc/passwd' })).toThrow(
/Access denied/,
);
});
describe('validateToolParams', () => {
it('should reject empty plan_path', () => {
const result = tool.validateToolParams({ plan_path: '' });
expect(result).toBe('plan_path is required.');
it('should reject empty plan_filename', () => {
const result = tool.validateToolParams({ plan_filename: '' });
expect(result).toBe('plan_filename is required.');
});
it('should reject whitespace-only plan_path', () => {
const result = tool.validateToolParams({ plan_path: ' ' });
expect(result).toBe('plan_path is required.');
});
it('should reject path outside plans directory', () => {
const result = tool.validateToolParams({
plan_path: '../../../etc/passwd',
});
expect(result).toContain('Access denied');
it('should reject whitespace-only plan_filename', () => {
const result = tool.validateToolParams({ plan_filename: ' ' });
expect(result).toBe('plan_filename is required.');
});
it('should reject non-existent plan file', async () => {
const result = await validatePlanPath(
'plans/ghost.md',
mockPlansDir,
tempRootDir,
);
const result = await validatePlanPath('ghost.md', mockPlansDir);
expect(result).toContain('Plan file does not exist');
});
@@ -488,18 +471,18 @@ Ask the user for specific feedback on how to improve the plan.`,
fs.symlinkSync(outsideFile, maliciousPath);
const result = tool.validateToolParams({
plan_path: 'plans/malicious.md',
plan_filename: 'malicious.md',
});
expect(result).toBe(
'Access denied: plan path must be within the designated plans directory.',
`Access denied: plan path (${path.join(mockPlansDir, 'malicious.md')}) must be within the designated plans directory (${mockPlansDir}).`,
);
});
it('should accept valid path within plans directory', () => {
createPlanFile('valid.md', '# Content');
const result = tool.validateToolParams({
plan_path: 'plans/valid.md',
plan_filename: 'valid.md',
});
expect(result).toBeNull();
});
+14 -17
View File
@@ -28,7 +28,7 @@ import { resolveToolDeclaration } from './definitions/resolver.js';
import { getPlanModeExitMessage } from '../utils/approvalModeUtils.js';
export interface ExitPlanModeParams {
plan_path: string;
plan_filename: string;
}
export class ExitPlanModeTool extends BaseDeclarativeTool<
@@ -41,8 +41,7 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
private config: Config,
messageBus: MessageBus,
) {
const plansDir = config.storage.getPlansDir();
const definition = getExitPlanModeDefinition(plansDir);
const definition = getExitPlanModeDefinition();
super(
ExitPlanModeTool.Name,
'Exit Plan Mode',
@@ -56,22 +55,21 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
protected override validateToolParamValues(
params: ExitPlanModeParams,
): string | null {
if (!params.plan_path || params.plan_path.trim() === '') {
return 'plan_path is required.';
if (!params.plan_filename || params.plan_filename.trim() === '') {
return 'plan_filename is required.';
}
// Since validateToolParamValues is synchronous, we use a basic synchronous check
// for path traversal safety. High-level async validation is deferred to shouldConfirmExecute.
const safeFilename = path.basename(params.plan_filename);
const plansDir = resolveToRealPath(this.config.storage.getPlansDir());
const resolvedPath = path.resolve(
this.config.getTargetDir(),
params.plan_path,
const resolvedPath = path.join(
this.config.storage.getPlansDir(),
safeFilename,
);
const realPath = resolveToRealPath(resolvedPath);
if (!isSubpath(plansDir, realPath)) {
return `Access denied: plan path must be within the designated plans directory.`;
return `Access denied: plan path (${resolvedPath}) must be within the designated plans directory (${plansDir}).`;
}
return null;
@@ -93,8 +91,7 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
}
override getSchema(modelId?: string) {
const plansDir = this.config.storage.getPlansDir();
return resolveToolDeclaration(getExitPlanModeDefinition(plansDir), modelId);
return resolveToolDeclaration(getExitPlanModeDefinition(), modelId);
}
}
@@ -122,9 +119,8 @@ export class ExitPlanModeInvocation extends BaseToolInvocation<
const resolvedPlanPath = this.getResolvedPlanPath();
const pathError = await validatePlanPath(
this.params.plan_path,
this.params.plan_filename,
this.config.storage.getPlansDir(),
this.config.getTargetDir(),
);
if (pathError) {
this.planValidationError = pathError;
@@ -174,7 +170,7 @@ export class ExitPlanModeInvocation extends BaseToolInvocation<
}
getDescription(): string {
return `Requesting plan approval for: ${this.params.plan_path}`;
return `Requesting plan approval for: ${path.join(this.config.storage.getPlansDir(), this.params.plan_filename)}`;
}
/**
@@ -182,7 +178,8 @@ export class ExitPlanModeInvocation extends BaseToolInvocation<
* Note: Validation is done in validateToolParamValues, so this assumes the path is valid.
*/
private getResolvedPlanPath(): string {
return path.resolve(this.config.getTargetDir(), this.params.plan_path);
const safeFilename = path.basename(this.params.plan_filename);
return path.join(this.config.storage.getPlansDir(), safeFilename);
}
async execute(_signal: AbortSignal): Promise<ToolResult> {
@@ -85,6 +85,10 @@ const mockConfigInternal = {
discoverTools: vi.fn(),
}) as unknown as ToolRegistry,
isInteractive: () => false,
isPlanMode: () => false,
storage: {
getPlansDir: () => '/tmp/plans',
},
};
const mockConfig = mockConfigInternal as unknown as Config;
+206
View File
@@ -5,10 +5,12 @@
*/
import fsPromises from 'node:fs/promises';
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import crypto from 'node:crypto';
import { debugLogger } from '../index.js';
import type { SandboxPermissions } from '../services/sandboxManager.js';
import { ToolErrorType } from './tool-error.js';
import {
BaseDeclarativeTool,
@@ -41,6 +43,7 @@ import {
hasRedirection,
} from '../utils/shell-utils.js';
import { SHELL_TOOL_NAME } from './tool-names.js';
import { PARAM_ADDITIONAL_PERMISSIONS } from './definitions/base-declarations.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import { getShellDefinition } from './definitions/coreTools.js';
import { resolveToolDeclaration } from './definitions/resolver.js';
@@ -56,6 +59,7 @@ export interface ShellToolParams {
description?: string;
dir_path?: string;
is_background?: boolean;
[PARAM_ADDITIONAL_PERMISSIONS]?: SandboxPermissions;
}
export class ShellToolInvocation extends BaseToolInvocation<
@@ -122,6 +126,15 @@ export class ShellToolInvocation extends BaseToolInvocation<
return undefined;
}
override async shouldConfirmExecute(
abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
if (this.params[PARAM_ADDITIONAL_PERMISSIONS]) {
return this.getConfirmationDetails(abortSignal);
}
return super.shouldConfirmExecute(abortSignal);
}
protected override async getConfirmationDetails(
_abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
@@ -148,6 +161,32 @@ export class ShellToolInvocation extends BaseToolInvocation<
// Rely entirely on PolicyEngine for interactive confirmation.
// If we are here, it means PolicyEngine returned ASK_USER (or no message bus),
// so we must provide confirmation details.
// If additional_permissions are provided, it's an expansion request
if (this.params[PARAM_ADDITIONAL_PERMISSIONS]) {
return {
type: 'sandbox_expansion',
title: 'Sandbox Expansion Request',
command: this.params.command,
rootCommand: rootCommandDisplay,
additionalPermissions: this.params[PARAM_ADDITIONAL_PERMISSIONS],
onConfirm: async (outcome: ToolConfirmationOutcome) => {
if (outcome === ToolConfirmationOutcome.ProceedAlwaysAndSave) {
const commandName = rootCommands[0] || 'shell';
this.context.config.sandboxPolicyManager.addPersistentApproval(
commandName,
this.params[PARAM_ADDITIONAL_PERMISSIONS]!,
);
} else if (outcome === ToolConfirmationOutcome.ProceedAlways) {
const commandName = rootCommands[0] || 'shell';
this.context.config.sandboxPolicyManager.addSessionApproval(
commandName,
this.params[PARAM_ADDITIONAL_PERMISSIONS]!,
);
}
},
};
}
const confirmationDetails: ToolExecuteConfirmationDetails = {
type: 'exec',
title: 'Confirm Shell Command',
@@ -293,6 +332,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
shellExecutionConfig?.sanitizationConfig ??
this.context.config.sanitizationConfig,
sandboxManager: this.context.config.sandboxManager,
additionalPermissions: this.params[PARAM_ADDITIONAL_PERMISSIONS],
},
);
@@ -326,6 +366,13 @@ export class ShellToolInvocation extends BaseToolInvocation<
const pgrepLines = pgrepContent.split(os.EOL).filter(Boolean);
for (const line of pgrepLines) {
if (!/^\d+$/.test(line)) {
if (
line.includes('sysmond service not found') ||
line.includes('Cannot get process list') ||
line.includes('sysmon request failed')
) {
continue;
}
debugLogger.error(`pgrep: ${line}`);
}
const pid = Number(line);
@@ -430,6 +477,165 @@ export class ShellToolInvocation extends BaseToolInvocation<
}
}
// Heuristic Sandbox Denial Detection
const lowerOutput = (
(result.output || '') +
' ' +
(result.error?.message || '')
).toLowerCase();
const isFileDenial = [
'operation not permitted',
'vim:e303',
'should be read/write',
'sandbox_apply',
'sandbox: ',
].some((keyword) => lowerOutput.includes(keyword));
const isNetworkDenial = [
'error connecting to',
'network is unreachable',
'could not resolve host',
'connection refused',
'no address associated with hostname',
].some((keyword) => lowerOutput.includes(keyword));
// Only trigger heuristic if the command actually failed (exit code != 0 or aborted)
const failed =
!!result.error ||
!!result.signal ||
(result.exitCode !== undefined && result.exitCode !== 0) ||
result.aborted;
if (failed && (isFileDenial || isNetworkDenial)) {
const strippedCommand = stripShellWrapper(this.params.command);
const rootCommands = getCommandRoots(strippedCommand).filter(
(r) => r !== 'shopt',
);
const rootCommandDisplay =
rootCommands.length > 0 ? rootCommands[0] : 'shell';
// Extract denied paths
const deniedPaths = new Set<string>();
const regex =
/(?:^|\s)['"]?(\/[\w.-/]+)['"]?:\s*[Oo]peration not permitted/gi;
let match;
while ((match = regex.exec(result.output || '')) !== null) {
deniedPaths.add(match[1]);
}
while ((match = regex.exec(result.error?.message || '')) !== null) {
deniedPaths.add(match[1]);
}
if (isFileDenial && deniedPaths.size === 0) {
// Fallback heuristic: look for any absolute path in the output
// Avoid matching simple commands like /bin/sh
const fallbackRegex =
/(?:^|[\s"'[\]])(\/[a-zA-Z0-9_.-]+(?:\/[a-zA-Z0-9_.-]+)+)(?:$|[\s"'[\]:])/gi;
let m;
while ((m = fallbackRegex.exec(result.output || '')) !== null) {
const p = m[1];
if (p && !p.startsWith('/bin/') && !p.startsWith('/usr/bin/')) {
deniedPaths.add(p);
}
}
while (
(m = fallbackRegex.exec(result.error?.message || '')) !== null
) {
const p = m[1];
if (p && !p.startsWith('/bin/') && !p.startsWith('/usr/bin/')) {
deniedPaths.add(p);
}
}
}
const readPaths = new Set(
this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.read || [],
);
const writePaths = new Set(
this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.write || [],
);
for (const p of deniedPaths) {
try {
// Find an existing parent directory to add instead of a non-existent file
let currentPath = p;
try {
if (
fs.existsSync(currentPath) &&
fs.statSync(currentPath).isFile()
) {
currentPath = path.dirname(currentPath);
}
} catch (_e) {
/* ignore */
}
while (currentPath.length > 1) {
if (fs.existsSync(currentPath)) {
writePaths.add(currentPath);
readPaths.add(currentPath);
break;
}
currentPath = path.dirname(currentPath);
}
} catch (_e) {
// ignore
}
}
const additionalPermissions = {
network:
isNetworkDenial ||
this.params[PARAM_ADDITIONAL_PERMISSIONS]?.network ||
undefined,
fileSystem:
isFileDenial || writePaths.size > 0
? {
read: Array.from(readPaths),
write: Array.from(writePaths),
}
: undefined,
};
const originalReadSize =
this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.read?.length ||
0;
const originalWriteSize =
this.params[PARAM_ADDITIONAL_PERMISSIONS]?.fileSystem?.write
?.length || 0;
const originalNetwork =
!!this.params[PARAM_ADDITIONAL_PERMISSIONS]?.network;
const newReadSize = additionalPermissions.fileSystem?.read?.length || 0;
const newWriteSize =
additionalPermissions.fileSystem?.write?.length || 0;
const newNetwork = !!additionalPermissions.network;
const hasNewPermissions =
newReadSize > originalReadSize ||
newWriteSize > originalWriteSize ||
(!originalNetwork && newNetwork);
if (hasNewPermissions) {
const confirmationDetails = {
type: 'sandbox_expansion',
title: 'Sandbox Expansion Request',
command: this.params.command,
rootCommand: rootCommandDisplay,
additionalPermissions,
};
return {
llmContent: 'Sandbox expansion required',
returnDisplay: returnDisplayMessage,
error: {
type: ToolErrorType.SANDBOX_EXPANSION_REQUIRED,
message: JSON.stringify(confirmationDetails),
},
};
}
// If no new permissions were found by heuristic, do not intercept.
// Just return the normal execution error so the LLM can try providing explicit paths itself.
}
const summarizeConfig =
this.context.config.getSummarizeToolOutputConfig();
const executionError = result.error
+1
View File
@@ -64,6 +64,7 @@ export enum ToolErrorType {
// Shell errors
SHELL_EXECUTE_ERROR = 'shell_execute_error',
SANDBOX_EXPANSION_REQUIRED = 'sandbox_expansion_required',
// DiscoveredTool-specific Errors
DISCOVERED_TOOL_EXECUTION_ERROR = 'discovered_tool_execution_error',
+2 -2
View File
@@ -73,7 +73,7 @@ import {
ASK_USER_OPTION_PARAM_LABEL,
ASK_USER_OPTION_PARAM_DESCRIPTION,
PLAN_MODE_PARAM_REASON,
EXIT_PLAN_PARAM_PLAN_PATH,
EXIT_PLAN_PARAM_PLAN_FILENAME,
SKILL_PARAM_NAME,
} from './definitions/coreTools.js';
@@ -146,7 +146,7 @@ export {
ASK_USER_OPTION_PARAM_LABEL,
ASK_USER_OPTION_PARAM_DESCRIPTION,
PLAN_MODE_PARAM_REASON,
EXIT_PLAN_PARAM_PLAN_PATH,
EXIT_PLAN_PARAM_PLAN_FILENAME,
SKILL_PARAM_NAME,
};
+11
View File
@@ -992,6 +992,16 @@ export type ToolConfirmationPayload =
| ToolAskUserConfirmationPayload
| ToolExitPlanModeConfirmationPayload;
export interface ToolSandboxExpansionConfirmationDetails {
type: 'sandbox_expansion';
systemMessage?: string;
title: string;
command: string;
rootCommand: string;
additionalPermissions: import('../services/sandboxManager.js').SandboxPermissions;
onConfirm: (outcome: ToolConfirmationOutcome) => Promise<void>;
}
export interface ToolExecuteConfirmationDetails {
type: 'exec';
title: string;
@@ -1048,6 +1058,7 @@ export interface ToolExitPlanModeConfirmationDetails {
}
export type ToolCallConfirmationDetails =
| ToolSandboxExpansionConfirmationDetails
| ToolEditConfirmationDetails
| ToolExecuteConfirmationDetails
| ToolMcpConfirmationDetails
@@ -105,6 +105,7 @@ const mockConfigInternal = {
}) as unknown as ToolRegistry,
isInteractive: () => false,
getDisableLLMCorrection: vi.fn(() => true),
isPlanMode: vi.fn(() => false),
getActiveModel: () => 'test-model',
storage: {
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'),
+13 -4
View File
@@ -165,10 +165,19 @@ class WriteFileToolInvocation extends BaseToolInvocation<
true,
() => this.config.getApprovalMode(),
);
this.resolvedPath = path.resolve(
this.config.getTargetDir(),
this.params.file_path,
);
if (this.config.isPlanMode()) {
const safeFilename = path.basename(this.params.file_path);
this.resolvedPath = path.join(
this.config.storage.getPlansDir(),
safeFilename,
);
} else {
this.resolvedPath = path.resolve(
this.config.getTargetDir(),
this.params.file_path,
);
}
}
override toolLocations(): ToolLocation[] {
+3 -13
View File
@@ -35,19 +35,13 @@ describe('planUtils', () => {
const fullPath = path.join(tempRootDir, planPath);
fs.writeFileSync(fullPath, '# My Plan');
const result = await validatePlanPath(planPath, plansDir, tempRootDir);
const result = await validatePlanPath(planPath, plansDir);
expect(result).toBeNull();
});
it('should return error for path traversal', async () => {
const planPath = path.join('..', 'secret.txt');
const result = await validatePlanPath(planPath, plansDir, tempRootDir);
expect(result).toContain('Access denied');
});
it('should return error for non-existent file', async () => {
const planPath = path.join('plans', 'ghost.md');
const result = await validatePlanPath(planPath, plansDir, tempRootDir);
const result = await validatePlanPath(planPath, plansDir);
expect(result).toContain('Plan file does not exist');
});
@@ -60,11 +54,7 @@ describe('planUtils', () => {
// Create a symbolic link pointing outside the plans directory
fs.symlinkSync(outsideFile, fullMaliciousPath);
const result = await validatePlanPath(
maliciousPath,
plansDir,
tempRootDir,
);
const result = await validatePlanPath(maliciousPath, plansDir);
expect(result).toContain('Access denied');
});
});
+5 -5
View File
@@ -13,8 +13,8 @@ import { isSubpath, resolveToRealPath } from './paths.js';
* Shared between backend tools and CLI UI for consistency.
*/
export const PlanErrorMessages = {
PATH_ACCESS_DENIED:
'Access denied: plan path must be within the designated plans directory.',
PATH_ACCESS_DENIED: (planPath: string, plansDir: string) =>
`Access denied: plan path (${planPath}) must be within the designated plans directory (${plansDir}).`,
FILE_NOT_FOUND: (path: string) =>
`Plan file does not exist: ${path}. You must create the plan file before requesting approval.`,
FILE_EMPTY:
@@ -32,14 +32,14 @@ export const PlanErrorMessages = {
export async function validatePlanPath(
planPath: string,
plansDir: string,
targetDir: string,
): Promise<string | null> {
const resolvedPath = path.resolve(targetDir, planPath);
const safeFilename = path.basename(planPath);
const resolvedPath = path.join(plansDir, safeFilename);
const realPath = resolveToRealPath(resolvedPath);
const realPlansDir = resolveToRealPath(plansDir);
if (!isSubpath(realPlansDir, realPath)) {
return PlanErrorMessages.PATH_ACCESS_DENIED;
return PlanErrorMessages.PATH_ACCESS_DENIED(planPath, realPlansDir);
}
if (!(await fileExists(resolvedPath))) {
+1 -1
View File
@@ -704,7 +704,7 @@ export function getCommandRoots(command: string): string[] {
export function stripShellWrapper(command: string): string {
const pattern =
/^\s*(?:(?:sh|bash|zsh)\s+-c|cmd\.exe\s+\/c|powershell(?:\.exe)?\s+(?:-NoProfile\s+)?-Command|pwsh(?:\.exe)?\s+(?:-NoProfile\s+)?-Command)\s+/i;
/^\s*(?:(?:(?:\S+\/)?(?:sh|bash|zsh))\s+-c|cmd\.exe\s+\/c|powershell(?:\.exe)?\s+(?:-NoProfile\s+)?-Command|pwsh(?:\.exe)?\s+(?:-NoProfile\s+)?-Command)\s+/i;
const match = command.match(pattern);
if (match) {
let newCommand = command.substring(match[0].length).trim();
+1 -1
View File
@@ -435,7 +435,7 @@ export class TestRig {
general: {
// Nightly releases sometimes becomes out of sync with local code and
// triggers auto-update, which causes tests to fail.
disableAutoUpdate: true,
enableAutoUpdate: false,
},
telemetry: {
enabled: true,