feat: move shell efficiency guidelines to tool description (#18614)

This commit is contained in:
N. Taylor Mullen
2026-02-09 10:51:13 -08:00
committed by GitHub
parent 469cbca67f
commit aebc107d2c
8 changed files with 147 additions and 91 deletions

View File

@@ -0,0 +1,110 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
/**
 * Behavioral evals for the shell tool's output-efficiency guidance: the agent
 * should pass quiet/no-pager flags when the feature is enabled and should not
 * add them when `enableShellOutputEfficiency` is turned off.
 */
describe('Shell Efficiency', () => {
  /** Minimal shape of a tool-log entry that these evals depend on. */
  type ToolCallLog = { toolRequest: { name?: string; args?: unknown } };

  const SHELL_TOOL_NAME = 'run_shell_command';

  // Matches an `npm install` whose own argument list (up to the next shell
  // separator) carries a quiet flag as a whole token. A plain substring test
  // for '-q' would also match package names such as `lodash-query` or flags of
  // unrelated commands chained on the same line. `-s` is npm's shorthand for
  // `--silent`, so it is accepted alongside `-q`/`--quiet`.
  const QUIET_NPM_INSTALL =
    /\bnpm install\b[^;&|\n]*\s(?:--silent|--quiet|-q|-s)(?![\w-])/;

  /**
   * Extracts the shell command string from a logged tool call.
   *
   * `args` may be logged as a JSON string, as a raw command string, or as an
   * already-parsed object with a `command` property.
   *
   * @returns The command, or `undefined` when none can be determined (missing
   *   args, null args, or a non-string `command`).
   */
  const getCommand = (call: ToolCallLog): string | undefined => {
    const raw: unknown = call.toolRequest.args;
    let parsed: unknown = raw;
    if (typeof raw === 'string') {
      try {
        parsed = JSON.parse(raw);
      } catch {
        // Not JSON: the logged string is the command itself.
        return raw;
      }
    }
    if (typeof parsed === 'string') {
      return parsed;
    }
    if (typeof parsed === 'object' && parsed !== null) {
      const command = (parsed as { command?: unknown }).command;
      return typeof command === 'string' ? command : undefined;
    }
    return undefined;
  };

  /** Returns the command strings of every `run_shell_command` call in the logs. */
  const getShellCommands = (toolCalls: readonly ToolCallLog[]): string[] =>
    toolCalls
      .filter((call) => call.toolRequest.name === SHELL_TOOL_NAME)
      .map(getCommand)
      .filter((cmd): cmd is string => cmd !== undefined);

  /** True when at least one command is an `npm install` run with a quiet flag. */
  const usesQuietNpmInstall = (commands: readonly string[]): boolean =>
    commands.some((cmd) => QUIET_NPM_INSTALL.test(cmd));

  evalTest('ALWAYS_PASSES', {
    name: 'should use --silent/--quiet flags when installing packages',
    prompt: 'Install the "lodash" package using npm.',
    assert: async (rig) => {
      const commands = getShellCommands(rig.readToolLogs());
      expect(
        usesQuietNpmInstall(commands),
        `Expected agent to use efficiency flags for npm install. Commands used: ${commands.join(
          ', ',
        )}`,
      ).toBe(true);
    },
  });

  evalTest('ALWAYS_PASSES', {
    name: 'should use --no-pager with git commands',
    prompt: 'Show the git log.',
    assert: async (rig) => {
      const commands = getShellCommands(rig.readToolLogs());
      const hasNoPager = commands.some(
        (cmd) => cmd.includes('git') && cmd.includes('--no-pager'),
      );
      expect(
        hasNoPager,
        `Expected agent to use --no-pager with git. Commands used: ${commands.join(
          ', ',
        )}`,
      ).toBe(true);
    },
  });

  evalTest('ALWAYS_PASSES', {
    name: 'should NOT use efficiency flags when enableShellOutputEfficiency is disabled',
    params: {
      settings: {
        tools: {
          shell: {
            enableShellOutputEfficiency: false,
          },
        },
      },
    },
    prompt: 'Install the "lodash" package using npm.',
    assert: async (rig) => {
      const commands = getShellCommands(rig.readToolLogs());
      expect(
        usesQuietNpmInstall(commands),
        'Agent used efficiency flags even though enableShellOutputEfficiency was disabled',
      ).toBe(false);
    },
  });
});

View File

@@ -592,11 +592,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -706,11 +701,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -803,11 +793,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1391,11 +1376,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1514,11 +1494,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1637,11 +1612,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1868,11 +1838,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2099,11 +2064,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2218,11 +2178,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2448,11 +2403,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2567,11 +2517,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
# Operational Guidelines
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.

View File

@@ -463,26 +463,6 @@ describe('Core System Prompt (prompts.ts)', () => {
});
describe('Platform-specific and Background Process instructions', () => {
it('should include Windows-specific shell efficiency commands on win32', () => {
mockPlatform('win32');
const prompt = getCoreSystemPrompt(mockConfig);
expect(prompt).toContain(
"using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)",
);
expect(prompt).not.toContain(
"using commands like 'grep', 'tail', 'head'",
);
});
it('should include generic shell efficiency commands on non-Windows', () => {
mockPlatform('linux');
const prompt = getCoreSystemPrompt(mockConfig);
expect(prompt).toContain("using commands like 'grep', 'tail', 'head'");
expect(prompt).not.toContain(
"using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)",
);
});
it('should use is_background parameter in background process instructions', () => {
const prompt = getCoreSystemPrompt(mockConfig);
expect(prompt).toContain(

View File

@@ -245,6 +245,7 @@ export function renderOperationalGuidelines(
if (!options) return '';
return `
# Operational Guidelines
${shellEfficiencyGuidelines(options.enableShellEfficiency)}
## Tone and Style (CLI Interaction)

View File

@@ -55,7 +55,6 @@ export interface PrimaryWorkflowsOptions {
/**
 * Flags that shape the "# Operational Guidelines" section of the system prompt.
 *
 * NOTE(review): presumably this is the type of the `options` argument of
 * renderOperationalGuidelines — confirm against its signature.
 */
export interface OperationalGuidelinesOptions {
interactive: boolean;
isGemini3: boolean;
// renderOperationalGuidelines passes `options.enableShellEfficiency` to
// shellEfficiencyGuidelines(), which returns '' (no "Shell Tool Efficiency"
// section) when the value is false.
enableShellEfficiency: boolean;
interactiveShellEnabled: boolean;
}
@@ -259,8 +258,6 @@ export function renderOperationalGuidelines(
return `
# Operational Guidelines
${shellEfficiencyGuidelines(options.enableShellEfficiency)}
## Tone and Style
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -517,15 +514,6 @@ function planningPhaseSuggestion(options: PrimaryWorkflowsOptions): string {
return '';
}
/**
 * Renders the "Shell Tool Efficiency" portion of the operational guidelines.
 *
 * @param enabled - Whether the section should be emitted at all.
 * @returns The markdown section, beginning with a newline so it can be
 *   interpolated directly after the preceding heading, or an empty string
 *   when `enabled` is false.
 */
function shellEfficiencyGuidelines(enabled: boolean): string {
  const section = `
## Shell Tool Efficiency
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).`;
  return enabled ? section : '';
}
function toneAndStyleNoChitchat(isGemini3: boolean): string {
return isGemini3
? `

View File

@@ -3,6 +3,10 @@
exports[`ShellTool > getDescription > should return the non-windows description when not on windows 1`] = `
"This tool executes a given shell command as \`bash -c <command>\`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
The following information is returned:
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
@@ -16,6 +20,10 @@ exports[`ShellTool > getDescription > should return the non-windows description
exports[`ShellTool > getDescription > should return the windows description when on windows 1`] = `
"This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. Command can start background processes using PowerShell constructs such as \`Start-Process -NoNewWindow\` or \`Start-Job\`.
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
The following information is returned:
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.

View File

@@ -130,6 +130,7 @@ describe('ShellTool', () => {
getGeminiClient: vi.fn().mockReturnValue({}),
getShellToolInactivityTimeout: vi.fn().mockReturnValue(1000),
getEnableInteractiveShell: vi.fn().mockReturnValue(false),
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
sanitizationConfig: {},
} as unknown as Config;
@@ -633,6 +634,15 @@ describe('ShellTool', () => {
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
expect(shellTool.description).toMatchSnapshot();
});
// Opt-out path: when the config reports shell output efficiency as disabled,
// the tool description must not contain the "Efficiency Guidelines:" section.
it('should not include efficiency guidelines when disabled', () => {
mockPlatform.mockReturnValue('linux');
// The shared mock config returns true for this getter; override it to false
// before constructing the tool, since the description is read right after
// construction.
vi.mocked(mockConfig.getEnableShellOutputEfficiency).mockReturnValue(
false,
);
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
// Checks only for the section header, not the individual guideline bullets.
expect(shellTool.description).not.toContain('Efficiency Guidelines:');
});
});
describe('llmContent output format', () => {

View File

@@ -451,7 +451,18 @@ export class ShellToolInvocation extends BaseToolInvocation<
}
}
function getShellToolDescription(enableInteractiveShell: boolean): string {
function getShellToolDescription(
enableInteractiveShell: boolean,
enableEfficiency: boolean,
): string {
const efficiencyGuidelines = enableEfficiency
? `
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).`
: '';
const returnedInfo = `
The following information is returned:
@@ -467,12 +478,12 @@ function getShellToolDescription(enableInteractiveShell: boolean): string {
const backgroundInstructions = enableInteractiveShell
? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use PowerShell background constructs.'
: 'Command can start background processes using PowerShell constructs such as `Start-Process -NoNewWindow` or `Start-Job`.';
return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. ${backgroundInstructions}${returnedInfo}`;
return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. ${backgroundInstructions}${efficiencyGuidelines}${returnedInfo}`;
} else {
const backgroundInstructions = enableInteractiveShell
? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use `&` to background commands.'
: 'Command can start background processes using `&`.';
return `This tool executes a given shell command as \`bash -c <command>\`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${returnedInfo}`;
return `This tool executes a given shell command as \`bash -c <command>\`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${efficiencyGuidelines}${returnedInfo}`;
}
}
@@ -500,7 +511,10 @@ export class ShellTool extends BaseDeclarativeTool<
super(
ShellTool.Name,
'Shell',
getShellToolDescription(config.getEnableInteractiveShell()),
getShellToolDescription(
config.getEnableInteractiveShell(),
config.getEnableShellOutputEfficiency(),
),
Kind.Execute,
{
type: 'object',