From aebc107d2cea6399d0484987f6cc8f1007a646a8 Mon Sep 17 00:00:00 2001 From: "N. Taylor Mullen" Date: Mon, 9 Feb 2026 10:51:13 -0800 Subject: [PATCH] feat: move shell efficiency guidelines to tool description (#18614) --- evals/shell-efficiency.eval.ts | 110 ++++++++++++++++++ .../core/__snapshots__/prompts.test.ts.snap | 55 --------- packages/core/src/core/prompts.test.ts | 20 ---- packages/core/src/prompts/snippets.legacy.ts | 1 + packages/core/src/prompts/snippets.ts | 12 -- .../tools/__snapshots__/shell.test.ts.snap | 8 ++ packages/core/src/tools/shell.test.ts | 10 ++ packages/core/src/tools/shell.ts | 22 +++- 8 files changed, 147 insertions(+), 91 deletions(-) create mode 100644 evals/shell-efficiency.eval.ts diff --git a/evals/shell-efficiency.eval.ts b/evals/shell-efficiency.eval.ts new file mode 100644 index 0000000000..ee016d53c4 --- /dev/null +++ b/evals/shell-efficiency.eval.ts @@ -0,0 +1,110 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('Shell Efficiency', () => { + const getCommand = (call: any): string | undefined => { + let args = call.toolRequest.args; + if (typeof args === 'string') { + try { + args = JSON.parse(args); + } catch (e) { + // Ignore parse errors + } + } + return typeof args === 'string' ? args : (args as any)['command']; + }; + + evalTest('ALWAYS_PASSES', { + name: 'should use --silent/--quiet flags when installing packages', + prompt: 'Install the "lodash" package using npm.', + assert: async (rig) => { + const toolCalls = rig.readToolLogs(); + const shellCalls = toolCalls.filter( + (call) => call.toolRequest.name === 'run_shell_command', + ); + + const hasEfficiencyFlag = shellCalls.some((call) => { + const cmd = getCommand(call); + return ( + cmd && + cmd.includes('npm install') && + (cmd.includes('--silent') || + cmd.includes('--quiet') || + cmd.includes('-q')) + ); + }); + + expect( + hasEfficiencyFlag, + `Expected agent to use efficiency flags for npm install. Commands used: ${shellCalls + .map(getCommand) + .join(', ')}`, + ).toBe(true); + }, + }); + + evalTest('ALWAYS_PASSES', { + name: 'should use --no-pager with git commands', + prompt: 'Show the git log.', + assert: async (rig) => { + const toolCalls = rig.readToolLogs(); + const shellCalls = toolCalls.filter( + (call) => call.toolRequest.name === 'run_shell_command', + ); + + const hasNoPager = shellCalls.some((call) => { + const cmd = getCommand(call); + return cmd && cmd.includes('git') && cmd.includes('--no-pager'); + }); + + expect( + hasNoPager, + `Expected agent to use --no-pager with git. Commands used: ${shellCalls + .map(getCommand) + .join(', ')}`, + ).toBe(true); + }, + }); + + evalTest('ALWAYS_PASSES', { + name: 'should NOT use efficiency flags when enableShellOutputEfficiency is disabled', + params: { + settings: { + tools: { + shell: { + enableShellOutputEfficiency: false, + }, + }, + }, + }, + prompt: 'Install the "lodash" package using npm.', + assert: async (rig) => { + const toolCalls = rig.readToolLogs(); + const shellCalls = toolCalls.filter( + (call) => call.toolRequest.name === 'run_shell_command', + ); + + const hasEfficiencyFlag = shellCalls.some((call) => { + const cmd = getCommand(call); + return ( + cmd && + cmd.includes('npm install') && + (cmd.includes('--silent') || + cmd.includes('--quiet') || + cmd.includes('-q')) + ); + }); + + expect( + hasEfficiencyFlag, + 'Agent used efficiency flags even though enableShellOutputEfficiency was disabled', + ).toBe(false); + }, + }); +}); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 4e66e3403c..6089af9ddc 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -592,11 +592,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -706,11 +701,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -803,11 +793,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -1391,11 +1376,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -1514,11 +1494,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -1637,11 +1612,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -1868,11 +1838,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -2099,11 +2064,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -2218,11 +2178,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -2448,11 +2403,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -2567,11 +2517,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi # Operational Guidelines -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 5307c3235a..bd6c1eaf18 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -463,26 +463,6 @@ describe('Core System Prompt (prompts.ts)', () => { }); describe('Platform-specific and Background Process instructions', () => { - it('should include Windows-specific shell efficiency commands on win32', () => { - mockPlatform('win32'); - const prompt = getCoreSystemPrompt(mockConfig); - expect(prompt).toContain( - "using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)", - ); - expect(prompt).not.toContain( - "using commands like 'grep', 'tail', 'head'", - ); - }); - - it('should include generic shell efficiency commands on non-Windows', () => { - mockPlatform('linux'); - const prompt = getCoreSystemPrompt(mockConfig); - expect(prompt).toContain("using commands like 'grep', 'tail', 'head'"); - expect(prompt).not.toContain( - "using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)", - ); - }); - it('should use is_background parameter in background process instructions', () => { const prompt = getCoreSystemPrompt(mockConfig); expect(prompt).toContain( diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 56739ebb77..acb530b22e 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -245,6 +245,7 @@ export function renderOperationalGuidelines( if (!options) return ''; return ` # Operational Guidelines + ${shellEfficiencyGuidelines(options.enableShellEfficiency)} ## Tone and Style (CLI Interaction) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 2a713afbed..ca943e916f 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -55,7 +55,6 @@ export interface PrimaryWorkflowsOptions { export interface OperationalGuidelinesOptions { interactive: boolean; isGemini3: boolean; - enableShellEfficiency: boolean; interactiveShellEnabled: boolean; } @@ -259,8 +258,6 @@ export function renderOperationalGuidelines( return ` # Operational Guidelines -${shellEfficiencyGuidelines(options.enableShellEfficiency)} - ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. @@ -517,15 +514,6 @@ function planningPhaseSuggestion(options: PrimaryWorkflowsOptions): string { return ''; } -function shellEfficiencyGuidelines(enabled: boolean): string { - if (!enabled) return ''; - return ` -## Shell Tool Efficiency - -- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. -- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).`; -} - function toneAndStyleNoChitchat(isGemini3: boolean): string { return isGemini3 ? ` diff --git a/packages/core/src/tools/__snapshots__/shell.test.ts.snap b/packages/core/src/tools/__snapshots__/shell.test.ts.snap index 6592993160..73245052a7 100644 --- a/packages/core/src/tools/__snapshots__/shell.test.ts.snap +++ b/packages/core/src/tools/__snapshots__/shell.test.ts.snap @@ -3,6 +3,10 @@ exports[`ShellTool > getDescription > should return the non-windows description when not on windows 1`] = ` "This tool executes a given shell command as \`bash -c \`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`. + Efficiency Guidelines: + - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. + - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). + The following information is returned: Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes. @@ -16,6 +20,10 @@ exports[`ShellTool > getDescription > should return the non-windows description exports[`ShellTool > getDescription > should return the windows description when on windows 1`] = ` "This tool executes a given shell command as \`powershell.exe -NoProfile -Command \`. Command can start background processes using PowerShell constructs such as \`Start-Process -NoNewWindow\` or \`Start-Job\`. + Efficiency Guidelines: + - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. + - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). + The following information is returned: Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes. diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index b851ee99d4..e1b16f0a4a 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -130,6 +130,7 @@ describe('ShellTool', () => { getGeminiClient: vi.fn().mockReturnValue({}), getShellToolInactivityTimeout: vi.fn().mockReturnValue(1000), getEnableInteractiveShell: vi.fn().mockReturnValue(false), + getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), sanitizationConfig: {}, } as unknown as Config; @@ -633,6 +634,15 @@ describe('ShellTool', () => { const shellTool = new ShellTool(mockConfig, createMockMessageBus()); expect(shellTool.description).toMatchSnapshot(); }); + + it('should not include efficiency guidelines when disabled', () => { + mockPlatform.mockReturnValue('linux'); + vi.mocked(mockConfig.getEnableShellOutputEfficiency).mockReturnValue( + false, + ); + const shellTool = new ShellTool(mockConfig, createMockMessageBus()); + expect(shellTool.description).not.toContain('Efficiency Guidelines:'); + }); }); describe('llmContent output format', () => { diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index e29419913e..1c7192e254 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -451,7 +451,18 @@ export class ShellToolInvocation extends BaseToolInvocation< } } -function getShellToolDescription(enableInteractiveShell: boolean): string { +function getShellToolDescription( + enableInteractiveShell: boolean, + enableEfficiency: boolean, +): string { + const efficiencyGuidelines = enableEfficiency + ? ` + + Efficiency Guidelines: + - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. + - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).` + : ''; + const returnedInfo = ` The following information is returned: @@ -467,12 +478,12 @@ function getShellToolDescription(enableInteractiveShell: boolean): string { const backgroundInstructions = enableInteractiveShell ? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use PowerShell background constructs.' : 'Command can start background processes using PowerShell constructs such as `Start-Process -NoNewWindow` or `Start-Job`.'; - return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command \`. ${backgroundInstructions}${returnedInfo}`; + return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command \`. ${backgroundInstructions}${efficiencyGuidelines}${returnedInfo}`; } else { const backgroundInstructions = enableInteractiveShell ? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use `&` to background commands.' : 'Command can start background processes using `&`.'; - return `This tool executes a given shell command as \`bash -c \`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${returnedInfo}`; + return `This tool executes a given shell command as \`bash -c \`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${efficiencyGuidelines}${returnedInfo}`; } } @@ -500,7 +511,10 @@ export class ShellTool extends BaseDeclarativeTool< super( ShellTool.Name, 'Shell', - getShellToolDescription(config.getEnableInteractiveShell()), + getShellToolDescription( + config.getEnableInteractiveShell(), + config.getEnableShellOutputEfficiency(), + ), Kind.Execute, { type: 'object',