mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 22:21:22 -07:00
feat: move shell efficiency guidelines to tool description (#18614)
This commit is contained in:
110
evals/shell-efficiency.eval.ts
Normal file
110
evals/shell-efficiency.eval.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
|
||||
describe('Shell Efficiency', () => {
|
||||
/**
 * Extracts the shell command text from a logged tool call.
 *
 * `toolRequest.args` may be an object or a JSON-encoded string. A string that
 * is not valid JSON is treated as the raw command and returned unchanged.
 *
 * @param call - A tool-log entry; only `call.toolRequest.args` is read.
 * @returns The command text, or `undefined` when no string command is present.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- tool-log entries are typed outside this helper
const getCommand = (call: any): string | undefined => {
  let args: unknown = call?.toolRequest?.args;

  if (typeof args === 'string') {
    try {
      args = JSON.parse(args);
    } catch {
      // Not JSON: keep the raw string, which is returned as the command below.
    }
  }

  if (typeof args === 'string') {
    return args;
  }

  // JSON such as `null` or `42` parses successfully but carries no command;
  // guard so property access never throws on a non-object.
  if (typeof args === 'object' && args !== null) {
    const command = (args as Record<string, unknown>)['command'];
    return typeof command === 'string' ? command : undefined;
  }

  return undefined;
};
|
||||
|
||||
// Eval: when asked to install a package, at least one shell call should run
// `npm install` with a quiet/silent flag.
evalTest('ALWAYS_PASSES', {
  name: 'should use --silent/--quiet flags when installing packages',
  prompt: 'Install the "lodash" package using npm.',
  assert: async (rig) => {
    const shellCalls = rig
      .readToolLogs()
      .filter((call) => call.toolRequest.name === 'run_shell_command');
    const commands = shellCalls.map(getCommand);

    // Match the flag as a whole token. A plain substring test for '-q' also
    // matches unrelated text such as 'foo-query'.
    const quietFlag = /(?:^|\s)(?:--silent|--quiet|-q)(?![\w-])/;

    const hasEfficiencyFlag = commands.some(
      (cmd) =>
        typeof cmd === 'string' &&
        cmd.includes('npm install') &&
        quietFlag.test(cmd),
    );

    expect(
      hasEfficiencyFlag,
      `Expected agent to use efficiency flags for npm install. Commands used: ${commands.join(', ')}`,
    ).toBe(true);
  },
});
|
||||
|
||||
// Eval: when asked for the git log, at least one shell call should invoke git
// with `--no-pager` so the command cannot block on a pager.
evalTest('ALWAYS_PASSES', {
  name: 'should use --no-pager with git commands',
  prompt: 'Show the git log.',
  assert: async (rig) => {
    const shellCalls = rig
      .readToolLogs()
      .filter((call) => call.toolRequest.name === 'run_shell_command');
    const commands = shellCalls.map(getCommand);

    // `\bgit\b` requires git as its own word, so text such as 'digit' or
    // '.gitignore' does not count as a git invocation.
    const gitWord = /\bgit\b/;

    const hasNoPager = commands.some(
      (cmd) =>
        typeof cmd === 'string' &&
        gitWord.test(cmd) &&
        cmd.includes('--no-pager'),
    );

    expect(
      hasNoPager,
      `Expected agent to use --no-pager with git. Commands used: ${commands.join(', ')}`,
    ).toBe(true);
  },
});
|
||||
|
||||
// Eval: with `tools.shell.enableShellOutputEfficiency` set to false, no
// `npm install` shell call should carry a quiet/silent flag.
evalTest('ALWAYS_PASSES', {
  name: 'should NOT use efficiency flags when enableShellOutputEfficiency is disabled',
  params: {
    settings: {
      tools: {
        shell: {
          enableShellOutputEfficiency: false,
        },
      },
    },
  },
  prompt: 'Install the "lodash" package using npm.',
  assert: async (rig) => {
    const shellCalls = rig
      .readToolLogs()
      .filter((call) => call.toolRequest.name === 'run_shell_command');

    // Match the flag as a whole token. A plain substring test for '-q' would
    // flag unrelated text such as 'foo-query' and fail this negative check.
    const quietFlag = /(?:^|\s)(?:--silent|--quiet|-q)(?![\w-])/;

    const hasEfficiencyFlag = shellCalls.some((call) => {
      const cmd = getCommand(call);
      return (
        typeof cmd === 'string' &&
        cmd.includes('npm install') &&
        quietFlag.test(cmd)
      );
    });

    expect(
      hasEfficiencyFlag,
      'Agent used efficiency flags even though enableShellOutputEfficiency was disabled',
    ).toBe(false);
  },
});
|
||||
});
|
||||
@@ -592,11 +592,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -706,11 +701,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -803,11 +793,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -1391,11 +1376,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -1514,11 +1494,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -1637,11 +1612,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -1868,11 +1838,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -2099,11 +2064,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -2218,11 +2178,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -2448,11 +2403,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -2567,11 +2517,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
|
||||
|
||||
# Operational Guidelines
|
||||
|
||||
## Shell Tool Efficiency
|
||||
|
||||
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
|
||||
@@ -463,26 +463,6 @@ describe('Core System Prompt (prompts.ts)', () => {
|
||||
});
|
||||
|
||||
describe('Platform-specific and Background Process instructions', () => {
|
||||
// With the platform mocked as win32, the prompt must contain the CMD/PowerShell
// command hint and must not contain the grep/tail/head hint.
it('should include Windows-specific shell efficiency commands on win32', () => {
  mockPlatform('win32');
  const prompt = getCoreSystemPrompt(mockConfig);

  expect(prompt).toContain(
    "using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)",
  );
  expect(prompt).not.toContain("using commands like 'grep', 'tail', 'head'");
});
|
||||
|
||||
// With the platform mocked as linux, the prompt must contain the grep/tail/head
// command hint and must not contain the CMD/PowerShell hint.
it('should include generic shell efficiency commands on non-Windows', () => {
  mockPlatform('linux');
  const prompt = getCoreSystemPrompt(mockConfig);

  expect(prompt).toContain("using commands like 'grep', 'tail', 'head'");
  expect(prompt).not.toContain(
    "using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)",
  );
});
|
||||
|
||||
it('should use is_background parameter in background process instructions', () => {
|
||||
const prompt = getCoreSystemPrompt(mockConfig);
|
||||
expect(prompt).toContain(
|
||||
|
||||
@@ -245,6 +245,7 @@ export function renderOperationalGuidelines(
|
||||
if (!options) return '';
|
||||
return `
|
||||
# Operational Guidelines
|
||||
|
||||
${shellEfficiencyGuidelines(options.enableShellEfficiency)}
|
||||
|
||||
## Tone and Style (CLI Interaction)
|
||||
|
||||
@@ -55,7 +55,6 @@ export interface PrimaryWorkflowsOptions {
|
||||
/**
 * Options for the "Operational Guidelines" prompt section.
 *
 * NOTE(review): presumably the `options` argument of
 * `renderOperationalGuidelines` — confirm against its signature.
 */
export interface OperationalGuidelinesOptions {
  /** NOTE(review): presumably whether the session is interactive — confirm against callers. */
  interactive: boolean;
  /** NOTE(review): presumably selects Gemini 3 specific wording — confirm against callers. */
  isGemini3: boolean;
  /** Forwarded to `shellEfficiencyGuidelines` to include or omit the "Shell Tool Efficiency" section. */
  enableShellEfficiency: boolean;
  /** NOTE(review): presumably mirrors the interactive-shell setting — confirm against callers. */
  interactiveShellEnabled: boolean;
}
|
||||
|
||||
@@ -259,8 +258,6 @@ export function renderOperationalGuidelines(
|
||||
return `
|
||||
# Operational Guidelines
|
||||
|
||||
${shellEfficiencyGuidelines(options.enableShellEfficiency)}
|
||||
|
||||
## Tone and Style
|
||||
|
||||
- **Role:** A senior software engineer and collaborative peer programmer.
|
||||
@@ -517,15 +514,6 @@ function planningPhaseSuggestion(options: PrimaryWorkflowsOptions): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Renders the "Shell Tool Efficiency" markdown section of the prompt.
 *
 * @param enabled - When false, the section is omitted.
 * @returns The section text, starting with a newline, or an empty string when
 *   `enabled` is false.
 */
function shellEfficiencyGuidelines(enabled: boolean): string {
  if (!enabled) {
    return '';
  }

  // The literal starts with a line break so the heading begins on its own line
  // when interpolated into a larger template.
  return `
## Shell Tool Efficiency

- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).`;
}
|
||||
|
||||
function toneAndStyleNoChitchat(isGemini3: boolean): string {
|
||||
return isGemini3
|
||||
? `
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
exports[`ShellTool > getDescription > should return the non-windows description when not on windows 1`] = `
|
||||
"This tool executes a given shell command as \`bash -c <command>\`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
|
||||
|
||||
Efficiency Guidelines:
|
||||
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
The following information is returned:
|
||||
|
||||
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
|
||||
@@ -16,6 +20,10 @@ exports[`ShellTool > getDescription > should return the non-windows description
|
||||
exports[`ShellTool > getDescription > should return the windows description when on windows 1`] = `
|
||||
"This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. Command can start background processes using PowerShell constructs such as \`Start-Process -NoNewWindow\` or \`Start-Job\`.
|
||||
|
||||
Efficiency Guidelines:
|
||||
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
|
||||
|
||||
The following information is returned:
|
||||
|
||||
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
|
||||
|
||||
@@ -130,6 +130,7 @@ describe('ShellTool', () => {
|
||||
getGeminiClient: vi.fn().mockReturnValue({}),
|
||||
getShellToolInactivityTimeout: vi.fn().mockReturnValue(1000),
|
||||
getEnableInteractiveShell: vi.fn().mockReturnValue(false),
|
||||
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
|
||||
sanitizationConfig: {},
|
||||
} as unknown as Config;
|
||||
|
||||
@@ -633,6 +634,15 @@ describe('ShellTool', () => {
|
||||
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
|
||||
expect(shellTool.description).toMatchSnapshot();
|
||||
});
|
||||
|
||||
// When the config reports shell output efficiency as disabled, the tool
// description must not contain the "Efficiency Guidelines:" section.
it('should not include efficiency guidelines when disabled', () => {
  mockPlatform.mockReturnValue('linux');
  vi.mocked(mockConfig.getEnableShellOutputEfficiency).mockReturnValue(false);

  const shellTool = new ShellTool(mockConfig, createMockMessageBus());

  expect(shellTool.description).not.toContain('Efficiency Guidelines:');
});
|
||||
});
|
||||
|
||||
describe('llmContent output format', () => {
|
||||
|
||||
@@ -451,7 +451,18 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
}
|
||||
|
||||
function getShellToolDescription(enableInteractiveShell: boolean): string {
|
||||
function getShellToolDescription(
|
||||
enableInteractiveShell: boolean,
|
||||
enableEfficiency: boolean,
|
||||
): string {
|
||||
const efficiencyGuidelines = enableEfficiency
|
||||
? `
|
||||
|
||||
Efficiency Guidelines:
|
||||
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
|
||||
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).`
|
||||
: '';
|
||||
|
||||
const returnedInfo = `
|
||||
|
||||
The following information is returned:
|
||||
@@ -467,12 +478,12 @@ function getShellToolDescription(enableInteractiveShell: boolean): string {
|
||||
const backgroundInstructions = enableInteractiveShell
|
||||
? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use PowerShell background constructs.'
|
||||
: 'Command can start background processes using PowerShell constructs such as `Start-Process -NoNewWindow` or `Start-Job`.';
|
||||
return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. ${backgroundInstructions}${returnedInfo}`;
|
||||
return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. ${backgroundInstructions}${efficiencyGuidelines}${returnedInfo}`;
|
||||
} else {
|
||||
const backgroundInstructions = enableInteractiveShell
|
||||
? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use `&` to background commands.'
|
||||
: 'Command can start background processes using `&`.';
|
||||
return `This tool executes a given shell command as \`bash -c <command>\`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${returnedInfo}`;
|
||||
return `This tool executes a given shell command as \`bash -c <command>\`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${efficiencyGuidelines}${returnedInfo}`;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -500,7 +511,10 @@ export class ShellTool extends BaseDeclarativeTool<
|
||||
super(
|
||||
ShellTool.Name,
|
||||
'Shell',
|
||||
getShellToolDescription(config.getEnableInteractiveShell()),
|
||||
getShellToolDescription(
|
||||
config.getEnableInteractiveShell(),
|
||||
config.getEnableShellOutputEfficiency(),
|
||||
),
|
||||
Kind.Execute,
|
||||
{
|
||||
type: 'object',
|
||||
|
||||
Reference in New Issue
Block a user