mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
fix integration test static errors, and make the run_shell_command tests actually test what they intend (#11050)
This commit is contained in:
@@ -18,13 +18,14 @@ describe.skip('Interactive Mode', () => {
|
|||||||
await rig.cleanup();
|
await rig.cleanup();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should trigger chat compression with /compress command', async () => {
|
// TODO(#11062): Make this test reliable by not using the actual Gemini model
|
||||||
|
it.skip('should trigger chat compression with /compress command', async () => {
|
||||||
await rig.setup('interactive-compress-test');
|
await rig.setup('interactive-compress-test');
|
||||||
|
|
||||||
const run = await rig.runInteractive();
|
const run = await rig.runInteractive();
|
||||||
|
|
||||||
const longPrompt =
|
const longPrompt =
|
||||||
'Dont do anything except returning a 1000 token long paragragh with the <name of the scientist who discovered theory of relativity> at the end to indicate end of response. This is a moderately long sentence.';
|
'Dont do anything except returning a 1000 token long paragraph with the <name of the scientist who discovered theory of relativity> at the end to indicate end of response. This is a moderately long sentence.';
|
||||||
|
|
||||||
await run.type(longPrompt);
|
await run.type(longPrompt);
|
||||||
await run.type('\r');
|
await run.type('\r');
|
||||||
|
|||||||
@@ -5,8 +5,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// Unset NO_COLOR environment variable to ensure consistent theme behavior between local and CI test runs
|
// Unset NO_COLOR environment variable to ensure consistent theme behavior between local and CI test runs
|
||||||
if (process.env.NO_COLOR !== undefined) {
|
if (process.env['NO_COLOR'] !== undefined) {
|
||||||
delete process.env.NO_COLOR;
|
delete process.env['NO_COLOR'];
|
||||||
}
|
}
|
||||||
|
|
||||||
import {
|
import {
|
||||||
@@ -60,21 +60,21 @@ export async function setup() {
|
|||||||
console.error('Error cleaning up old test runs:', e);
|
console.error('Error cleaning up old test runs:', e);
|
||||||
}
|
}
|
||||||
|
|
||||||
process.env.INTEGRATION_TEST_FILE_DIR = runDir;
|
process.env['INTEGRATION_TEST_FILE_DIR'] = runDir;
|
||||||
process.env.GEMINI_CLI_INTEGRATION_TEST = 'true';
|
process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
|
||||||
process.env.TELEMETRY_LOG_FILE = join(runDir, 'telemetry.log');
|
process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
|
||||||
|
|
||||||
if (process.env.KEEP_OUTPUT) {
|
if (process.env['KEEP_OUTPUT']) {
|
||||||
console.log(`Keeping output for test run in: ${runDir}`);
|
console.log(`Keeping output for test run in: ${runDir}`);
|
||||||
}
|
}
|
||||||
process.env.VERBOSE = process.env.VERBOSE ?? 'false';
|
process.env['VERBOSE'] = process.env['VERBOSE'] ?? 'false';
|
||||||
|
|
||||||
console.log(`\nIntegration test output directory: ${runDir}`);
|
console.log(`\nIntegration test output directory: ${runDir}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function teardown() {
|
export async function teardown() {
|
||||||
// Cleanup the test run directory unless KEEP_OUTPUT is set
|
// Cleanup the test run directory unless KEEP_OUTPUT is set
|
||||||
if (process.env.KEEP_OUTPUT !== 'true' && runDir) {
|
if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) {
|
||||||
await rm(runDir, { recursive: true, force: true });
|
await rm(runDir, { recursive: true, force: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -95,65 +95,12 @@ describe('run_shell_command', () => {
|
|||||||
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
||||||
|
|
||||||
// Provide the prompt via stdin to simulate non-interactive mode
|
// Provide the prompt via stdin to simulate non-interactive mode
|
||||||
const result = await rig.run({
|
|
||||||
stdin: prompt,
|
|
||||||
args: [`--allowed-tools=run_shell_command(${tool})`],
|
|
||||||
});
|
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
|
||||||
|
|
||||||
if (!foundToolCall) {
|
|
||||||
printDebugInfo(rig, result, {
|
|
||||||
'Found tool call': foundToolCall,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(
|
|
||||||
foundToolCall,
|
|
||||||
'Expected to find a run_shell_command tool call',
|
|
||||||
).toBeTruthy();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should succeed with no parens in non-interactive mode', async () => {
|
|
||||||
const rig = new TestRig();
|
|
||||||
await rig.setup('should succeed with no parens in non-interactive mode');
|
|
||||||
|
|
||||||
const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
|
|
||||||
const { tool } = getLineCountCommand();
|
|
||||||
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
|
||||||
|
|
||||||
const result = await rig.run({
|
|
||||||
stdin: prompt,
|
|
||||||
args: ['--allowed-tools=run_shell_command'],
|
|
||||||
});
|
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
|
||||||
|
|
||||||
if (!foundToolCall) {
|
|
||||||
printDebugInfo(rig, result, {
|
|
||||||
'Found tool call': foundToolCall,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(
|
|
||||||
foundToolCall,
|
|
||||||
'Expected to find a run_shell_command tool call',
|
|
||||||
).toBeTruthy();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should succeed with --yolo mode', async () => {
|
|
||||||
const rig = new TestRig();
|
|
||||||
await rig.setup('should succeed with --yolo mode');
|
|
||||||
|
|
||||||
const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
|
|
||||||
const { tool } = getLineCountCommand();
|
|
||||||
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
|
||||||
|
|
||||||
const result = await rig.run(
|
const result = await rig.run(
|
||||||
{
|
{
|
||||||
prompt: prompt,
|
stdin: prompt,
|
||||||
|
yolo: false,
|
||||||
},
|
},
|
||||||
'--yolo',
|
`--allowed-tools=run_shell_command(${tool})`,
|
||||||
);
|
);
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||||
@@ -168,6 +115,84 @@ describe('run_shell_command', () => {
|
|||||||
foundToolCall,
|
foundToolCall,
|
||||||
'Expected to find a run_shell_command tool call',
|
'Expected to find a run_shell_command tool call',
|
||||||
).toBeTruthy();
|
).toBeTruthy();
|
||||||
|
|
||||||
|
const toolCall = rig
|
||||||
|
.readToolLogs()
|
||||||
|
.filter(
|
||||||
|
(toolCall) => toolCall.toolRequest.name === 'run_shell_command',
|
||||||
|
)[0];
|
||||||
|
expect(toolCall.toolRequest.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should succeed with no parens in non-interactive mode', async () => {
|
||||||
|
const rig = new TestRig();
|
||||||
|
await rig.setup('should succeed with no parens in non-interactive mode');
|
||||||
|
|
||||||
|
const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
|
||||||
|
const { tool } = getLineCountCommand();
|
||||||
|
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
||||||
|
|
||||||
|
const result = await rig.run(
|
||||||
|
{
|
||||||
|
stdin: prompt,
|
||||||
|
yolo: false,
|
||||||
|
},
|
||||||
|
'--allowed-tools=run_shell_command',
|
||||||
|
);
|
||||||
|
|
||||||
|
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||||
|
|
||||||
|
if (!foundToolCall) {
|
||||||
|
printDebugInfo(rig, result, {
|
||||||
|
'Found tool call': foundToolCall,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(
|
||||||
|
foundToolCall,
|
||||||
|
'Expected to find a run_shell_command tool call',
|
||||||
|
).toBeTruthy();
|
||||||
|
|
||||||
|
const toolCall = rig
|
||||||
|
.readToolLogs()
|
||||||
|
.filter(
|
||||||
|
(toolCall) => toolCall.toolRequest.name === 'run_shell_command',
|
||||||
|
)[0];
|
||||||
|
expect(toolCall.toolRequest.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should succeed with --yolo mode', async () => {
|
||||||
|
const rig = new TestRig();
|
||||||
|
await rig.setup('should succeed with --yolo mode');
|
||||||
|
|
||||||
|
const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
|
||||||
|
const { tool } = getLineCountCommand();
|
||||||
|
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
||||||
|
|
||||||
|
const result = await rig.run({
|
||||||
|
prompt: prompt,
|
||||||
|
yolo: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||||
|
|
||||||
|
if (!foundToolCall) {
|
||||||
|
printDebugInfo(rig, result, {
|
||||||
|
'Found tool call': foundToolCall,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(
|
||||||
|
foundToolCall,
|
||||||
|
'Expected to find a run_shell_command tool call',
|
||||||
|
).toBeTruthy();
|
||||||
|
|
||||||
|
const toolCall = rig
|
||||||
|
.readToolLogs()
|
||||||
|
.filter(
|
||||||
|
(toolCall) => toolCall.toolRequest.name === 'run_shell_command',
|
||||||
|
)[0];
|
||||||
|
expect(toolCall.toolRequest.success).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should work with ShellTool alias', async () => {
|
it('should work with ShellTool alias', async () => {
|
||||||
@@ -178,10 +203,13 @@ describe('run_shell_command', () => {
|
|||||||
const { tool } = getLineCountCommand();
|
const { tool } = getLineCountCommand();
|
||||||
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
const prompt = `use ${tool} to tell me how many lines there are in ${testFile}`;
|
||||||
|
|
||||||
const result = await rig.run({
|
const result = await rig.run(
|
||||||
stdin: prompt,
|
{
|
||||||
args: [`--allowed-tools=ShellTool(${tool})`],
|
stdin: prompt,
|
||||||
});
|
yolo: false,
|
||||||
|
},
|
||||||
|
`--allowed-tools=ShellTool(${tool})`,
|
||||||
|
);
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||||
|
|
||||||
@@ -195,37 +223,65 @@ describe('run_shell_command', () => {
|
|||||||
foundToolCall,
|
foundToolCall,
|
||||||
'Expected to find a run_shell_command tool call',
|
'Expected to find a run_shell_command tool call',
|
||||||
).toBeTruthy();
|
).toBeTruthy();
|
||||||
|
|
||||||
|
const toolCall = rig
|
||||||
|
.readToolLogs()
|
||||||
|
.filter(
|
||||||
|
(toolCall) => toolCall.toolRequest.name === 'run_shell_command',
|
||||||
|
)[0];
|
||||||
|
expect(toolCall.toolRequest.success).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should combine multiple --allowed-tools flags', async () => {
|
// TODO(#11062): Un-skip this once we can make it reliable by using hard coded
|
||||||
|
// model responses.
|
||||||
|
it.skip('should combine multiple --allowed-tools flags', async () => {
|
||||||
const rig = new TestRig();
|
const rig = new TestRig();
|
||||||
await rig.setup('should combine multiple --allowed-tools flags');
|
await rig.setup('should combine multiple --allowed-tools flags');
|
||||||
|
|
||||||
const { tool } = getLineCountCommand();
|
const { tool } = getLineCountCommand();
|
||||||
const prompt =
|
const prompt =
|
||||||
`use both ${tool} and ls to count the number of lines in ` +
|
`use both ${tool} and ls to count the number of lines in files in this ` +
|
||||||
`files in this directory`;
|
`directory. Do not pipe these commands into each other, run them separately.`;
|
||||||
|
|
||||||
const result = await rig.run({
|
const result = await rig.run(
|
||||||
stdin: prompt,
|
{
|
||||||
args: [
|
stdin: prompt,
|
||||||
`--allowed-tools=run_shell_command(${tool})`,
|
yolo: false,
|
||||||
'--allowed-tools=run_shell_command(ls)',
|
},
|
||||||
],
|
`--allowed-tools=run_shell_command(${tool})`,
|
||||||
});
|
'--allowed-tools=run_shell_command(ls)',
|
||||||
|
);
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
for (const expected in ['ls', tool]) {
|
||||||
|
const foundToolCall = await rig.waitForToolCall(
|
||||||
|
'run_shell_command',
|
||||||
|
15000,
|
||||||
|
(args) => args.toLowerCase().includes(`"command": "${expected}`),
|
||||||
|
);
|
||||||
|
|
||||||
if (!foundToolCall) {
|
if (!foundToolCall) {
|
||||||
printDebugInfo(rig, result, {
|
printDebugInfo(rig, result, {
|
||||||
'Found tool call': foundToolCall,
|
'Found tool call': foundToolCall,
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(
|
||||||
|
foundToolCall,
|
||||||
|
`Expected to find a run_shell_command tool call to "${expected}",` +
|
||||||
|
` got ${rig.readToolLogs().join('\n')}`,
|
||||||
|
).toBeTruthy();
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(
|
const toolLogs = rig
|
||||||
foundToolCall,
|
.readToolLogs()
|
||||||
'Expected to find a run_shell_command tool call',
|
.filter((toolCall) => toolCall.toolRequest.name === 'run_shell_command');
|
||||||
).toBeTruthy();
|
expect(toolLogs.length, toolLogs.join('\n')).toBeGreaterThanOrEqual(2);
|
||||||
|
for (const toolLog of toolLogs) {
|
||||||
|
expect(
|
||||||
|
toolLog.toolRequest.success,
|
||||||
|
`Expected tool call ${toolLog} to succeed`,
|
||||||
|
).toBe(true);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should allow all with "ShellTool" and other specific tools', async () => {
|
it('should allow all with "ShellTool" and other specific tools', async () => {
|
||||||
@@ -237,13 +293,14 @@ describe('run_shell_command', () => {
|
|||||||
const { tool } = getLineCountCommand();
|
const { tool } = getLineCountCommand();
|
||||||
const prompt = `Please run the command "echo test-allow-all" and show me the output`;
|
const prompt = `Please run the command "echo test-allow-all" and show me the output`;
|
||||||
|
|
||||||
const result = await rig.run({
|
const result = await rig.run(
|
||||||
stdin: prompt,
|
{
|
||||||
args: [
|
stdin: prompt,
|
||||||
`--allowed-tools=run_shell_command(${tool})`,
|
yolo: false,
|
||||||
'--allowed-tools=run_shell_command',
|
},
|
||||||
],
|
`--allowed-tools=run_shell_command(${tool})`,
|
||||||
});
|
'--allowed-tools=run_shell_command',
|
||||||
|
);
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||||
|
|
||||||
@@ -259,6 +316,13 @@ describe('run_shell_command', () => {
|
|||||||
'Expected to find a run_shell_command tool call',
|
'Expected to find a run_shell_command tool call',
|
||||||
).toBeTruthy();
|
).toBeTruthy();
|
||||||
|
|
||||||
|
const toolCall = rig
|
||||||
|
.readToolLogs()
|
||||||
|
.filter(
|
||||||
|
(toolCall) => toolCall.toolRequest.name === 'run_shell_command',
|
||||||
|
)[0];
|
||||||
|
expect(toolCall.toolRequest.success).toBe(true);
|
||||||
|
|
||||||
// Validate model output - will throw if no output, warn if missing expected content
|
// Validate model output - will throw if no output, warn if missing expected content
|
||||||
validateModelOutput(
|
validateModelOutput(
|
||||||
result,
|
result,
|
||||||
|
|||||||
@@ -20,9 +20,9 @@ describe.skip('stdin context', () => {
|
|||||||
|
|
||||||
await rig.waitForTelemetryEvent('api_request');
|
await rig.waitForTelemetryEvent('api_request');
|
||||||
const lastRequest = rig.readLastApiRequest();
|
const lastRequest = rig.readLastApiRequest();
|
||||||
expect(lastRequest).not.toBeNull();
|
|
||||||
|
|
||||||
const historyString = lastRequest.attributes.request_text;
|
expect(lastRequest?.attributes?.request_text).toBeDefined();
|
||||||
|
const historyString = lastRequest!.attributes!.request_text!;
|
||||||
|
|
||||||
// TODO: This test currently fails in sandbox mode (Docker/Podman) because
|
// TODO: This test currently fails in sandbox mode (Docker/Podman) because
|
||||||
// stdin content is not properly forwarded to the container when used
|
// stdin content is not properly forwarded to the container when used
|
||||||
|
|||||||
@@ -158,6 +158,7 @@ interface ParsedLog {
|
|||||||
function_args?: string;
|
function_args?: string;
|
||||||
success?: boolean;
|
success?: boolean;
|
||||||
duration_ms?: number;
|
duration_ms?: number;
|
||||||
|
request_text?: string;
|
||||||
};
|
};
|
||||||
scopeMetrics?: {
|
scopeMetrics?: {
|
||||||
metrics: {
|
metrics: {
|
||||||
@@ -315,10 +316,19 @@ export class TestRig {
|
|||||||
run(
|
run(
|
||||||
promptOrOptions:
|
promptOrOptions:
|
||||||
| string
|
| string
|
||||||
| { prompt?: string; stdin?: string; stdinDoesNotEnd?: boolean },
|
| {
|
||||||
|
prompt?: string;
|
||||||
|
stdin?: string;
|
||||||
|
stdinDoesNotEnd?: boolean;
|
||||||
|
yolo?: boolean;
|
||||||
|
},
|
||||||
...args: string[]
|
...args: string[]
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const { command, initialArgs } = this._getCommandAndArgs(['--yolo']);
|
const yolo =
|
||||||
|
typeof promptOrOptions === 'string' || promptOrOptions.yolo !== false;
|
||||||
|
const { command, initialArgs } = this._getCommandAndArgs(
|
||||||
|
yolo ? ['--yolo'] : [],
|
||||||
|
);
|
||||||
const commandArgs = [...initialArgs];
|
const commandArgs = [...initialArgs];
|
||||||
const execOptions: {
|
const execOptions: {
|
||||||
cwd: string;
|
cwd: string;
|
||||||
@@ -566,7 +576,11 @@ export class TestRig {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
async waitForToolCall(toolName: string, timeout?: number) {
|
async waitForToolCall(
|
||||||
|
toolName: string,
|
||||||
|
timeout?: number,
|
||||||
|
matchArgs?: (args: string) => boolean,
|
||||||
|
) {
|
||||||
// Use environment-specific timeout
|
// Use environment-specific timeout
|
||||||
if (!timeout) {
|
if (!timeout) {
|
||||||
timeout = getDefaultTimeout();
|
timeout = getDefaultTimeout();
|
||||||
@@ -578,7 +592,11 @@ export class TestRig {
|
|||||||
return poll(
|
return poll(
|
||||||
() => {
|
() => {
|
||||||
const toolLogs = this.readToolLogs();
|
const toolLogs = this.readToolLogs();
|
||||||
return toolLogs.some((log) => log.toolRequest.name === toolName);
|
return toolLogs.some(
|
||||||
|
(log) =>
|
||||||
|
log.toolRequest.name === toolName &&
|
||||||
|
(matchArgs?.call(this, log.toolRequest.args) ?? true),
|
||||||
|
);
|
||||||
},
|
},
|
||||||
timeout,
|
timeout,
|
||||||
100,
|
100,
|
||||||
|
|||||||
@@ -28,13 +28,14 @@ describe('write_file', () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const allTools = rig.readToolLogs();
|
const allTools = rig.readToolLogs();
|
||||||
expect(foundToolCall, 'Expected to find a write_file tool call').toBeTruthy(
|
expect(
|
||||||
|
foundToolCall,
|
||||||
createToolCallErrorMessage(
|
createToolCallErrorMessage(
|
||||||
'write_file',
|
'write_file',
|
||||||
allTools.map((t) => t.toolRequest.name),
|
allTools.map((t) => t.toolRequest.name),
|
||||||
result,
|
result,
|
||||||
),
|
),
|
||||||
);
|
).toBeTruthy();
|
||||||
|
|
||||||
// Validate model output - will throw if no output, warn if missing expected content
|
// Validate model output - will throw if no output, warn if missing expected content
|
||||||
validateModelOutput(result, 'dad.txt', 'Write file test');
|
validateModelOutput(result, 'dad.txt', 'Write file test');
|
||||||
|
|||||||
Reference in New Issue
Block a user