mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
Improving memory tool instructions and eval testing (#18091)
This commit is contained in:
@@ -7,7 +7,12 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { existsSync } from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
import {
|
||||
TestRig,
|
||||
printDebugInfo,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
|
||||
describe('file-system', () => {
|
||||
let rig: TestRig;
|
||||
@@ -43,8 +48,11 @@ describe('file-system', () => {
|
||||
'Expected to find a read_file tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'hello world', 'File read test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: 'hello world',
|
||||
testName: 'File read test',
|
||||
});
|
||||
});
|
||||
|
||||
it('should be able to write a file', async () => {
|
||||
@@ -74,8 +82,8 @@ describe('file-system', () => {
|
||||
'Expected to find a write_file, edit, or replace tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output
|
||||
validateModelOutput(result, null, 'File write test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, { testName: 'File write test' });
|
||||
|
||||
const fileContent = rig.readFile('test.txt');
|
||||
|
||||
|
||||
@@ -6,7 +6,12 @@
|
||||
|
||||
import { WEB_SEARCH_TOOL_NAME } from '../packages/core/src/tools/tool-names.js';
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
import {
|
||||
TestRig,
|
||||
printDebugInfo,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
|
||||
describe('web search tool', () => {
|
||||
let rig: TestRig;
|
||||
@@ -68,12 +73,11 @@ describe('web search tool', () => {
|
||||
`Expected to find a call to ${WEB_SEARCH_TOOL_NAME}`,
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
const hasExpectedContent = validateModelOutput(
|
||||
result,
|
||||
['weather', 'london'],
|
||||
'Google web search test',
|
||||
);
|
||||
assertModelHasOutput(result);
|
||||
const hasExpectedContent = checkModelOutputContent(result, {
|
||||
expectedContent: ['weather', 'london'],
|
||||
testName: 'Google web search test',
|
||||
});
|
||||
|
||||
// If content was missing, log the search queries used
|
||||
if (!hasExpectedContent) {
|
||||
|
||||
@@ -9,7 +9,8 @@ import {
|
||||
TestRig,
|
||||
poll,
|
||||
printDebugInfo,
|
||||
validateModelOutput,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
@@ -68,7 +69,10 @@ describe('list_directory', () => {
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: ['file1.txt', 'subdir'],
|
||||
testName: 'List directory test',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,7 +5,12 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
import {
|
||||
TestRig,
|
||||
printDebugInfo,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
|
||||
describe('read_many_files', () => {
|
||||
let rig: TestRig;
|
||||
@@ -50,7 +55,7 @@ describe('read_many_files', () => {
|
||||
'Expected to find either read_many_files or multiple read_file tool calls',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output
|
||||
validateModelOutput(result, null, 'Read many files test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, { testName: 'Read many files test' });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,7 +5,12 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
import {
|
||||
TestRig,
|
||||
printDebugInfo,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
import { getShellConfiguration } from '../packages/core/src/utils/shell-utils.js';
|
||||
|
||||
const { shell } = getShellConfiguration();
|
||||
@@ -115,13 +120,11 @@ describe('run_shell_command', () => {
|
||||
'Expected to find a run_shell_command tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
// Model often reports exit code instead of showing output
|
||||
validateModelOutput(
|
||||
result,
|
||||
['hello-world', 'exit code 0'],
|
||||
'Shell command test',
|
||||
);
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: ['hello-world', 'exit code 0'],
|
||||
testName: 'Shell command test',
|
||||
});
|
||||
});
|
||||
|
||||
it('should be able to run a shell command via stdin', async () => {
|
||||
@@ -149,8 +152,11 @@ describe('run_shell_command', () => {
|
||||
'Expected to find a run_shell_command tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: 'test-stdin',
|
||||
testName: 'Shell command stdin test',
|
||||
});
|
||||
});
|
||||
|
||||
it.skip('should run allowed sub-command in non-interactive mode', async () => {
|
||||
@@ -494,12 +500,11 @@ describe('run_shell_command', () => {
|
||||
)[0];
|
||||
expect(toolCall.toolRequest.success).toBe(true);
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(
|
||||
result,
|
||||
'test-allow-all',
|
||||
'Shell command stdin allow all',
|
||||
);
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: 'test-allow-all',
|
||||
testName: 'Shell command stdin allow all',
|
||||
});
|
||||
});
|
||||
|
||||
it('should propagate environment variables to the child process', async () => {
|
||||
@@ -528,7 +533,11 @@ describe('run_shell_command', () => {
|
||||
foundToolCall,
|
||||
'Expected to find a run_shell_command tool call',
|
||||
).toBeTruthy();
|
||||
validateModelOutput(result, varValue, 'Env var propagation test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: varValue,
|
||||
testName: 'Env var propagation test',
|
||||
});
|
||||
expect(result).toContain(varValue);
|
||||
} finally {
|
||||
delete process.env[varName];
|
||||
@@ -558,7 +567,11 @@ describe('run_shell_command', () => {
|
||||
'Expected to find a run_shell_command tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
validateModelOutput(result, fileName, 'Platform-specific listing test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: fileName,
|
||||
testName: 'Platform-specific listing test',
|
||||
});
|
||||
expect(result).toContain(fileName);
|
||||
});
|
||||
|
||||
|
||||
@@ -11,7 +11,12 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig, poll, validateModelOutput } from './test-helper.js';
|
||||
import {
|
||||
TestRig,
|
||||
poll,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
import { join } from 'node:path';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
|
||||
@@ -226,8 +231,11 @@ describe.skip('simple-mcp-server', () => {
|
||||
|
||||
expect(foundToolCall, 'Expected to find an add tool call').toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, fail if missing expected content
|
||||
validateModelOutput(output, '15', 'MCP server test');
|
||||
assertModelHasOutput(output);
|
||||
checkModelOutputContent(output, {
|
||||
expectedContent: '15',
|
||||
testName: 'MCP server test',
|
||||
});
|
||||
expect(
|
||||
output.includes('15'),
|
||||
'Expected output to contain the sum (15)',
|
||||
|
||||
@@ -5,7 +5,12 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
import {
|
||||
TestRig,
|
||||
printDebugInfo,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
|
||||
describe.skip('stdin context', () => {
|
||||
let rig: TestRig;
|
||||
@@ -67,7 +72,11 @@ describe.skip('stdin context', () => {
|
||||
}
|
||||
|
||||
// Validate model output
|
||||
validateModelOutput(result, randomString, 'STDIN context test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: randomString,
|
||||
testName: 'STDIN context test',
|
||||
});
|
||||
|
||||
expect(
|
||||
result.toLowerCase().includes(randomString),
|
||||
|
||||
@@ -9,7 +9,8 @@ import {
|
||||
TestRig,
|
||||
createToolCallErrorMessage,
|
||||
printDebugInfo,
|
||||
validateModelOutput,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
|
||||
describe('write_file', () => {
|
||||
@@ -46,8 +47,11 @@ describe('write_file', () => {
|
||||
),
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'dad.txt', 'Write file test');
|
||||
assertModelHasOutput(result);
|
||||
checkModelOutputContent(result, {
|
||||
expectedContent: 'dad.txt',
|
||||
testName: 'Write file test',
|
||||
});
|
||||
|
||||
const newFilePath = 'dad.txt';
|
||||
|
||||
|
||||
Reference in New Issue
Block a user