mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-22 01:33:30 -07:00
Fix compile errors.
This commit is contained in:
@@ -12,7 +12,7 @@ import {
|
||||
symlinkNodeModules,
|
||||
withEvalRetries,
|
||||
prepareWorkspace,
|
||||
BaseEvalCase,
|
||||
type BaseEvalCase,
|
||||
EVAL_MODEL,
|
||||
} from './test-helper.js';
|
||||
import fs from 'node:fs';
|
||||
@@ -32,6 +32,7 @@ interface EvalConfigOverrides {
|
||||
allowedTools?: never;
|
||||
/** Restricting tools via mainAgentTools in evals is forbidden. */
|
||||
mainAgentTools?: never;
|
||||
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
|
||||
+12
-8
@@ -5,17 +5,21 @@
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { ApprovalMode, isRecord } from '@google/gemini-cli-core';
|
||||
import { appEvalTest, type AppEvalCase } from './app-test-helper.js';
|
||||
import { type EvalPolicy } from './test-helper.js';
|
||||
|
||||
function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
|
||||
const existingGeneral = evalCase.configOverrides?.['general'];
|
||||
const generalBase = isRecord(existingGeneral) ? existingGeneral : {};
|
||||
|
||||
return appEvalTest(policy, {
|
||||
...evalCase,
|
||||
configOverrides: {
|
||||
...evalCase.configOverrides,
|
||||
approvalMode: ApprovalMode.DEFAULT,
|
||||
general: {
|
||||
...evalCase.configOverrides?.general,
|
||||
approvalMode: 'default',
|
||||
...generalBase,
|
||||
enableAutoUpdate: false,
|
||||
enableAutoUpdateNotification: false,
|
||||
},
|
||||
@@ -27,7 +31,7 @@ function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
|
||||
}
|
||||
|
||||
describe('ask_user', () => {
|
||||
askUserEvalTest('USUALLY_PASSES', {
|
||||
askUserEvalTest('ALWAYS_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'Agent uses AskUser tool to present multiple choice options',
|
||||
@@ -44,7 +48,7 @@ describe('ask_user', () => {
|
||||
},
|
||||
});
|
||||
|
||||
askUserEvalTest('USUALLY_PASSES', {
|
||||
askUserEvalTest('ALWAYS_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'Agent uses AskUser tool to clarify ambiguous requirements',
|
||||
@@ -64,7 +68,7 @@ describe('ask_user', () => {
|
||||
},
|
||||
});
|
||||
|
||||
askUserEvalTest('USUALLY_PASSES', {
|
||||
askUserEvalTest('ALWAYS_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'Agent uses AskUser tool before performing significant ambiguous rework',
|
||||
@@ -88,8 +92,8 @@ describe('ask_user', () => {
|
||||
]);
|
||||
expect(confirmation, 'Expected a tool call confirmation').toBeDefined();
|
||||
|
||||
if (confirmation?.name === 'enter_plan_mode') {
|
||||
rig.acceptConfirmation('enter_plan_mode');
|
||||
if (confirmation?.toolName === 'enter_plan_mode') {
|
||||
await rig.resolveTool('enter_plan_mode');
|
||||
confirmation = await rig.waitForPendingConfirmation('ask_user');
|
||||
}
|
||||
|
||||
@@ -106,7 +110,7 @@ describe('ask_user', () => {
|
||||
// confirm shell commands. Fixed via prompt refinements and tool definition
|
||||
// updates to clarify that shell command confirmation is handled by the UI.
|
||||
// See fix: https://github.com/google-gemini/gemini-cli/pull/20504
|
||||
askUserEvalTest('USUALLY_PASSES', {
|
||||
askUserEvalTest('ALWAYS_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'Agent does NOT use AskUser to confirm shell commands',
|
||||
|
||||
@@ -13,18 +13,6 @@ import { evalTest } from './test-helper.js';
|
||||
* This ensures the agent doesn't flood the context window with unnecessary search results.
|
||||
*/
|
||||
describe('Frugal Search', () => {
|
||||
const getGrepParams = (call: any): any => {
|
||||
let args = call.toolRequest.args;
|
||||
if (typeof args === 'string') {
|
||||
try {
|
||||
args = JSON.parse(args);
|
||||
} catch (e) {
|
||||
// Ignore parse errors
|
||||
}
|
||||
}
|
||||
return args;
|
||||
};
|
||||
|
||||
/**
|
||||
* Ensure that the agent makes use of either grep or ranged reads in fulfilling this task.
|
||||
* The task is specifically phrased to not evoke "view" or "search" specifically because
|
||||
|
||||
@@ -5,8 +5,7 @@
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
import { assertModelHasOutput } from '../integration-tests/test-helper.js';
|
||||
import { evalTest, assertModelHasOutput } from './test-helper.js';
|
||||
|
||||
describe('Hierarchical Memory', () => {
|
||||
const conflictResolutionTest =
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
import {
|
||||
evalTest,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from '../integration-tests/test-helper.js';
|
||||
} from './test-helper.js';
|
||||
|
||||
describe('save_memory', () => {
|
||||
const TEST_PREFIX = 'Save memory test: ';
|
||||
|
||||
@@ -49,6 +49,8 @@ describe('evalTest reliability logic', () => {
|
||||
|
||||
// Execute the test function directly
|
||||
await internalEvalTest({
|
||||
suiteName: 'test',
|
||||
suiteType: 'behavioral',
|
||||
name: 'test-api-failure',
|
||||
prompt: 'do something',
|
||||
assert: async () => {},
|
||||
@@ -83,6 +85,8 @@ describe('evalTest reliability logic', () => {
|
||||
// Expect the test function to throw immediately
|
||||
await expect(
|
||||
internalEvalTest({
|
||||
suiteName: 'test',
|
||||
suiteType: 'behavioral',
|
||||
name: 'test-logic-failure',
|
||||
prompt: 'do something',
|
||||
assert: async () => {
|
||||
@@ -108,6 +112,8 @@ describe('evalTest reliability logic', () => {
|
||||
.mockResolvedValueOnce('Success');
|
||||
|
||||
await internalEvalTest({
|
||||
suiteName: 'test',
|
||||
suiteType: 'behavioral',
|
||||
name: 'test-recovery',
|
||||
prompt: 'do something',
|
||||
assert: async () => {},
|
||||
@@ -135,6 +141,8 @@ describe('evalTest reliability logic', () => {
|
||||
);
|
||||
|
||||
await internalEvalTest({
|
||||
suiteName: 'test',
|
||||
suiteType: 'behavioral',
|
||||
name: 'test-api-503',
|
||||
prompt: 'do something',
|
||||
assert: async () => {},
|
||||
@@ -162,6 +170,8 @@ describe('evalTest reliability logic', () => {
|
||||
try {
|
||||
await expect(
|
||||
internalEvalTest({
|
||||
suiteName: 'test',
|
||||
suiteType: 'behavioral',
|
||||
name: 'test-absolute-path',
|
||||
prompt: 'do something',
|
||||
files: {
|
||||
@@ -190,6 +200,8 @@ describe('evalTest reliability logic', () => {
|
||||
try {
|
||||
await expect(
|
||||
internalEvalTest({
|
||||
suiteName: 'test',
|
||||
suiteType: 'behavioral',
|
||||
name: 'test-traversal',
|
||||
prompt: 'do something',
|
||||
files: {
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"extends": "../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"jsx": "react-jsx",
|
||||
"lib": ["DOM", "DOM.Iterable", "ES2023"],
|
||||
"types": ["node", "vitest/globals"]
|
||||
},
|
||||
"include": ["**/*.ts", "**/*.tsx"],
|
||||
"exclude": ["node_modules", "logs"]
|
||||
}
|
||||
Reference in New Issue
Block a user