gemini-cli/evals/frugalSearch.eval.ts

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';

/**
 * Evals to verify that the agent uses search tools efficiently (frugally)
 * by utilizing limiting parameters like `total_max_matches` and `max_matches_per_file`.
 * This ensures the agent doesn't flood the context window with unnecessary search results.
 */
describe('Frugal Search', () => {
  /**
   * Extracts the argument object from a logged tool call.
   *
   * `toolRequest.args` is accepted either as an object or as a JSON-encoded
   * string. Anything that cannot be resolved to an object (missing args,
   * malformed JSON, JSON that decodes to `null` or a primitive) yields an empty
   * object, so callers can read optional properties without guarding first.
   *
   * @param call - A tool log entry exposing `toolRequest.args`.
   * @returns The decoded arguments, or `{}` when none could be decoded.
   */
  const getGrepParams = (call: any): any => {
    let args: unknown = call?.toolRequest?.args;
    if (typeof args === 'string') {
      try {
        args = JSON.parse(args);
      } catch {
        // Malformed JSON carries no usable parameters.
        args = undefined;
      }
    }
    // `typeof null === 'object'`, so null has to be excluded explicitly.
    return typeof args === 'object' && args !== null ? args : {};
  };

  /**
   * Asks for a single sample usage of `path.resolve()` in a small project where
   * the call appears in several files, then checks that at least one
   * `grep_search` call capped its results with `total_max_matches` (<= 100).
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should use targeted search with limit',
    prompt: 'find me a sample usage of path.resolve() in the codebase',
    files: {
      'package.json': JSON.stringify({
        name: 'test-project',
        version: '1.0.0',
        main: 'dist/index.js',
        scripts: {
          build: 'tsc',
          test: 'vitest',
        },
        dependencies: {
          typescript: '^5.0.0',
          '@types/node': '^20.0.0',
          vitest: '^1.0.0',
        },
      }),
      'src/index.ts': `
        import { App } from './app.ts';

        const app = new App();
        app.start();
      `,
      'src/app.ts': `
        import * as path from 'path';
        import { UserController } from './controllers/user.ts';

        export class App {
          constructor() {
            console.log('App initialized');
          }

          public start(): void {
            const userController = new UserController();
            console.log('Static path:', path.resolve(__dirname, '../public'));
          }
        }
      `,
      'src/utils.ts': `
        import * as path from 'path';
        import * as fs from 'fs';

        export function resolvePath(p: string): string {
          return path.resolve(process.cwd(), p);
        }

        export function ensureDir(dirPath: string): void {
          const absolutePath = path.resolve(dirPath);
          if (!fs.existsSync(absolutePath)) {
            fs.mkdirSync(absolutePath, { recursive: true });
          }
        }
      `,
      'src/config.ts': `
        import * as path from 'path';

        export const config = {
          dbPath: path.resolve(process.cwd(), 'data/db.sqlite'),
          logLevel: 'info',
        };
      `,
      'src/controllers/user.ts': `
        import * as path from 'path';

        export class UserController {
          public getUsers(): any[] {
            console.log('Loading users from:', path.resolve('data/users.json'));
            return [{ id: 1, name: 'Alice' }];
          }
        }
      `,
      'tests/app.test.ts': `
        import { describe, it, expect } from 'vitest';
        import * as path from 'path';

        describe('App', () => {
          it('should resolve paths', () => {
            const p = path.resolve('test');
            expect(p).toBeDefined();
          });
        });
      `,
    },
    assert: async (rig) => {
      const toolCalls = rig.readToolLogs();
      const grepCalls = toolCalls.filter(
        (call) => call.toolRequest.name === 'grep_search',
      );

      expect(grepCalls.length).toBeGreaterThan(0);

      const grepParams = grepCalls.map(getGrepParams);

      // `!= null` rejects both undefined and null: an explicit `null` limit is
      // not a limit, yet `null <= 100` evaluates to true and would otherwise
      // pass. Optional chaining keeps a call without decodable args from
      // throwing before the assertion can report a useful message.
      const hasTotalMaxLimit = grepParams.some(
        (p) => p?.total_max_matches != null && p.total_max_matches <= 100,
      );
      expect(
        hasTotalMaxLimit,
        `Expected agent to use a small total_max_matches (<= 100) for a sample usage request. Actual values: ${JSON.stringify(
          grepParams.map((p) => p?.total_max_matches),
        )}`,
      ).toBe(true);
    },
  });

  /**
   * Verifies that the agent inspects a large file frugally, using either
   * `grep_search` or a ranged `read_file`, instead of reading it in full.
   *
   * The prompt deliberately avoids the words "view" and "search": the model is
   * expected to recognise on its own that answering requires looking inside the
   * file. This covers the case of an unexpectedly large file, where grep or a
   * ranged read is far cheaper than loading everything into context.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should use grep or ranged read for large files',
    prompt: 'What year was legacy_processor.ts written?',
    files: {
      'src/utils.ts': 'export const add = (a, b) => a + b;',
      'src/types.ts': 'export type ID = string;',
      'src/legacy_processor.ts': [
        '// Copyright 2005 Legacy Systems Inc.',
        ...Array.from(
          { length: 5000 },
          (_, i) =>
            `// Legacy code block ${i} - strictly preserved for backward compatibility`,
        ),
        // Join with a real newline so the fixture has 5001 separate lines; a
        // literal backslash-n would collapse it into one enormous line.
      ].join('\n'),
      'README.md': '# Project documentation',
    },
    assert: async (rig) => {
      const largeFile = 'src/legacy_processor.ts';
      const toolCalls = rig.readToolLogs();

      // True when the call is a `read_file` aimed at the large fixture. The
      // path is matched by suffix so that absolute or './'-prefixed forms of
      // the same path are recognised as well.
      const isReadOfLargeFile = (call: any): boolean => {
        if (call.toolRequest.name !== 'read_file') return false;
        const filePath = getGrepParams(call)?.file_path;
        return typeof filePath === 'string' && filePath.endsWith(largeFile);
      };

      // A read is "ranged" only when `limit` is actually set; both undefined
      // and null mean no limit was given.
      const hasLimit = (call: any): boolean =>
        getGrepParams(call)?.limit != null;

      // Check for wasteful full file reads
      const fullReads = toolCalls.filter(
        (call) => isReadOfLargeFile(call) && !hasLimit(call),
      );

      expect(
        fullReads.length,
        'Agent should not attempt to read the entire large file at once',
      ).toBe(0);

      // Check that it actually tried to find it using appropriate tools
      const validAttempts = toolCalls.filter(
        (call) =>
          call.toolRequest.name === 'grep_search' ||
          (isReadOfLargeFile(call) && hasLimit(call)),
      );

      expect(validAttempts.length).toBeGreaterThan(0);
    },
  });
});