mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-27 11:47:46 -07:00
Feat/tool registry discovery (#28113)
This commit is contained in:
@@ -279,4 +279,235 @@ describe('eval-analysis', () => {
|
||||
'Could not statically resolve eval case object for evalTest call.',
|
||||
]);
|
||||
});
|
||||
|
||||
describe('tool reference extraction', () => {
|
||||
it('extracts tool from waitForToolCall string literal', () => {
  // Fixture: one eval case whose assert callback passes the string literal
  // 'grep_search' to rig.waitForToolCall.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'grep test',
      prompt: 'find something',
      assert: async (rig) => {
        await rig.waitForToolCall('grep_search');
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  // The first reported case must list exactly that one tool name.
  expect(firstCase.toolReferences).toEqual(['grep_search']);
});
|
||||
|
||||
it('extracts tool from toolRequest.name comparison', () => {
  // Fixture: the tool name only appears on the right-hand side of a
  // `log.toolRequest.name === '...'` comparison inside a filter callback.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'shell test',
      prompt: 'run a command',
      assert: async (rig) => {
        const logs = rig.readToolLogs();
        const calls = logs.filter(
          (log) => log.toolRequest.name === 'run_shell_command',
        );
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  expect(firstCase.toolReferences).toEqual(['run_shell_command']);
});
|
||||
|
||||
it('extracts multiple tools from array includes', () => {
  // Fixture: two tool names are listed in an array literal that is tested
  // with `.includes(log.toolRequest.name)`.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'edit test',
      prompt: 'edit a file',
      assert: async (rig) => {
        const logs = rig.readToolLogs();
        const editCalls = logs.filter(
          (log) => ['write_file', 'replace'].includes(log.toolRequest.name),
        );
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  // Note the expected order differs from the order in the fixture's array.
  const expected = ['replace', 'write_file'];
  expect(firstCase.toolReferences).toEqual(expected);
});
|
||||
|
||||
it('extracts tool from imported constant', () => {
  // Fixture: the tool is referenced through the imported identifier
  // TRACKER_CREATE_TASK_TOOL_NAME rather than a string literal.
  const source = `
    import { TRACKER_CREATE_TASK_TOOL_NAME } from '@google/gemini-cli-core';
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'tracker test',
      prompt: 'create a task',
      assert: async (rig) => {
        await rig.waitForToolCall(TRACKER_CREATE_TASK_TOOL_NAME);
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  // The identifier is expected to be reported as the tool name string.
  expect(firstCase.toolReferences).toEqual(['tracker_create_task']);
});
|
||||
|
||||
it('deduplicates references within a case', () => {
  // Fixture: 'grep_search' is mentioned twice in the same case, once via
  // waitForToolCall and once via a toolRequest.name comparison.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'dedup test',
      prompt: 'search twice',
      assert: async (rig) => {
        await rig.waitForToolCall('grep_search');
        const logs = rig.readToolLogs();
        const calls = logs.filter(
          (log) => log.toolRequest.name === 'grep_search',
        );
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  // Two mentions, one reported entry.
  expect(firstCase.toolReferences).toEqual(['grep_search']);
});
|
||||
|
||||
it('sorts references alphabetically', () => {
  // Fixture: three tools are awaited in the order write_file, grep_search,
  // glob, which is not alphabetical.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'sorted test',
      prompt: 'do things',
      assert: async (rig) => {
        await rig.waitForToolCall('write_file');
        await rig.waitForToolCall('grep_search');
        await rig.waitForToolCall('glob');
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  const alphabetical = ['glob', 'grep_search', 'write_file'];
  expect(firstCase.toolReferences).toEqual(alphabetical);
});
|
||||
|
||||
it('returns empty array when no tool refs found', () => {
  // Fixture: the assert callback only inspects `result`; it never names a
  // tool.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'no tools',
      prompt: 'just answer',
      assert: async (rig, result) => {
        expect(result).toContain('hello');
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  // An empty array is expected, not a missing property.
  expect(firstCase.toolReferences).toEqual([]);
});
|
||||
|
||||
it('aggregates file-level toolReferences across cases', () => {
  // Fixture: two eval cases, each waiting on a different tool.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'case 1',
      prompt: 'first',
      assert: async (rig) => {
        await rig.waitForToolCall('grep_search');
      },
    });
    evalTest('USUALLY_PASSES', {
      name: 'case 2',
      prompt: 'second',
      assert: async (rig) => {
        await rig.waitForToolCall('write_file');
      },
    });
  `;

  // This checks the top-level list on the analysis, not a per-case list.
  const { toolReferences } = analyzeEvalSource(source);

  expect(toolReferences).toEqual(['grep_search', 'write_file']);
});
|
||||
|
||||
it('deduplicates file-level toolReferences', () => {
  // Fixture: two eval cases that both wait on the same tool.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'case 1',
      prompt: 'first',
      assert: async (rig) => {
        await rig.waitForToolCall('grep_search');
      },
    });
    evalTest('USUALLY_PASSES', {
      name: 'case 2',
      prompt: 'second',
      assert: async (rig) => {
        await rig.waitForToolCall('grep_search');
      },
    });
  `;

  const { toolReferences } = analyzeEvalSource(source);

  // The shared tool must appear once in the top-level list.
  expect(toolReferences).toEqual(['grep_search']);
});
|
||||
|
||||
it('handles aliased constant imports', () => {
  // Fixture: TRACKER_CREATE_TASK_TOOL_NAME is imported under the local
  // alias CREATE_TOOL, and only the alias is used in the assert callback.
  const source = `
    import { TRACKER_CREATE_TASK_TOOL_NAME as CREATE_TOOL } from '@google/gemini-cli-core';
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'alias test',
      prompt: 'create task',
      assert: async (rig) => {
        await rig.waitForToolCall(CREATE_TOOL);
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  expect(firstCase.toolReferences).toEqual(['tracker_create_task']);
});
|
||||
|
||||
it('handles reversed toolRequest.name comparison', () => {
  // Fixture: the string literal is on the LEFT of the `===`, with
  // log.toolRequest.name on the right.
  const source = `
    import { evalTest } from './test-helper.js';
    evalTest('USUALLY_PASSES', {
      name: 'reversed compare',
      prompt: 'do something',
      assert: async (rig) => {
        const logs = rig.readToolLogs();
        const calls = logs.filter(
          (log) => 'replace' === log.toolRequest.name,
        );
      },
    });
  `;

  const [firstCase] = analyzeEvalSource(source).cases;

  expect(firstCase.toolReferences).toEqual(['replace']);
});
|
||||
|
||||
it('extracts tools from real grep_search eval pattern', () => {
  // Fixture: a fuller eval file, with the evalTest call nested in a
  // describe() and typed assert parameters. Unlike the other cases in this
  // suite, it is analyzed with an explicit filePath and repoRoot.
  const source = `
    import { describe, expect } from 'vitest';
    import { evalTest, TestRig } from './test-helper.js';

    describe('grep_search_functionality', () => {
      evalTest('USUALLY_PASSES', {
        suiteName: 'default',
        suiteType: 'behavioral',
        name: 'should find a simple string in a file',
        files: { 'test.txt': 'hello world' },
        prompt: 'Find "world" in test.txt',
        assert: async (rig: TestRig, result: string) => {
          await rig.waitForToolCall('grep_search');
        },
      });
    });
  `;
  const location = {
    filePath: '/repo/evals/grep_search.eval.ts',
    repoRoot: '/repo',
  };

  const { cases, toolReferences } = analyzeEvalSource(source, location);

  // Both the per-case list and the file-level list must name the tool.
  expect(cases[0].toolReferences).toEqual(['grep_search']);
  expect(toolReferences).toEqual(['grep_search']);
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
buildToolRegistry,
|
||||
resolveToolName,
|
||||
getToolsByCategory,
|
||||
type ToolCategory,
|
||||
} from '../utils/tool-registry.js';
|
||||
|
||||
describe('tool-registry', () => {
|
||||
// Shared fixture: built once here and read by the tests in this suite.
const registry = buildToolRegistry();
|
||||
|
||||
describe('buildToolRegistry', () => {
|
||||
it('includes all canonical built-in tools', () => {
  // Lower bound only: the assertion passes for any count of 26 or more.
  const { totalTools } = registry;
  expect(totalTools).toBeGreaterThanOrEqual(26);
});
|
||||
|
||||
it('every tool has a valid category', () => {
  // For each (key, entry) pair: the category must be truthy and the entry's
  // own name must equal the key it is stored under.
  registry.tools.forEach((entry, name) => {
    expect(entry.category).toBeTruthy();
    expect(entry.name).toBe(name);
  });
});
|
||||
|
||||
it('byCategory entries match tools map', () => {
  // Every entry listed under a category must be the very same object that
  // `tools` holds for that name (toBe, i.e. identity), and the per-category
  // counts must add up to totalTools.
  let countedAcrossCategories = 0;

  for (const [, group] of registry.byCategory) {
    for (const member of group) {
      const stored = registry.tools.get(member.name);
      expect(stored).toBe(member);
    }
    countedAcrossCategories += group.length;
  }

  expect(countedAcrossCategories).toBe(registry.totalTools);
});
|
||||
|
||||
it('aliasLookup covers every canonical name', () => {
  // Each key of `tools` must map to itself in aliasLookup.
  const canonicalNames = [...registry.tools.keys()];

  canonicalNames.forEach((canonical) => {
    expect(registry.aliasLookup.get(canonical)).toBe(canonical);
  });
});
|
||||
|
||||
it('aliasLookup covers every legacy alias', () => {
  // Each alias listed on an entry must map back to that entry's name.
  for (const entry of registry.tools.values()) {
    const canonical = entry.name;
    for (const legacy of entry.aliases) {
      expect(registry.aliasLookup.get(legacy)).toBe(canonical);
    }
  }
});
|
||||
|
||||
it('is deterministic across calls', () => {
  // A second build must yield the same tool names in the same order, and
  // the same total, as the shared registry.
  const rebuilt = buildToolRegistry();

  const rebuiltNames = [...rebuilt.tools.keys()];
  const baselineNames = [...registry.tools.keys()];

  expect(rebuiltNames).toEqual(baselineNames);
  expect(rebuilt.totalTools).toBe(registry.totalTools);
});
|
||||
});
|
||||
|
||||
describe('resolveToolName', () => {
|
||||
it('resolves canonical names to themselves', () => {
  // Two sample names, each expected to resolve to itself.
  for (const canonical of ['grep_search', 'run_shell_command']) {
    expect(resolveToolName(registry, canonical)).toBe(canonical);
  }
});
|
||||
|
||||
it('resolves legacy alias to canonical name', () => {
  // 'search_file_content' is expected to resolve to 'grep_search'.
  const resolved = resolveToolName(registry, 'search_file_content');
  expect(resolved).toBe('grep_search');
});
|
||||
|
||||
it('returns undefined for unknown tool names', () => {
  // A name that is not expected to be in the registry yields undefined.
  const resolved = resolveToolName(registry, 'nonexistent_tool');
  expect(resolved).toBeUndefined();
});
|
||||
|
||||
it('returns undefined for empty string', () => {
  // Boundary input: the empty string must also yield undefined.
  const resolved = resolveToolName(registry, '');
  expect(resolved).toBeUndefined();
});
|
||||
});
|
||||
|
||||
describe('getToolsByCategory', () => {
|
||||
it('returns file-system tools', () => {
  const names = getToolsByCategory(registry, 'file-system').map(
    (tool) => tool.name,
  );

  // Membership only: the category may contain more tools than these five.
  const mustInclude = [
    'glob',
    'grep_search',
    'read_file',
    'write_file',
    'replace',
  ];
  for (const expectedName of mustInclude) {
    expect(names).toContain(expectedName);
  }
});
|
||||
|
||||
it('returns task-tracker tools', () => {
  const names = getToolsByCategory(registry, 'task-tracker').map(
    (tool) => tool.name,
  );

  const trackerTools = [
    'tracker_create_task',
    'tracker_update_task',
    'tracker_get_task',
    'tracker_list_tasks',
    'tracker_add_dependency',
    'tracker_visualize',
  ];
  for (const expectedName of trackerTools) {
    expect(names).toContain(expectedName);
  }

  // Together with the membership checks above, the length check pins the
  // category to exactly these six tools.
  expect(names).toHaveLength(6);
});
|
||||
|
||||
it('returns agent tools', () => {
  const names = getToolsByCategory(registry, 'agent').map((tool) => tool.name);

  // Membership only: no upper bound on the category size is asserted.
  for (const expectedName of ['invoke_agent', 'complete_task', 'update_topic']) {
    expect(names).toContain(expectedName);
  }
});
|
||||
|
||||
it('returns empty array for unknown category', () => {
  // The cast lets a string outside ToolCategory through the type checker so
  // the runtime fallback can be exercised.
  const bogusCategory = 'nonexistent' as ToolCategory;

  expect(getToolsByCategory(registry, bogusCategory)).toEqual([]);
});
|
||||
|
||||
it('every defined category has at least one tool', () => {
  // Categories this test requires to be non-empty.
  const categories: ToolCategory[] = [
    'file-system',
    'shell',
    'web',
    'planning',
    'user-interaction',
    'skills',
    'task-tracker',
    'agent',
    'mcp',
  ];

  categories.forEach((category) => {
    const toolCount = getToolsByCategory(registry, category).length;
    expect(toolCount).toBeGreaterThan(0);
  });
});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user