Feat/tool registry discovery (#28113)

This commit is contained in:
Vedant Mahajan
2026-06-25 05:21:30 +05:30
committed by GitHub
parent 02c6c77324
commit d845bc5d45
4 changed files with 752 additions and 0 deletions
+231
View File
@@ -279,4 +279,235 @@ describe('eval-analysis', () => {
'Could not statically resolve eval case object for evalTest call.',
]);
});
// Suite for the `toolReferences` field produced by analyzeEvalSource.
// Each test feeds an inline eval source (as a template string) to the analyzer
// and checks which tool names were extracted from the `assert` callback,
// either per case (`analysis.cases[i].toolReferences`) or for the whole file
// (`analysis.toolReferences`).
describe('tool reference extraction', () => {
// A string literal passed directly to rig.waitForToolCall(...).
it('extracts tool from waitForToolCall string literal', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'grep test',
prompt: 'find something',
assert: async (rig) => {
await rig.waitForToolCall('grep_search');
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual(['grep_search']);
});
// A literal on the right-hand side of `<x>.toolRequest.name === '...'`.
it('extracts tool from toolRequest.name comparison', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'shell test',
prompt: 'run a command',
assert: async (rig) => {
const logs = rig.readToolLogs();
const calls = logs.filter(
(log) => log.toolRequest.name === 'run_shell_command',
);
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual(['run_shell_command']);
});
// Every element of an array literal whose .includes() receives toolRequest.name.
// The expectation is sorted, not in source order.
it('extracts multiple tools from array includes', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'edit test',
prompt: 'edit a file',
assert: async (rig) => {
const logs = rig.readToolLogs();
const editCalls = logs.filter(
(log) => ['write_file', 'replace'].includes(log.toolRequest.name),
);
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual([
'replace',
'write_file',
]);
});
// An identifier imported from '@google/gemini-cli-core' is resolved to the
// tool name it stands for.
it('extracts tool from imported constant', () => {
const analysis = analyzeEvalSource(`
import { TRACKER_CREATE_TASK_TOOL_NAME } from '@google/gemini-cli-core';
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'tracker test',
prompt: 'create a task',
assert: async (rig) => {
await rig.waitForToolCall(TRACKER_CREATE_TASK_TOOL_NAME);
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual(['tracker_create_task']);
});
// The same tool mentioned via two different patterns is reported once.
it('deduplicates references within a case', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'dedup test',
prompt: 'search twice',
assert: async (rig) => {
await rig.waitForToolCall('grep_search');
const logs = rig.readToolLogs();
const calls = logs.filter(
(log) => log.toolRequest.name === 'grep_search',
);
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual(['grep_search']);
});
// Source order is write_file, grep_search, glob; output must be sorted.
it('sorts references alphabetically', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'sorted test',
prompt: 'do things',
assert: async (rig) => {
await rig.waitForToolCall('write_file');
await rig.waitForToolCall('grep_search');
await rig.waitForToolCall('glob');
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual([
'glob',
'grep_search',
'write_file',
]);
});
// An assert body with no recognised tool pattern yields an empty list.
it('returns empty array when no tool refs found', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'no tools',
prompt: 'just answer',
assert: async (rig, result) => {
expect(result).toContain('hello');
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual([]);
});
// File-level list is the union of the per-case lists.
it('aggregates file-level toolReferences across cases', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'case 1',
prompt: 'first',
assert: async (rig) => {
await rig.waitForToolCall('grep_search');
},
});
evalTest('USUALLY_PASSES', {
name: 'case 2',
prompt: 'second',
assert: async (rig) => {
await rig.waitForToolCall('write_file');
},
});
`);
expect(analysis.toolReferences).toEqual(['grep_search', 'write_file']);
});
// Two cases referencing the same tool contribute a single file-level entry.
it('deduplicates file-level toolReferences', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'case 1',
prompt: 'first',
assert: async (rig) => {
await rig.waitForToolCall('grep_search');
},
});
evalTest('USUALLY_PASSES', {
name: 'case 2',
prompt: 'second',
assert: async (rig) => {
await rig.waitForToolCall('grep_search');
},
});
`);
expect(analysis.toolReferences).toEqual(['grep_search']);
});
// `import { X as Y }`: the local alias Y must resolve like X does.
it('handles aliased constant imports', () => {
const analysis = analyzeEvalSource(`
import { TRACKER_CREATE_TASK_TOOL_NAME as CREATE_TOOL } from '@google/gemini-cli-core';
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'alias test',
prompt: 'create task',
assert: async (rig) => {
await rig.waitForToolCall(CREATE_TOOL);
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual(['tracker_create_task']);
});
// The literal may appear on the left of `===` instead of the right.
it('handles reversed toolRequest.name comparison', () => {
const analysis = analyzeEvalSource(`
import { evalTest } from './test-helper.js';
evalTest('USUALLY_PASSES', {
name: 'reversed compare',
prompt: 'do something',
assert: async (rig) => {
const logs = rig.readToolLogs();
const calls = logs.filter(
(log) => 'replace' === log.toolRequest.name,
);
},
});
`);
expect(analysis.cases[0].toolReferences).toEqual(['replace']);
});
// Fuller fixture: evalTest nested inside describe(), typed assert parameters,
// and explicit filePath/repoRoot options. Checks both the per-case and the
// file-level result.
it('extracts tools from real grep_search eval pattern', () => {
const analysis = analyzeEvalSource(
`
import { describe, expect } from 'vitest';
import { evalTest, TestRig } from './test-helper.js';
describe('grep_search_functionality', () => {
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should find a simple string in a file',
files: { 'test.txt': 'hello world' },
prompt: 'Find "world" in test.txt',
assert: async (rig: TestRig, result: string) => {
await rig.waitForToolCall('grep_search');
},
});
});
`,
{ filePath: '/repo/evals/grep_search.eval.ts', repoRoot: '/repo' },
);
expect(analysis.cases[0].toolReferences).toEqual(['grep_search']);
expect(analysis.toolReferences).toEqual(['grep_search']);
});
});
});
+139
View File
@@ -0,0 +1,139 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import {
buildToolRegistry,
resolveToolName,
getToolsByCategory,
type ToolCategory,
} from '../utils/tool-registry.js';
// Suite covering buildToolRegistry, resolveToolName and getToolsByCategory.
describe('tool-registry', () => {
  // Built once and shared by every test in this file.
  const registry = buildToolRegistry();

  describe('buildToolRegistry', () => {
    it('includes all canonical built-in tools', () => {
      const minimumToolCount = 26;
      expect(registry.totalTools).toBeGreaterThanOrEqual(minimumToolCount);
    });

    it('every tool has a valid category', () => {
      // Map#forEach passes (value, key): the key must equal the entry's own name.
      registry.tools.forEach((entry, name) => {
        expect(entry.category).toBeTruthy();
        expect(entry.name).toBe(name);
      });
    });

    it('byCategory entries match tools map', () => {
      let categoryTotal = 0;
      for (const entries of registry.byCategory.values()) {
        entries.forEach((entry) => {
          // Identity check: the grouped entry is the same object as in `tools`.
          expect(registry.tools.get(entry.name)).toBe(entry);
        });
        categoryTotal += entries.length;
      }
      expect(categoryTotal).toBe(registry.totalTools);
    });

    it('aliasLookup covers every canonical name', () => {
      for (const canonical of registry.tools.keys()) {
        expect(registry.aliasLookup.get(canonical)).toBe(canonical);
      }
    });

    it('aliasLookup covers every legacy alias', () => {
      for (const entry of registry.tools.values()) {
        for (const alias of entry.aliases) {
          expect(registry.aliasLookup.get(alias)).toBe(entry.name);
        }
      }
    });

    it('is deterministic across calls', () => {
      const rebuilt = buildToolRegistry();
      expect(Array.from(rebuilt.tools.keys())).toEqual(
        Array.from(registry.tools.keys()),
      );
      expect(rebuilt.totalTools).toBe(registry.totalTools);
    });
  });

  describe('resolveToolName', () => {
    it('resolves canonical names to themselves', () => {
      const grep = resolveToolName(registry, 'grep_search');
      expect(grep).toBe('grep_search');
      const shell = resolveToolName(registry, 'run_shell_command');
      expect(shell).toBe('run_shell_command');
    });

    it('resolves legacy alias to canonical name', () => {
      const resolved = resolveToolName(registry, 'search_file_content');
      expect(resolved).toBe('grep_search');
    });

    it('returns undefined for unknown tool names', () => {
      const resolved = resolveToolName(registry, 'nonexistent_tool');
      expect(resolved).toBeUndefined();
    });

    it('returns undefined for empty string', () => {
      const resolved = resolveToolName(registry, '');
      expect(resolved).toBeUndefined();
    });
  });

  describe('getToolsByCategory', () => {
    it('returns file-system tools', () => {
      const names = getToolsByCategory(registry, 'file-system').map(
        (tool) => tool.name,
      );
      for (const expected of [
        'glob',
        'grep_search',
        'read_file',
        'write_file',
        'replace',
      ]) {
        expect(names).toContain(expected);
      }
    });

    it('returns task-tracker tools', () => {
      const names = getToolsByCategory(registry, 'task-tracker').map(
        (tool) => tool.name,
      );
      for (const expected of [
        'tracker_create_task',
        'tracker_update_task',
        'tracker_get_task',
        'tracker_list_tasks',
        'tracker_add_dependency',
        'tracker_visualize',
      ]) {
        expect(names).toContain(expected);
      }
      // Exactly the six tracker tools listed above, nothing else.
      expect(names).toHaveLength(6);
    });

    it('returns agent tools', () => {
      const names = getToolsByCategory(registry, 'agent').map(
        (tool) => tool.name,
      );
      for (const expected of ['invoke_agent', 'complete_task', 'update_topic']) {
        expect(names).toContain(expected);
      }
    });

    it('returns empty array for unknown category', () => {
      const result = getToolsByCategory(
        registry,
        'nonexistent' as ToolCategory,
      );
      expect(result).toEqual([]);
    });

    it('every defined category has at least one tool', () => {
      const expectedCategories: ToolCategory[] = [
        'file-system',
        'shell',
        'web',
        'planning',
        'user-interaction',
        'skills',
        'task-tracker',
        'agent',
        'mcp',
      ];
      expectedCategories.forEach((category) => {
        const count = getToolsByCategory(registry, category).length;
        expect(count).toBeGreaterThan(0);
      });
    });
  });
});
+239
View File
@@ -6,6 +6,11 @@
import path from 'node:path';
import * as ts from 'typescript';
import {
ALL_BUILTIN_TOOL_NAMES,
isValidToolName,
} from '@google/gemini-cli-core';
import { buildToolRegistry } from './tool-registry.js';
export const BASE_EVAL_HELPERS = [
'evalTest',
@@ -45,6 +50,7 @@ export interface EvalCaseRecord {
timeout?: number;
hasFiles: boolean;
hasPrompt: boolean;
toolReferences: readonly string[];
location: EvalSourceLocation;
}
@@ -53,6 +59,7 @@ export interface EvalFileAnalysis {
relativePath: string;
helpers: Record<string, BaseEvalHelper | 'unknown'>;
cases: readonly EvalCaseRecord[];
toolReferences: readonly string[];
diagnostics: readonly EvalAnalysisDiagnostic[];
}
@@ -76,6 +83,7 @@ export function analyzeEvalSource(
);
const helpers = collectHelperMappings(sourceFile);
const importedConstants = collectImportedToolNameConstants(sourceFile);
const diagnostics: EvalAnalysisDiagnostic[] = [];
const cases: EvalCaseRecord[] = [];
@@ -118,6 +126,30 @@ export function analyzeEvalSource(
});
}
const assertProp = getPropertyAssignment(evalCase, 'assert');
const assertBody = assertProp
? getFunctionBody(assertProp.initializer)
: undefined;
const toolRefsInfo = assertBody
? collectToolReferences(assertBody, importedConstants)
: [];
const toolRefs: string[] = [];
const registry = buildToolRegistry();
for (const { name: resolvedName, node } of toolRefsInfo) {
const canonicalName = registry.aliasLookup.get(resolvedName);
if (!canonicalName && !isValidToolName(resolvedName)) {
diagnostics.push({
severity: 'warning',
message: `Unrecognized tool name extracted: "${resolvedName}"`,
filePath,
location: getLocation(sourceFile, node),
});
}
toolRefs.push(canonicalName ?? resolvedName);
}
cases.push({
filePath,
relativePath,
@@ -130,17 +162,23 @@ export function analyzeEvalSource(
timeout: getStaticNumberProperty(evalCase, 'timeout'),
hasFiles: hasProperty(evalCase, 'files'),
hasPrompt: hasProperty(evalCase, 'prompt'),
toolReferences: Object.freeze([...new Set(toolRefs)].sort()),
location: getLocation(sourceFile, callExpression),
});
});
cases.sort(compareEvalCases);
const fileToolRefs = [
...new Set(cases.flatMap((c) => [...c.toolReferences])),
].sort();
return {
filePath,
relativePath,
helpers,
cases,
toolReferences: Object.freeze(fileToolRefs),
diagnostics: diagnostics.sort(compareDiagnostics),
};
}
@@ -439,3 +477,204 @@ function compareDiagnostics(
/**
 * Comparator for two strings using `localeCompare` with the 'en' locale.
 *
 * @returns A negative number, zero, or a positive number, as returned by
 *   `String.prototype.localeCompare`.
 */
function compareStrings(left: string, right: string) {
  const ordering = left.localeCompare(right, 'en');
  return ordering;
}
/**
 * Associates every built-in tool name with the name of an export of
 * `@google/gemini-cli-core`.
 *
 * The key type is derived from ALL_BUILTIN_TOOL_NAMES, so a tool name missing
 * from this table is a compile error; the value type restricts each entry to
 * a real export name of the package. The table is inverted elsewhere in this
 * file to resolve imported constants back to tool names.
 */
const TOOL_NAME_TO_CONSTANT: Record<
(typeof ALL_BUILTIN_TOOL_NAMES)[number],
keyof typeof import('@google/gemini-cli-core')
> = {
glob: 'GLOB_TOOL_NAME',
grep_search: 'GREP_TOOL_NAME',
list_directory: 'LS_TOOL_NAME',
read_file: 'READ_FILE_TOOL_NAME',
run_shell_command: 'SHELL_TOOL_NAME',
write_file: 'WRITE_FILE_TOOL_NAME',
replace: 'EDIT_TOOL_NAME',
google_web_search: 'WEB_SEARCH_TOOL_NAME',
write_todos: 'WRITE_TODOS_TOOL_NAME',
web_fetch: 'WEB_FETCH_TOOL_NAME',
read_many_files: 'READ_MANY_FILES_TOOL_NAME',
get_internal_docs: 'GET_INTERNAL_DOCS_TOOL_NAME',
activate_skill: 'ACTIVATE_SKILL_TOOL_NAME',
ask_user: 'ASK_USER_TOOL_NAME',
exit_plan_mode: 'EXIT_PLAN_MODE_TOOL_NAME',
enter_plan_mode: 'ENTER_PLAN_MODE_TOOL_NAME',
update_topic: 'UPDATE_TOPIC_TOOL_NAME',
complete_task: 'COMPLETE_TASK_TOOL_NAME',
read_mcp_resource: 'READ_MCP_RESOURCE_TOOL_NAME',
list_mcp_resources: 'LIST_MCP_RESOURCES_TOOL_NAME',
tracker_create_task: 'TRACKER_CREATE_TASK_TOOL_NAME',
tracker_update_task: 'TRACKER_UPDATE_TASK_TOOL_NAME',
tracker_get_task: 'TRACKER_GET_TASK_TOOL_NAME',
tracker_list_tasks: 'TRACKER_LIST_TASKS_TOOL_NAME',
tracker_add_dependency: 'TRACKER_ADD_DEPENDENCY_TOOL_NAME',
tracker_visualize: 'TRACKER_VISUALIZE_TOOL_NAME',
invoke_agent: 'AGENT_TOOL_NAME',
};
/**
 * Inverse of TOOL_NAME_TO_CONSTANT: looks up a built-in tool name by the name
 * of the exported constant associated with it.
 */
const WELL_KNOWN_TOOL_CONSTANTS: Record<
  string,
  (typeof ALL_BUILTIN_TOOL_NAMES)[number]
> = (() => {
  const byConstantName: Record<
    string,
    (typeof ALL_BUILTIN_TOOL_NAMES)[number]
  > = {};
  for (const [toolName, constantName] of Object.entries(
    TOOL_NAME_TO_CONSTANT,
  )) {
    // Object.entries widens keys to `string`; they are the table's own keys.
    byConstantName[constantName] =
      toolName as (typeof ALL_BUILTIN_TOOL_NAMES)[number];
  }
  return byConstantName;
})();
/**
 * Collects the tool-name constants a source file imports by name from
 * `@google/gemini-cli-core`.
 *
 * Only top-level `import { A, B as C } from '@google/gemini-cli-core'`
 * declarations are considered; default and namespace imports, and imports
 * from any other module specifier, are ignored.
 *
 * @param sourceFile Parsed source file whose top-level statements are scanned.
 * @returns Map from the local binding name (the alias when `as` is used) to
 *   the built-in tool name registered for the imported constant.
 */
function collectImportedToolNameConstants(
  sourceFile: ts.SourceFile,
): Map<string, string> {
  const constants = new Map<string, string>();
  for (const statement of sourceFile.statements) {
    if (
      !ts.isImportDeclaration(statement) ||
      !statement.importClause?.namedBindings ||
      !ts.isNamedImports(statement.importClause.namedBindings) ||
      !ts.isStringLiteral(statement.moduleSpecifier) ||
      statement.moduleSpecifier.text !== '@google/gemini-cli-core'
    ) {
      continue;
    }
    for (const element of statement.importClause.namedBindings.elements) {
      // `propertyName` is only set for `X as Y`; otherwise the imported and
      // local names are the same identifier.
      const importedName = element.propertyName?.text ?? element.name.text;
      const localName = element.name.text;
      // The lookup table is a plain object, so an import named e.g.
      // `constructor` or `toString` would otherwise resolve to an inherited
      // Object.prototype member and put a non-string into the map.
      const resolvedValue = Object.prototype.hasOwnProperty.call(
        WELL_KNOWN_TOOL_CONSTANTS,
        importedName,
      )
        ? WELL_KNOWN_TOOL_CONSTANTS[importedName]
        : undefined;
      if (resolvedValue !== undefined) {
        constants.set(localName, resolvedValue);
      }
    }
  }
  return constants;
}
/**
 * Returns the body of an inline function expression.
 *
 * @param node Expression to inspect.
 * @returns The `body` of an arrow function or function expression, or
 *   `undefined` for any other kind of expression.
 */
function getFunctionBody(
  node: ts.Expression,
): ts.ConciseBody | ts.Block | undefined {
  if (ts.isArrowFunction(node) || ts.isFunctionExpression(node)) {
    return node.body;
  }
  return undefined;
}
/**
 * Walks a function body and gathers every tool name it references through
 * one of the recognised patterns: a `waitForToolCall(...)` argument, an array
 * literal's `.includes(<x>.toolRequest.name)`, or a `===` comparison against
 * `<x>.toolRequest.name`.
 *
 * @param body Function body to traverse (the node itself and all descendants).
 * @param importedConstants Local identifier to tool name mapping used to
 *   resolve identifiers that stand for tool names.
 * @returns One entry per match, in traversal order, pairing the resolved name
 *   with the AST node it came from. Duplicates are not removed here.
 */
function collectToolReferences(
  body: ts.ConciseBody | ts.Block,
  importedConstants: Map<string, string>,
): { name: string; node: ts.Node }[] {
  const found: { name: string; node: ts.Node }[] = [];

  function walk(current: ts.Node): void {
    if (ts.isCallExpression(current)) {
      extractFromWaitForToolCall(current, importedConstants, found);
      extractFromArrayIncludes(current, importedConstants, found);
    } else if (ts.isBinaryExpression(current)) {
      const isStrictEquality =
        current.operatorToken.kind === ts.SyntaxKind.EqualsEqualsEqualsToken;
      if (isStrictEquality) {
        extractFromToolRequestNameComparison(current, importedConstants, found);
      }
    }
    // Always descend, so patterns nested inside a matched node are seen too.
    ts.forEachChild(current, walk);
  }

  walk(body);
  return found;
}
/**
 * Records the tool named by the first argument of a `<x>.waitForToolCall(...)`
 * call, when that argument can be resolved to a non-empty string.
 *
 * @param call Call expression to inspect.
 * @param importedConstants Local identifier to tool name mapping.
 * @param refs Accumulator; a `{ name, node }` entry is appended on a match.
 */
function extractFromWaitForToolCall(
  call: ts.CallExpression,
  importedConstants: Map<string, string>,
  refs: { name: string; node: ts.Node }[],
) {
  const callee = call.expression;
  if (!ts.isPropertyAccessExpression(callee)) {
    return;
  }
  if (callee.name.text !== 'waitForToolCall') {
    return;
  }

  const toolArg = call.arguments[0];
  if (toolArg === undefined) {
    return;
  }

  const toolName = resolveStringValue(toolArg, importedConstants);
  if (!toolName) {
    return;
  }
  refs.push({ name: toolName, node: toolArg });
}
/**
 * Tells whether an expression has the shape `<anything>.toolRequest.name`,
 * i.e. a `.name` access whose target is itself a `.toolRequest` access.
 */
function isToolRequestName(node: ts.Expression): boolean {
  if (!ts.isPropertyAccessExpression(node) || node.name.text !== 'name') {
    return false;
  }
  const owner = node.expression;
  return (
    ts.isPropertyAccessExpression(owner) && owner.name.text === 'toolRequest'
  );
}
/**
 * Records the tool name compared against `<x>.toolRequest.name` in a binary
 * expression. The `toolRequest.name` operand may be on either side; the left
 * side is checked first.
 *
 * @param binary Binary expression to inspect (the caller filters operators).
 * @param importedConstants Local identifier to tool name mapping.
 * @param refs Accumulator; a `{ name, node }` entry is appended on a match.
 */
function extractFromToolRequestNameComparison(
  binary: ts.BinaryExpression,
  importedConstants: Map<string, string>,
  refs: { name: string; node: ts.Node }[],
) {
  const { left, right } = binary;
  const comparedValue = isToolRequestName(left)
    ? right
    : isToolRequestName(right)
      ? left
      : undefined;
  if (comparedValue === undefined) {
    return;
  }

  const toolName = resolveStringValue(comparedValue, importedConstants);
  if (toolName) {
    refs.push({ name: toolName, node: comparedValue });
  }
}
/**
 * Records every resolvable element of an array literal used as
 * `[...].includes(<x>.toolRequest.name)`.
 *
 * Nothing is recorded unless the callee is a `.includes` property access, its
 * first argument is a `toolRequest.name` access, and the receiver is an array
 * literal. Elements that do not resolve to a non-empty string are skipped.
 *
 * @param call Call expression to inspect.
 * @param importedConstants Local identifier to tool name mapping.
 * @param refs Accumulator; one `{ name, node }` entry is appended per element.
 */
function extractFromArrayIncludes(
  call: ts.CallExpression,
  importedConstants: Map<string, string>,
  refs: { name: string; node: ts.Node }[],
) {
  const callee = call.expression;
  if (
    !ts.isPropertyAccessExpression(callee) ||
    callee.name.text !== 'includes'
  ) {
    return;
  }

  const probe = call.arguments[0];
  if (!probe || !isToolRequestName(probe)) {
    return;
  }

  const candidates = callee.expression;
  if (!ts.isArrayLiteralExpression(candidates)) {
    return;
  }

  candidates.elements.forEach((element) => {
    const toolName = resolveStringValue(element, importedConstants);
    if (toolName) {
      refs.push({ name: toolName, node: element });
    }
  });
}
/**
 * Resolves an expression to a string: first via `getStringLiteralValue`, then,
 * for identifiers, via the imported-constant map.
 *
 * @param node Expression to resolve.
 * @param importedConstants Local identifier to tool name mapping.
 * @returns The resolved string, or `undefined` when the expression is neither
 *   a recognised string literal nor a known imported identifier.
 */
function resolveStringValue(
  node: ts.Expression,
  importedConstants: Map<string, string>,
): string | undefined {
  const literalText = getStringLiteralValue(node);
  if (literalText !== undefined) {
    return literalText;
  }
  return ts.isIdentifier(node) ? importedConstants.get(node.text) : undefined;
}
+143
View File
@@ -0,0 +1,143 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import {
ALL_BUILTIN_TOOL_NAMES,
TOOL_LEGACY_ALIASES,
} from '@google/gemini-cli-core';
/**
 * Grouping label assigned to each built-in tool in the registry.
 * Every built-in tool is mapped to exactly one of these values.
 */
export type ToolCategory =
| 'file-system'
| 'shell'
| 'web'
| 'planning'
| 'user-interaction'
| 'skills'
| 'task-tracker'
| 'agent'
| 'mcp';
/** A single built-in tool as described by the registry. */
export interface ToolRegistryEntry {
// Canonical tool name; also the key under which the entry is stored.
name: string;
// Category the tool is grouped under.
category: ToolCategory;
// Legacy names that map to this tool; empty when there are none.
aliases: readonly string[];
}
/** Lookup structures over the built-in tools, as returned by buildToolRegistry. */
export interface ToolRegistry {
// Entries keyed by canonical tool name.
tools: ReadonlyMap<string, ToolRegistryEntry>;
// Number of entries in `tools`.
totalTools: number;
// Entries grouped by category; categories with no tools have no key.
byCategory: ReadonlyMap<ToolCategory, readonly ToolRegistryEntry[]>;
// Maps every legacy alias AND every canonical name to the canonical name.
aliasLookup: ReadonlyMap<string, string>;
}
/**
 * Category assignment for every built-in tool.
 *
 * The key type is derived from ALL_BUILTIN_TOOL_NAMES, so a built-in tool
 * without a category here is a compile error.
 */
const TOOL_CATEGORIES: Record<
(typeof ALL_BUILTIN_TOOL_NAMES)[number],
ToolCategory
> = {
glob: 'file-system',
grep_search: 'file-system',
list_directory: 'file-system',
read_file: 'file-system',
read_many_files: 'file-system',
write_file: 'file-system',
replace: 'file-system',
run_shell_command: 'shell',
google_web_search: 'web',
web_fetch: 'web',
enter_plan_mode: 'planning',
exit_plan_mode: 'planning',
write_todos: 'planning',
ask_user: 'user-interaction',
activate_skill: 'skills',
get_internal_docs: 'skills',
tracker_create_task: 'task-tracker',
tracker_update_task: 'task-tracker',
tracker_get_task: 'task-tracker',
tracker_list_tasks: 'task-tracker',
tracker_add_dependency: 'task-tracker',
tracker_visualize: 'task-tracker',
invoke_agent: 'agent',
complete_task: 'agent',
update_topic: 'agent',
read_mcp_resource: 'mcp',
list_mcp_resources: 'mcp',
};
/** Registry built by the first buildToolRegistry() call and reused afterwards. */
let registryCache: ToolRegistry | undefined;

/**
 * Returns the registry of built-in tools, building it on first use.
 *
 * The registry is derived from ALL_BUILTIN_TOOL_NAMES (which tools exist),
 * TOOL_CATEGORIES (how they are grouped) and TOOL_LEGACY_ALIASES (old names
 * that map to a canonical name). The result is cached at module level, so
 * every call returns the same object.
 *
 * @returns The shared registry instance.
 */
export function buildToolRegistry(): ToolRegistry {
  if (registryCache) {
    return registryCache;
  }

  // Invert the legacy-alias table once, instead of re-enumerating and
  // re-scanning it for every built-in tool. Alias order per tool follows the
  // table's own enumeration order.
  const aliasesByCanonical = new Map<string, string[]>();
  for (const [legacyName, canonicalName] of Object.entries(
    TOOL_LEGACY_ALIASES,
  )) {
    const known = aliasesByCanonical.get(canonicalName);
    if (known) {
      known.push(legacyName);
    } else {
      aliasesByCanonical.set(canonicalName, [legacyName]);
    }
  }

  const tools = new Map<string, ToolRegistryEntry>();
  const aliasLookup = new Map<string, string>();
  const categoryGroups = new Map<ToolCategory, ToolRegistryEntry[]>();

  for (const name of ALL_BUILTIN_TOOL_NAMES) {
    const category = TOOL_CATEGORIES[name];
    // Aliases pointing at names that are not built-in tools are never looked
    // up here and are therefore left out of the registry.
    const aliases = aliasesByCanonical.get(name) ?? [];
    for (const alias of aliases) {
      aliasLookup.set(alias, name);
    }
    // A canonical name always resolves to itself.
    aliasLookup.set(name, name);

    const entry: ToolRegistryEntry = {
      name,
      category,
      aliases: Object.freeze(aliases),
    };
    tools.set(name, entry);

    const group = categoryGroups.get(category);
    if (group) {
      group.push(entry);
    } else {
      categoryGroups.set(category, [entry]);
    }
  }

  // Freeze each group only after all tools have been added to it.
  const frozenCategories = new Map<
    ToolCategory,
    readonly ToolRegistryEntry[]
  >();
  for (const [cat, entries] of categoryGroups) {
    frozenCategories.set(cat, Object.freeze(entries));
  }

  registryCache = {
    tools,
    totalTools: tools.size,
    byCategory: frozenCategories,
    aliasLookup,
  };
  return registryCache;
}
/**
 * Resolves a tool name or legacy alias to its canonical tool name.
 *
 * @param registry Registry whose `aliasLookup` is consulted.
 * @param name Canonical name or legacy alias to resolve.
 * @returns The canonical name, or `undefined` when `name` is empty or is not
 *   present in the lookup.
 */
export function resolveToolName(
  registry: ToolRegistry,
  name: string,
): string | undefined {
  return name ? registry.aliasLookup.get(name) : undefined;
}
/**
 * Lists the registry entries belonging to a category.
 *
 * @param registry Registry whose `byCategory` grouping is consulted.
 * @param category Category to look up.
 * @returns The entries stored for the category, or a new empty array when the
 *   category has no entries in the registry.
 */
export function getToolsByCategory(
  registry: ToolRegistry,
  category: ToolCategory,
): readonly ToolRegistryEntry[] {
  const entries = registry.byCategory.get(category);
  if (entries == null) {
    return [];
  }
  return entries;
}