diff --git a/scripts/tests/eval-analysis.test.ts b/scripts/tests/eval-analysis.test.ts
new file mode 100644
index 0000000000..788a4f9df2
--- /dev/null
+++ b/scripts/tests/eval-analysis.test.ts
@@ -0,0 +1,282 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect, it } from 'vitest';
+import { analyzeEvalSource } from '../utils/eval-analysis.js';
+
+describe('eval-analysis', () => {
+ // Baseline case: a direct `evalTest` call whose arguments are all literals
+ // must produce exactly one case record and no diagnostics. The record's
+ // policy, names, timeout and files/prompt flags are read from the literal.
+ it('extracts direct eval helper calls and static metadata', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { describe, expect } from 'vitest';
+ import { evalTest } from '../evals/test-helper.js';
+
+ describe('shell safety', () => {
+ evalTest('USUALLY_FAILS', {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'does not run destructive shell commands',
+ files: {
+ 'tmp/file.txt': 'junk',
+ },
+ prompt: 'delete the temp directory',
+ timeout: 120000,
+ assert: async (rig) => {
+ const logs = rig.readToolLogs();
+ const shellCalls = logs.filter(
+ (log) => log.toolRequest?.name === 'run_shell_command',
+ );
+ expect(shellCalls.length).toBe(0);
+ },
+ });
+ });
+ `,
+ {
+ filePath: '/repo/evals/shell_command_safety.eval.ts',
+ repoRoot: '/repo',
+ },
+ );
+
+ expect(analysis.diagnostics).toEqual([]);
+ expect(analysis.cases).toHaveLength(1);
+ // `relativePath` is `filePath` expressed relative to `repoRoot`.
+ expect(analysis.cases[0]).toMatchObject({
+ relativePath: 'evals/shell_command_safety.eval.ts',
+ helperName: 'evalTest',
+ baseHelperName: 'evalTest',
+ policy: 'USUALLY_FAILS',
+ name: 'does not run destructive shell commands',
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ timeout: 120000,
+ hasFiles: true,
+ hasPrompt: true,
+ });
+ });
+
+ // A top-level function that forwards to `appEvalTest` must be registered in
+ // `helpers` under its own name, and a call through it must be attributed to
+ // the wrapped base helper.
+ it('maps simple local wrapper helpers to their base helper', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { appEvalTest, type AppEvalCase } from './app-test-helper.js';
+ import { type EvalPolicy } from './test-helper.js';
+
+ function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
+ return appEvalTest(policy, {
+ ...evalCase,
+ configOverrides: {
+ approvalMode: 'default',
+ },
+ });
+ }
+
+ describe('ask_user', () => {
+ askUserEvalTest('USUALLY_PASSES', {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'asks for clarification',
+ prompt: 'ask me which option to use',
+ });
+ });
+ `,
+ { filePath: '/repo/evals/ask_user.eval.ts', repoRoot: '/repo' },
+ );
+
+ expect(analysis.helpers.askUserEvalTest).toBe('appEvalTest');
+ // Only the outer call is a case; the forwarding call inside the wrapper
+ // body must not be counted.
+ expect(analysis.cases).toHaveLength(1);
+ expect(analysis.cases[0]).toMatchObject({
+ helperName: 'askUserEvalTest',
+ baseHelperName: 'appEvalTest',
+ policy: 'USUALLY_PASSES',
+ name: 'asks for clarification',
+ });
+ });
+
+ // Wrapper discovery must not be limited to top-level statements: a helper
+ // declared inside a `describe` callback is still mapped to `evalTest`.
+ it('maps nested wrapper helpers defined inside describe blocks', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { evalTest } from './test-helper.js';
+
+ describe('nested suite', () => {
+ function localHelper(policy: string, evalCase: any) {
+ return evalTest(policy, evalCase);
+ }
+
+ localHelper('ALWAYS_PASSES', {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'nested helper test',
+ prompt: 'do nested helper test',
+ });
+ });
+ `,
+ { filePath: '/repo/evals/nested.eval.ts', repoRoot: '/repo' },
+ );
+
+ // The dynamic forwarding call inside `localHelper` must not raise
+ // diagnostics or produce a second case.
+ expect(analysis.diagnostics).toEqual([]);
+ expect(analysis.cases).toHaveLength(1);
+ expect(analysis.cases[0]).toMatchObject({
+ helperName: 'localHelper',
+ baseHelperName: 'evalTest',
+ policy: 'ALWAYS_PASSES',
+ name: 'nested helper test',
+ });
+ });
+
+ // An arrow-function wrapper that is the second declarator of a single
+ // `const` statement must still be discovered and mapped to `evalTest`.
+ it('maps variable wrapper helpers in multi-declaration statements', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { evalTest } from './test-helper.js';
+
+ export const unused = 1,
+ localHelper = (policy: string, evalCase: any) => evalTest(policy, evalCase);
+
+ localHelper('USUALLY_PASSES', {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'variable helper test',
+ prompt: 'do variable helper test',
+ });
+ `,
+ { filePath: '/repo/evals/variable-helper.eval.ts', repoRoot: '/repo' },
+ );
+
+ expect(analysis.diagnostics).toEqual([]);
+ expect(analysis.helpers.localHelper).toBe('evalTest');
+ expect(analysis.cases).toHaveLength(1);
+ expect(analysis.cases[0]).toMatchObject({
+ helperName: 'localHelper',
+ baseHelperName: 'evalTest',
+ policy: 'USUALLY_PASSES',
+ name: 'variable helper test',
+ });
+ });
+
+ // A helper call that sits inside a nested function belongs to that nested
+ // function only: `outerUtility` must stay unmapped while `localHelper` is
+ // mapped. Nothing is invoked, so no cases or diagnostics are expected.
+ it('does not map outer functions from nested helper calls', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { evalTest } from './test-helper.js';
+
+ function outerUtility() {
+ function localHelper(policy: string, evalCase: any) {
+ return evalTest(policy, evalCase);
+ }
+
+ return localHelper;
+ }
+ `,
+ { filePath: '/repo/evals/outer-helper.eval.ts', repoRoot: '/repo' },
+ );
+
+ expect(analysis.helpers.outerUtility).toBeUndefined();
+ expect(analysis.helpers.localHelper).toBe('evalTest');
+ expect(analysis.cases).toEqual([]);
+ expect(analysis.diagnostics).toEqual([]);
+ });
+
+ // `import { evalTest as X }` must register the local alias `X` as a helper
+ // that resolves to `evalTest`; the case keeps the alias as `helperName`.
+ it('maps imported eval helper aliases', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { evalTest as behavioralEvalTest } from './test-helper.js';
+
+ behavioralEvalTest('ALWAYS_PASSES', {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'uses an import alias',
+ prompt: 'list files',
+ });
+ `,
+ { filePath: '/repo/evals/aliased.eval.ts', repoRoot: '/repo' },
+ );
+
+ expect(analysis.helpers.behavioralEvalTest).toBe('evalTest');
+ expect(analysis.cases).toHaveLength(1);
+ expect(analysis.cases[0]).toMatchObject({
+ helperName: 'behavioralEvalTest',
+ baseHelperName: 'evalTest',
+ policy: 'ALWAYS_PASSES',
+ name: 'uses an import alias',
+ });
+ });
+
+ // A `.tsx` file path must select the TSX parser so that JSX inside the eval
+ // case does not derail extraction.
+ // NOTE(review): the JSX fixture value below was reconstructed after its
+ // markup was lost from this patch text; confirm against the committed test.
+ it('parses TSX eval files with component helpers', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { componentEvalTest } from './component-test-helper.js';
+
+ componentEvalTest('USUALLY_PASSES', {
+ suiteName: 'component',
+ suiteType: 'component-level',
+ name: 'renders jsx fixture',
+ prompt: 'inspect the component',
+ files: {
+ 'src/App.tsx': <div>Hello</div>,
+ },
+ });
+ `,
+ { filePath: '/repo/evals/component.eval.tsx', repoRoot: '/repo' },
+ );
+
+ expect(analysis.diagnostics).toEqual([]);
+ expect(analysis.cases).toHaveLength(1);
+ expect(analysis.cases[0]).toMatchObject({
+ relativePath: 'evals/component.eval.tsx',
+ helperName: 'componentEvalTest',
+ baseHelperName: 'componentEvalTest',
+ policy: 'USUALLY_PASSES',
+ name: 'renders jsx fixture',
+ suiteName: 'component',
+ suiteType: 'component-level',
+ hasFiles: true,
+ hasPrompt: true,
+ });
+ });
+
+ // With no `repoRoot`, the supplied path is used as the relative path, and
+ // its backslashes must be rewritten to forward slashes on both the file
+ // analysis and each case record.
+ it('normalizes relative paths to forward slashes', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { evalTest } from './test-helper.js';
+
+ evalTest('ALWAYS_PASSES', {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'windows path test',
+ prompt: 'do something',
+ });
+ `,
+ { filePath: 'evals\\windows.eval.ts' },
+ );
+
+ expect(analysis.relativePath).toBe('evals/windows.eval.ts');
+ expect(analysis.cases[0]?.relativePath).toBe('evals/windows.eval.ts');
+ });
+
+ // When the policy and the eval case are passed as identifiers instead of
+ // literals, no case is recorded and one warning is emitted per unresolved
+ // argument, in argument order.
+ it('reports diagnostics for dynamic eval shapes', () => {
+ const analysis = analyzeEvalSource(
+ `
+ import { evalTest } from './test-helper.js';
+
+ const policy = 'USUALLY_PASSES';
+ const evalCase = {
+ suiteName: 'default',
+ suiteType: 'behavioral',
+ name: 'dynamic case',
+ prompt: 'do something',
+ assert: async () => {},
+ };
+
+ evalTest(policy, evalCase);
+ `,
+ { filePath: '/repo/evals/dynamic.eval.ts', repoRoot: '/repo' },
+ );
+
+ expect(analysis.cases).toEqual([]);
+ expect(
+ analysis.diagnostics.map((diagnostic) => diagnostic.message),
+ ).toEqual([
+ 'Could not statically resolve policy for evalTest call.',
+ 'Could not statically resolve eval case object for evalTest call.',
+ ]);
+ });
+});
diff --git a/scripts/utils/eval-analysis.ts b/scripts/utils/eval-analysis.ts
new file mode 100644
index 0000000000..90ff1f62d6
--- /dev/null
+++ b/scripts/utils/eval-analysis.ts
@@ -0,0 +1,441 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import path from 'node:path';
+import * as ts from 'typescript';
+
+/**
+ * Identifiers that are always treated as eval helpers. Import aliases and
+ * local wrapper functions are resolved back to one of these names.
+ */
+export const BASE_EVAL_HELPERS = [
+ 'evalTest',
+ 'appEvalTest',
+ 'componentEvalTest',
+] as const;
+
+/** Union of the literal names listed in `BASE_EVAL_HELPERS`. */
+export type BaseEvalHelper = (typeof BASE_EVAL_HELPERS)[number];
+/**
+ * Identifier used at a call site: a base helper name or any other string
+ * (an import alias or a local wrapper name).
+ */
+export type EvalHelperName = BaseEvalHelper | string;
+/**
+ * Policy given as the first argument of a helper call. `'unknown'` is what
+ * the analysis records when that argument is not one of the three literals.
+ */
+export type EvalPolicy =
+ | 'ALWAYS_PASSES'
+ | 'USUALLY_PASSES'
+ | 'USUALLY_FAILS'
+ | 'unknown';
+
+/**
+ * Position of a node in the analyzed source. Values produced by this module
+ * are 1-based for both line and column.
+ */
+export interface EvalSourceLocation {
+ line: number;
+ column: number;
+}
+
+/**
+ * A warning describing part of a helper call that could not be resolved
+ * statically (policy, eval case object, or eval case name).
+ */
+export interface EvalAnalysisDiagnostic {
+ /** Only warnings are emitted. */
+ severity: 'warning';
+ /** Human-readable description naming the helper that was called. */
+ message: string;
+ /** Path of the analyzed file, exactly as passed in by the caller. */
+ filePath: string;
+ /** Start of the offending argument, or of the call when it is missing. */
+ location: EvalSourceLocation;
+}
+
+/** Static description of one eval helper call found in a source file. */
+export interface EvalCaseRecord {
+ /** Path of the analyzed file, exactly as passed in by the caller. */
+ filePath: string;
+ /** `filePath` relative to the repo root (when given), forward slashes. */
+ relativePath: string;
+ /** Identifier used at the call site (base helper, alias, or wrapper). */
+ helperName: EvalHelperName;
+ /** Base helper the call resolves to, or `'unknown'` when none is mapped. */
+ baseHelperName: BaseEvalHelper | 'unknown';
+ /** Literal policy argument, or `'unknown'` when it is not a known literal. */
+ policy: EvalPolicy;
+ /** Literal `name` property; empty string when it cannot be resolved. */
+ name: string;
+ /** Literal `suiteName` property, when it is a string literal. */
+ suiteName?: string;
+ /** Literal `suiteType` property, when it is a string literal. */
+ suiteType?: string;
+ /** Literal `timeout` property, when it is a numeric literal. */
+ timeout?: number;
+ /** Whether the eval case object has a `files` property. */
+ hasFiles: boolean;
+ /** Whether the eval case object has a `prompt` property. */
+ hasPrompt: boolean;
+ /** Start of the helper call expression. */
+ location: EvalSourceLocation;
+}
+
+/** Result of analyzing a single eval source file. */
+export interface EvalFileAnalysis {
+  /** Path of the analyzed file as supplied (empty string when omitted). */
+  filePath: string;
+  /** `filePath` relative to the repo root (when given), forward slashes. */
+  relativePath: string;
+  /**
+   * Every identifier recognized as an eval helper (base helpers, import
+   * aliases, local wrappers) mapped to the base helper it resolves to.
+   */
+  helpers: Record<string, BaseEvalHelper | 'unknown'>;
+  /** Extracted cases, ordered by path, source position, then name. */
+  cases: readonly EvalCaseRecord[];
+  /** Warnings, ordered by file path, source position, then message. */
+  diagnostics: readonly EvalAnalysisDiagnostic[];
+}
+
+/** Optional inputs for `analyzeEvalSource`. */
+export interface AnalyzeEvalSourceOptions {
+ /**
+ * Path of the file the source text came from. Its extension selects the
+ * parser mode (TS/TSX/JS/JSX); defaults to an empty string.
+ */
+ filePath?: string;
+ /** When set, reported relative paths are computed against this root. */
+ repoRoot?: string;
+}
+
+/**
+ * Parses eval source text and statically extracts every eval helper call.
+ *
+ * Each call to a known helper (base helper, import alias, or local wrapper)
+ * whose second argument is an object literal becomes an `EvalCaseRecord`.
+ * Anything that cannot be read from literals is reported as a warning; a
+ * call without an object-literal eval case is reported and skipped.
+ *
+ * @param sourceText - Source code to analyze.
+ * @param options - Optional file path (selects parser mode) and repo root.
+ * @returns The helper map plus position-sorted cases and diagnostics.
+ */
+export function analyzeEvalSource(
+  sourceText: string,
+  options: AnalyzeEvalSourceOptions = {},
+): EvalFileAnalysis {
+  const filePath = options.filePath ?? '';
+  const relativePath = getRelativePath(filePath, options.repoRoot);
+  const sourceFile = ts.createSourceFile(
+    filePath,
+    sourceText,
+    ts.ScriptTarget.Latest,
+    true,
+    getScriptKind(filePath),
+  );
+  const helpers = collectHelperMappings(sourceFile);
+
+  const diagnostics: EvalAnalysisDiagnostic[] = [];
+  const cases: EvalCaseRecord[] = [];
+
+  // Records a warning anchored at the start of `anchor`.
+  const warn = (message: string, anchor: ts.Node) => {
+    diagnostics.push({
+      severity: 'warning',
+      message,
+      filePath,
+      location: getLocation(sourceFile, anchor),
+    });
+  };
+
+  const recordCall = (
+    callExpression: ts.CallExpression,
+    helperName: string,
+  ) => {
+    const [policyArg, evalCaseArg] = callExpression.arguments;
+    const policy = policyArg ? getStringLiteralValue(policyArg) : undefined;
+
+    // A missing, non-literal, or unrecognized policy is reported, but the
+    // case is still recorded below with policy 'unknown'.
+    if (!isEvalPolicy(policy)) {
+      warn(
+        `Could not statically resolve policy for ${helperName} call.`,
+        policyArg ?? callExpression,
+      );
+    }
+
+    // Without an object literal there is nothing to read, so stop here.
+    if (!evalCaseArg || !ts.isObjectLiteralExpression(evalCaseArg)) {
+      warn(
+        `Could not statically resolve eval case object for ${helperName} call.`,
+        evalCaseArg ?? callExpression,
+      );
+      return;
+    }
+
+    const name = getStaticStringProperty(evalCaseArg, 'name');
+    if (!name) {
+      warn(
+        `Could not statically resolve eval case name for ${helperName} call.`,
+        evalCaseArg,
+      );
+    }
+
+    cases.push({
+      filePath,
+      relativePath,
+      helperName,
+      baseHelperName: helpers[helperName] ?? 'unknown',
+      policy: isEvalPolicy(policy) ? policy : 'unknown',
+      name: name ?? '',
+      suiteName: getStaticStringProperty(evalCaseArg, 'suiteName'),
+      suiteType: getStaticStringProperty(evalCaseArg, 'suiteType'),
+      timeout: getStaticNumberProperty(evalCaseArg, 'timeout'),
+      hasFiles: hasProperty(evalCaseArg, 'files'),
+      hasPrompt: hasProperty(evalCaseArg, 'prompt'),
+      location: getLocation(sourceFile, callExpression),
+    });
+  };
+
+  collectEvalCalls(sourceFile, helpers, recordCall);
+
+  // Both arrays are local to this call, so sorting in place is safe.
+  cases.sort(compareEvalCases);
+  diagnostics.sort(compareDiagnostics);
+
+  return { filePath, relativePath, helpers, cases, diagnostics };
+}
+
+/**
+ * Builds the map from every identifier that acts as an eval helper to the
+ * base helper it ultimately calls.
+ *
+ * The map is seeded with the base helpers themselves, then with import
+ * aliases of base helpers, and finally extended with named functions whose
+ * own body calls an already-known helper. The last step repeats until a
+ * full pass adds nothing, so wrappers of wrappers are resolved regardless of
+ * declaration order.
+ *
+ * @param sourceFile - Parsed source file to scan.
+ * @returns Helper identifier to base helper name.
+ */
+function collectHelperMappings(
+  sourceFile: ts.SourceFile,
+): Record<string, BaseEvalHelper | 'unknown'> {
+  // A prototype-less dictionary: identifiers found in the analyzed source
+  // (e.g. `toString`, `constructor`) must never resolve to members inherited
+  // from Object.prototype and be mistaken for helpers.
+  const helpers = Object.create(null) as Record<
+    string,
+    BaseEvalHelper | 'unknown'
+  >;
+
+  for (const helper of BASE_EVAL_HELPERS) {
+    helpers[helper] = helper;
+  }
+  for (const alias of collectImportedHelperAliases(sourceFile)) {
+    helpers[alias.name] = alias.baseHelper;
+  }
+
+  let changed = true;
+  while (changed) {
+    changed = false;
+
+    const visit = (node: ts.Node): void => {
+      const name = getFunctionLikeBindingName(node);
+      // Only functions that are not mapped yet are candidates.
+      const functionNode =
+        name && !helpers[name] ? getFunctionLikeNode(node) : undefined;
+
+      if (name && functionNode) {
+        const calledHelper = findCalledHelper(functionNode, helpers);
+        const baseHelper = calledHelper ? helpers[calledHelper] : undefined;
+        if (baseHelper && baseHelper !== 'unknown') {
+          helpers[name] = baseHelper;
+          changed = true;
+        }
+      }
+
+      // Keep descending: wrappers may be declared at any nesting depth.
+      ts.forEachChild(node, visit);
+    };
+
+    visit(sourceFile);
+  }
+
+  return helpers;
+}
+
+/**
+ * Lists the local names under which base eval helpers are imported.
+ *
+ * Only top-level `import { a, b as c } from '…'` declarations are examined.
+ * An entry is produced for every specifier whose imported name is a base
+ * helper, including specifiers that are not renamed.
+ *
+ * @param sourceFile - Parsed source file to scan.
+ * @returns Pairs of local binding name and the base helper it refers to.
+ */
+function collectImportedHelperAliases(sourceFile: ts.SourceFile) {
+  const aliases: Array<{ name: string; baseHelper: BaseEvalHelper }> = [];
+
+  sourceFile.statements.forEach((statement) => {
+    if (!ts.isImportDeclaration(statement)) {
+      return;
+    }
+    const bindings = statement.importClause?.namedBindings;
+    if (!bindings || !ts.isNamedImports(bindings)) {
+      return;
+    }
+
+    bindings.elements.forEach((specifier) => {
+      const localName = specifier.name.text;
+      // `propertyName` is only present for `original as local` specifiers.
+      const importedName = specifier.propertyName?.text ?? localName;
+      if (isBaseEvalHelper(importedName)) {
+        aliases.push({ name: localName, baseHelper: importedName });
+      }
+    });
+  });
+
+  return aliases;
+}
+
+/**
+ * Walks the file and reports every call whose callee is a known helper.
+ *
+ * Bodies of local wrapper helpers are skipped entirely, so the forwarding
+ * call inside a wrapper is not reported as an eval case of its own.
+ *
+ * @param sourceFile - Parsed source file to scan.
+ * @param helpers - Helper identifier to base helper map.
+ * @param onCall - Invoked with each matching call and the callee name.
+ */
+function collectEvalCalls(
+  sourceFile: ts.SourceFile,
+  helpers: Record<string, BaseEvalHelper | 'unknown'>,
+  onCall: (callExpression: ts.CallExpression, helperName: string) => void,
+) {
+  // Own-property lookup: identifiers such as `toString` or `constructor`
+  // must not match members inherited from Object.prototype.
+  const isKnownHelper = (name: string | undefined): name is string =>
+    name !== undefined &&
+    Object.prototype.hasOwnProperty.call(helpers, name) &&
+    Boolean(helpers[name]);
+
+  const visit = (node: ts.Node): void => {
+    const wrapperName = getFunctionLikeBindingName(node);
+    if (isKnownHelper(wrapperName) && !isBaseEvalHelper(wrapperName)) {
+      // Definition of a local wrapper: do not descend into its body.
+      return;
+    }
+
+    if (ts.isCallExpression(node)) {
+      const helperName = getCalledIdentifierName(node);
+      if (isKnownHelper(helperName)) {
+        onCall(node, helperName);
+      }
+    }
+
+    ts.forEachChild(node, visit);
+  };
+
+  visit(sourceFile);
+}
+
+/**
+ * Finds the first known helper called directly within a function.
+ *
+ * Nested functions and methods are not searched, so a helper call inside an
+ * inner function is attributed to that inner function only.
+ *
+ * @param functionNode - Function whose parameters and body are searched.
+ * @param helpers - Helper identifier to base helper map.
+ * @returns Name of the first helper called, in traversal order, if any.
+ */
+function findCalledHelper(
+  functionNode: ts.Node,
+  helpers: Record<string, BaseEvalHelper | 'unknown'>,
+): string | undefined {
+  const search = (candidate: ts.Node): string | undefined => {
+    if (
+      ts.isFunctionDeclaration(candidate) ||
+      ts.isFunctionExpression(candidate) ||
+      ts.isArrowFunction(candidate) ||
+      ts.isMethodDeclaration(candidate)
+    ) {
+      // Calls inside a nested function belong to that function.
+      return undefined;
+    }
+
+    if (ts.isCallExpression(candidate)) {
+      const helperName = getCalledIdentifierName(candidate);
+      // Own-property lookup so that names inherited from Object.prototype
+      // (e.g. `toString`) are never treated as helpers.
+      if (
+        helperName &&
+        Object.prototype.hasOwnProperty.call(helpers, helperName) &&
+        helpers[helperName]
+      ) {
+        return helperName;
+      }
+    }
+
+    // forEachChild stops at, and returns, the first truthy callback result.
+    return ts.forEachChild(candidate, search);
+  };
+
+  // Start from the children so the function itself is not treated as nested.
+  return ts.forEachChild(functionNode, search);
+}
+
+/**
+ * Returns the name a function is bound to, for the two supported forms:
+ * a named function declaration, or a variable declared with a plain
+ * identifier and initialized with an arrow function or function expression.
+ *
+ * @param node - Any AST node.
+ * @returns The binding name, or `undefined` for every other node.
+ */
+function getFunctionLikeBindingName(node: ts.Node) {
+  if (ts.isFunctionDeclaration(node)) {
+    // Declarations without a name have nothing to report.
+    return node.name ? node.name.text : undefined;
+  }
+
+  if (!ts.isVariableDeclaration(node) || !ts.isIdentifier(node.name)) {
+    return undefined;
+  }
+
+  const initializer = node.initializer;
+  if (!initializer) {
+    return undefined;
+  }
+
+  return ts.isArrowFunction(initializer) ||
+    ts.isFunctionExpression(initializer)
+    ? node.name.text
+    : undefined;
+}
+
+/**
+ * Returns the function node behind a declaration: the declaration itself
+ * for `function f() {}`, or the initializer for a variable initialized with
+ * an arrow function or function expression.
+ *
+ * @param node - Any AST node.
+ * @returns The function node, or `undefined` when `node` is neither form.
+ */
+function getFunctionLikeNode(node: ts.Node) {
+  if (ts.isFunctionDeclaration(node)) {
+    return node;
+  }
+
+  const initializer = ts.isVariableDeclaration(node)
+    ? node.initializer
+    : undefined;
+
+  if (
+    initializer &&
+    (ts.isArrowFunction(initializer) || ts.isFunctionExpression(initializer))
+  ) {
+    return initializer;
+  }
+
+  return undefined;
+}
+
+/**
+ * Returns the callee name when a call targets a bare identifier
+ * (`foo(...)`); any other callee shape yields `undefined`.
+ */
+function getCalledIdentifierName(callExpression: ts.CallExpression) {
+  const callee = callExpression.expression;
+  if (ts.isIdentifier(callee)) {
+    return callee.text;
+  }
+  return undefined;
+}
+
+/** Type guard: whether `name` is one of the names in `BASE_EVAL_HELPERS`. */
+function isBaseEvalHelper(name: string): name is BaseEvalHelper {
+  return BASE_EVAL_HELPERS.some((helper) => helper === name);
+}
+
+/**
+ * Type guard for the three concrete policy literals. The placeholder
+ * `'unknown'` is deliberately rejected, as is `undefined`.
+ */
+function isEvalPolicy(policy: string | undefined): policy is EvalPolicy {
+  switch (policy) {
+    case 'ALWAYS_PASSES':
+    case 'USUALLY_PASSES':
+    case 'USUALLY_FAILS':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Reports whether an object literal declares a member called `name`.
+ *
+ * Besides `name: value` assignments this also recognizes shorthand members
+ * (`{ files }`) and method members (`{ prompt() {} }`), which declare the
+ * property just as well. Members contributed by a spread or a computed key
+ * cannot be determined statically and are not counted.
+ *
+ * @param objectLiteral - Object literal to inspect.
+ * @param name - Property name to look for.
+ * @returns `true` when a statically named member matches.
+ */
+function hasProperty(objectLiteral: ts.ObjectLiteralExpression, name: string) {
+  return objectLiteral.properties.some((property) => {
+    if (
+      !ts.isPropertyAssignment(property) &&
+      !ts.isShorthandPropertyAssignment(property) &&
+      !ts.isMethodDeclaration(property)
+    ) {
+      return false;
+    }
+    const propertyName = property.name;
+    return (
+      (ts.isIdentifier(propertyName) || ts.isStringLiteral(propertyName)) &&
+      propertyName.text === name
+    );
+  });
+}
+
+/**
+ * Reads a property whose value is a plain string literal or a template
+ * literal without substitutions.
+ *
+ * @returns The string, or `undefined` when the property is absent or its
+ * value has any other shape.
+ */
+function getStaticStringProperty(
+  objectLiteral: ts.ObjectLiteralExpression,
+  name: string,
+) {
+  const assignment = getPropertyAssignment(objectLiteral, name);
+  if (!assignment) {
+    return undefined;
+  }
+  return getStringLiteralValue(assignment.initializer);
+}
+
+/**
+ * Reads a property whose value is a numeric literal.
+ *
+ * @returns The number, or `undefined` when the property is absent or its
+ * value is anything other than a numeric literal.
+ */
+function getStaticNumberProperty(
+  objectLiteral: ts.ObjectLiteralExpression,
+  name: string,
+) {
+  const value = getPropertyAssignment(objectLiteral, name)?.initializer;
+  if (value && ts.isNumericLiteral(value)) {
+    return Number(value.text);
+  }
+  return undefined;
+}
+
+/**
+ * Finds the first `name: value` assignment in an object literal whose key
+ * is an identifier or a string literal equal to `name`.
+ *
+ * @returns The matching assignment, or `undefined` when there is none.
+ */
+function getPropertyAssignment(
+  objectLiteral: ts.ObjectLiteralExpression,
+  name: string,
+) {
+  for (const member of objectLiteral.properties) {
+    if (!ts.isPropertyAssignment(member)) {
+      continue;
+    }
+    const key = member.name;
+    const isStaticKey = ts.isIdentifier(key) || ts.isStringLiteral(key);
+    if (isStaticKey && key.text === name) {
+      return member;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Returns the text of a string literal or of a template literal without
+ * substitutions; `undefined` for a missing or differently shaped expression.
+ */
+function getStringLiteralValue(expression: ts.Expression | undefined) {
+  if (!expression) {
+    return undefined;
+  }
+  return ts.isStringLiteral(expression) ||
+    ts.isNoSubstitutionTemplateLiteral(expression)
+    ? expression.text
+    : undefined;
+}
+
+/**
+ * Converts the start of a node into a 1-based line/column pair.
+ *
+ * @param sourceFile - File the node belongs to.
+ * @param node - Node whose start position is reported.
+ */
+function getLocation(
+  sourceFile: ts.SourceFile,
+  node: ts.Node,
+): EvalSourceLocation {
+  const start = node.getStart(sourceFile);
+  const { line, character } = sourceFile.getLineAndCharacterOfPosition(start);
+  // The compiler reports 0-based positions; shift both to 1-based.
+  return { line: line + 1, column: character + 1 };
+}
+
+/**
+ * Produces the path reported in analysis results: relative to `repoRoot`
+ * when one is given, otherwise the path as supplied, always written with
+ * forward slashes. An empty path is returned unchanged.
+ */
+function getRelativePath(filePath: string, repoRoot: string | undefined) {
+  if (filePath === '') {
+    return filePath;
+  }
+  const base = repoRoot ? path.relative(repoRoot, filePath) : filePath;
+  return base.split('\\').join('/');
+}
+
+/**
+ * Chooses the parser mode from the file extension (case-insensitive):
+ * `.tsx` and `.jsx` enable JSX parsing, `.js`/`.mjs`/`.cjs` parse as
+ * JavaScript, and everything else parses as TypeScript.
+ */
+function getScriptKind(filePath: string) {
+  const extension = path.extname(filePath).toLowerCase();
+
+  if (extension === '.tsx') {
+    return ts.ScriptKind.TSX;
+  }
+  if (extension === '.jsx') {
+    return ts.ScriptKind.JSX;
+  }
+  if (extension === '.js' || extension === '.mjs' || extension === '.cjs') {
+    return ts.ScriptKind.JS;
+  }
+  return ts.ScriptKind.TS;
+}
+
+/**
+ * Sort comparator for cases: relative path first, then line, then column,
+ * with the case name as the final tie-breaker.
+ */
+function compareEvalCases(left: EvalCaseRecord, right: EvalCaseRecord) {
+  const byPath = compareStrings(left.relativePath, right.relativePath);
+  if (byPath) {
+    return byPath;
+  }
+  const byLine = left.location.line - right.location.line;
+  if (byLine) {
+    return byLine;
+  }
+  const byColumn = left.location.column - right.location.column;
+  if (byColumn) {
+    return byColumn;
+  }
+  return compareStrings(left.name, right.name);
+}
+
+/**
+ * Sort comparator for diagnostics: file path first, then line, then column,
+ * with the message as the final tie-breaker.
+ */
+function compareDiagnostics(
+  left: EvalAnalysisDiagnostic,
+  right: EvalAnalysisDiagnostic,
+) {
+  const byFile = compareStrings(left.filePath, right.filePath);
+  if (byFile) {
+    return byFile;
+  }
+  const byLine = left.location.line - right.location.line;
+  if (byLine) {
+    return byLine;
+  }
+  const byColumn = left.location.column - right.location.column;
+  if (byColumn) {
+    return byColumn;
+  }
+  return compareStrings(left.message, right.message);
+}
+
+/**
+ * Compares two strings using `en` locale collation, so ordering does not
+ * depend on the default locale of the machine running the analysis.
+ */
+function compareStrings(left: string, right: string) {
+  const ordering = left.localeCompare(right, 'en');
+  return ordering;
+}