feat: add eval:inventory CLI command and reporting logic (#28009)

This commit is contained in:
Vedant Mahajan
2026-06-19 23:31:01 +05:30
committed by GitHub
parent 6613e129de
commit c22137ea0a
5 changed files with 412 additions and 4 deletions
+1
View File
@@ -32,6 +32,7 @@
"schema:settings": "tsx ./scripts/generate-settings-schema.ts",
"docs:settings": "tsx ./scripts/generate-settings-doc.ts",
"docs:keybindings": "tsx ./scripts/generate-keybindings-doc.ts",
"eval:inventory": "tsx ./scripts/eval-inventory-cli.ts",
"build": "node scripts/build.js",
"build-and-start": "npm run build && npm run start --",
"build:vscode": "node scripts/build_vscode_companion.js",
+46
View File
@@ -0,0 +1,46 @@
#!/usr/bin/env tsx
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview CLI entry point for the eval inventory command.
*
* Scans all eval source files, runs the static analyzer on each,
* and prints a human-readable inventory report grouped by policy,
* file, and suite.
*
* Usage:
* npm run eval:inventory
* npm run eval:inventory -- --root /path/to/repo
*/
import {
collectInventory,
formatInventoryReport,
} from './utils/eval-inventory.js';
/**
 * Entry point: resolves the repo root, collects the eval inventory and
 * prints the formatted report to stdout.
 *
 * The repo root is the argument following `--root` when that flag is present
 * and followed by a non-empty value; otherwise the current working directory.
 * Exits with status 1 when the inventory contains no eval files.
 */
async function main() {
  const argv = process.argv;
  const flagPosition = argv.indexOf('--root');
  const explicitRoot = flagPosition === -1 ? undefined : argv[flagPosition + 1];
  const repoRoot = explicitRoot ? explicitRoot : process.cwd();

  const inventory = await collectInventory(repoRoot);
  if (inventory.totalFiles === 0) {
    console.error('No eval files found under evals/.');
    process.exit(1);
  }

  console.log(formatInventoryReport(inventory));
}

// Any rejection from main() is reported and turned into a non-zero exit code.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
+185
View File
@@ -0,0 +1,185 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'node:path';
import { describe, expect, it } from 'vitest';
import {
collectInventory,
formatInventoryReport,
type InventoryResult,
} from '../utils/eval-inventory.js';
import type { EvalCaseRecord } from '../utils/eval-analysis.js';
/**
 * Test factory for an EvalCaseRecord.
 *
 * Starts from a fixed set of default field values and lets the caller
 * replace any subset of them via `overrides`. A fresh object (including a
 * fresh `location`) is produced on every call.
 */
function makeCaseRecord(
  overrides: Partial<EvalCaseRecord> = {},
): EvalCaseRecord {
  const defaults: EvalCaseRecord = {
    filePath: '/repo/evals/test.eval.ts',
    relativePath: 'evals/test.eval.ts',
    helperName: 'evalTest',
    baseHelperName: 'evalTest',
    policy: 'USUALLY_PASSES',
    name: 'test case',
    hasFiles: false,
    hasPrompt: true,
    location: { line: 1, column: 1 },
  };
  return { ...defaults, ...overrides };
}
describe('eval-inventory', () => {
  /**
   * Builds an InventoryResult for formatter tests: one file, no cases and no
   * diagnostics unless the caller overrides those fields.
   */
  const makeResult = (
    overrides: Partial<InventoryResult> = {},
  ): InventoryResult => ({
    totalFiles: 1,
    totalCases: 0,
    files: [],
    cases: [],
    diagnostics: [],
    ...overrides,
  });

  describe('collectInventory', () => {
    it('discovers eval files from the real evals directory', async () => {
      // Runs against the actual repository checkout two levels above this file.
      const inventory = await collectInventory(
        path.resolve(import.meta.dirname, '../../'),
      );

      expect(inventory.totalFiles).toBeGreaterThanOrEqual(36);
      expect(inventory.totalCases).toBeGreaterThanOrEqual(90);
      expect(inventory.files.length).toBe(inventory.totalFiles);
      expect(inventory.cases.length).toBe(inventory.totalCases);

      for (const record of inventory.cases) {
        expect(record.name).toBeTruthy();
        expect(record.relativePath).toBeTruthy();
        expect(record.relativePath).toMatch(/^evals\//);
      }
    });

    it('returns zero counts for a directory with no eval files', async () => {
      const inventory = await collectInventory(import.meta.dirname);

      expect(inventory.totalFiles).toBe(0);
      expect(inventory.totalCases).toBe(0);
      expect(inventory.files).toEqual([]);
      expect(inventory.cases).toEqual([]);
    });
  });

  describe('formatInventoryReport', () => {
    it('includes summary line with correct counts', () => {
      const text = formatInventoryReport(
        makeResult({
          totalFiles: 2,
          totalCases: 3,
          cases: [
            makeCaseRecord({ policy: 'ALWAYS_PASSES', name: 'case-1' }),
            makeCaseRecord({ policy: 'USUALLY_PASSES', name: 'case-2' }),
            makeCaseRecord({ policy: 'USUALLY_PASSES', name: 'case-3' }),
          ],
        }),
      );

      expect(text).toContain('2 files · 3 cases · 0 diagnostics');
    });

    it('groups cases by policy', () => {
      const stable = makeCaseRecord({
        policy: 'ALWAYS_PASSES',
        name: 'stable test',
      });
      const flaky = makeCaseRecord({
        policy: 'USUALLY_PASSES',
        name: 'flaky test',
      });

      const text = formatInventoryReport(
        makeResult({ totalCases: 2, cases: [stable, flaky] }),
      );

      expect(text).toContain('By Policy');
      expect(text).toContain('ALWAYS_PASSES (1 cases)');
      expect(text).toContain('USUALLY_PASSES (1 cases)');
      expect(text).toContain('• stable test');
      expect(text).toContain('• flaky test');
    });

    it('groups cases by suite name', () => {
      const text = formatInventoryReport(
        makeResult({
          totalCases: 2,
          cases: [
            makeCaseRecord({ suiteName: 'default', name: 'suite-test' }),
            makeCaseRecord({ name: 'no-suite-test' }),
          ],
        }),
      );

      expect(text).toContain('By Suite');
      expect(text).toContain('default (1 cases)');
      expect(text).toContain('(no suite) (1 cases)');
    });

    it('shows diagnostics section when diagnostics exist', () => {
      const text = formatInventoryReport(
        makeResult({
          diagnostics: [
            {
              severity: 'warning',
              message: 'Could not resolve policy',
              filePath: '/repo/evals/bad.eval.ts',
              location: { line: 5, column: 3 },
            },
          ],
        }),
      );

      expect(text).toContain('Diagnostics');
      expect(text).toContain('1 diagnostics');
      expect(text).toContain(
        '⚠ /repo/evals/bad.eval.ts:5:3 — Could not resolve policy',
      );
    });

    it('omits diagnostics section when there are none', () => {
      const text = formatInventoryReport(
        makeResult({ totalCases: 1, cases: [makeCaseRecord()] }),
      );

      expect(text).not.toContain('Diagnostics');
      expect(text).not.toContain('⚠');
    });

    it('includes helper name in case listing', () => {
      const text = formatInventoryReport(
        makeResult({
          totalCases: 1,
          cases: [
            makeCaseRecord({
              helperName: 'customHelper',
              name: 'custom test',
            }),
          ],
        }),
      );

      expect(text).toContain('• custom test [customHelper]');
    });
  });
});
+7 -4
View File
@@ -6,7 +6,10 @@
import { vi } from 'vitest';
// NOTE(review): this factory spreads the un-awaited return value of
// vi.importActual, whereas the async factory further down awaits it first.
// Presumably only the awaited form carries the real `fs` exports over —
// confirm, and keep a single mock registration for 'fs'.
vi.mock('fs', () => ({
...vi.importActual('fs'),
appendFileSync: vi.fn(),
}));
// Mocks the 'fs' module: every export of the real module is kept as-is,
// except appendFileSync, which is replaced with a vitest mock function.
vi.mock('fs', async () => {
// Load the real module so everything other than the override stays real.
const actual = await vi.importActual<typeof import('fs')>('fs');
return {
...actual,
appendFileSync: vi.fn(),
};
});
+173
View File
@@ -0,0 +1,173 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import path from 'node:path';
import { glob } from 'glob';
import {
analyzeEvalSource,
type EvalCaseRecord,
type EvalFileAnalysis,
type EvalAnalysisDiagnostic,
type EvalPolicy,
} from './eval-analysis.js';
/**
 * Aggregated outcome of analyzing every discovered eval file, as produced by
 * collectInventory and consumed by formatInventoryReport.
 */
export interface InventoryResult {
/** Number of eval files that were analyzed (equals `files.length`). */
totalFiles: number;
/** Number of eval cases found across all files (equals `cases.length`). */
totalCases: number;
/** Per-file analysis results, one entry per analyzed file. */
files: EvalFileAnalysis[];
/** Every case from every analyzed file, concatenated in file order. */
cases: readonly EvalCaseRecord[];
/** Every diagnostic from every analyzed file, concatenated in file order. */
diagnostics: readonly EvalAnalysisDiagnostic[];
}
/**
 * Builds the eval inventory for a repository.
 *
 * Globs `**\/*.eval.{ts,tsx}` beneath `<repoRoot>/evals`, sorts the matched
 * absolute paths, then reads each file as UTF-8 (one at a time, in sorted
 * order) and passes its text to the static analyzer.
 *
 * @param repoRoot Directory that contains the `evals` folder.
 * @returns The per-file analyses plus all of their cases and diagnostics
 *   flattened in file order, with the matching totals.
 */
export async function collectInventory(
  repoRoot: string,
): Promise<InventoryResult> {
  const matchedPaths = await glob('**/*.eval.{ts,tsx}', {
    cwd: path.join(repoRoot, 'evals'),
    absolute: true,
    nodir: true,
  });
  // Sorted so the report order does not depend on directory traversal order.
  matchedPaths.sort();

  const files: EvalFileAnalysis[] = [];
  for (const filePath of matchedPaths) {
    const sourceText = await fs.promises.readFile(filePath, 'utf-8');
    files.push(analyzeEvalSource(sourceText, { filePath, repoRoot }));
  }

  const cases: EvalCaseRecord[] = files.flatMap((analysis) => analysis.cases);
  const diagnostics: EvalAnalysisDiagnostic[] = files.flatMap(
    (analysis) => analysis.diagnostics,
  );

  return {
    totalFiles: files.length,
    totalCases: cases.length,
    files,
    cases,
    diagnostics,
  };
}
/**
 * Formats an InventoryResult into a human-readable report string.
 *
 * The report consists of, in order:
 *  - a title and a one-line summary (`N files · N cases · N diagnostics`);
 *  - a "By Policy" section: one heading per policy, then each file's relative
 *    path followed by a bulleted `• name [helper]` line per case;
 *  - a "By Suite" section: one heading per suite (cases without a suite are
 *    listed last under `(no suite)`), with a bulleted line per case;
 *  - a "Diagnostics" section, emitted only when there is at least one
 *    diagnostic, with one `⚠ path:line:column — message` line each.
 *
 * @param result Aggregated inventory to render.
 * @returns The report as a single newline-joined string.
 */
export function formatInventoryReport(result: InventoryResult): string {
  const lines: string[] = [];

  lines.push('Eval Inventory', '══════════════', '');
  lines.push(
    `${result.totalFiles} files · ${result.totalCases} cases · ${result.diagnostics.length} diagnostics`,
    '',
  );

  // --- By Policy ---
  lines.push('By Policy', '─────────');
  const byPolicy = groupBy(result.cases, (c) => c.policy);
  const policyOrder: EvalPolicy[] = [
    'ALWAYS_PASSES',
    'USUALLY_PASSES',
    'USUALLY_FAILS',
    'unknown',
  ];
  // Policies outside the preferred order are appended (in first-seen order)
  // rather than dropped, so every counted case appears in this section.
  const preferred = new Set<string>(policyOrder);
  const policies: string[] = [
    ...policyOrder,
    ...[...byPolicy.keys()].filter((policy) => !preferred.has(policy)),
  ];
  for (const policy of policies) {
    const cases = byPolicy.get(policy);
    if (!cases || cases.length === 0) {
      continue;
    }
    lines.push(`${policy} (${cases.length} cases)`);
    const byFile = groupBy(cases, (c) => c.relativePath);
    for (const [filePath, fileCases] of byFile) {
      lines.push(`  ${filePath}`);
      for (const evalCase of fileCases) {
        lines.push(`    • ${evalCase.name} [${evalCase.helperName}]`);
      }
    }
    lines.push('');
  }

  // --- By Suite ---
  lines.push('By Suite', '────────');
  const noSuite = '(no suite)';
  const bySuite = groupBy(result.cases, (c) => c.suiteName ?? noSuite);
  // Named suites alphabetically; the "(no suite)" bucket always goes last.
  const suiteNames = [...bySuite.keys()].sort((a, b) => {
    if (a === b) return 0;
    if (a === noSuite) return 1;
    if (b === noSuite) return -1;
    return a.localeCompare(b, 'en');
  });
  for (const suite of suiteNames) {
    const cases = bySuite.get(suite) ?? [];
    lines.push(`${suite} (${cases.length} cases)`);
    for (const evalCase of cases) {
      lines.push(
        `  • ${evalCase.name} [${evalCase.relativePath}] (${evalCase.policy})`,
      );
    }
    lines.push('');
  }

  // --- Diagnostics ---
  if (result.diagnostics.length > 0) {
    // Map absolute file paths to repo-relative ones for shorter output.
    const relativeByAbsolute = new Map<string, string>();
    for (const file of result.files) {
      relativeByAbsolute.set(file.filePath, file.relativePath);
    }
    lines.push('Diagnostics', '───────────');
    for (const diagnostic of result.diagnostics) {
      // '<inline>' is shown verbatim; unknown paths fall back to the raw path.
      const displayPath =
        diagnostic.filePath === '<inline>'
          ? diagnostic.filePath
          : (relativeByAbsolute.get(diagnostic.filePath) ??
            diagnostic.filePath);
      const { line, column } = diagnostic.location;
      lines.push(`  ⚠ ${displayPath}:${line}:${column} — ${diagnostic.message}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}

/**
 * Groups items by a string key, preserving first-seen key order and the
 * original item order within each group.
 *
 * @param items Items to group.
 * @param keyFn Computes the group key for an item.
 * @returns A Map from key to the items that produced it.
 */
function groupBy<T>(
  items: readonly T[],
  keyFn: (item: T) => string,
): Map<string, T[]> {
  const groups = new Map<string, T[]>();
  for (const item of items) {
    const key = keyFn(item);
    const group = groups.get(key);
    if (group) {
      group.push(item);
    } else {
      groups.set(key, [item]);
    }
  }
  return groups;
}