mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-22 09:17:33 -07:00
feat: add eval:inventory CLI command and reporting logic (#28009)
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
"schema:settings": "tsx ./scripts/generate-settings-schema.ts",
|
||||
"docs:settings": "tsx ./scripts/generate-settings-doc.ts",
|
||||
"docs:keybindings": "tsx ./scripts/generate-keybindings-doc.ts",
|
||||
"eval:inventory": "tsx ./scripts/eval-inventory-cli.ts",
|
||||
"build": "node scripts/build.js",
|
||||
"build-and-start": "npm run build && npm run start --",
|
||||
"build:vscode": "node scripts/build_vscode_companion.js",
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env tsx
|
||||
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fileoverview CLI entry point for the eval inventory command.
|
||||
*
|
||||
* Scans all eval source files, runs the static analyzer on each,
|
||||
* and prints a human-readable inventory report grouped by policy,
|
||||
* file, and suite.
|
||||
*
|
||||
* Usage:
|
||||
* npm run eval:inventory
|
||||
* npm run eval:inventory -- --root /path/to/repo
|
||||
*/
|
||||
|
||||
import {
|
||||
collectInventory,
|
||||
formatInventoryReport,
|
||||
} from './utils/eval-inventory.js';
|
||||
|
||||
/**
 * Determines which repository root to scan from the command-line arguments.
 *
 * Both `--root <path>` and `--root=<path>` are accepted. The space-separated
 * form is checked first, so every invocation that worked before keeps its
 * meaning. When neither form supplies a non-empty value, the current working
 * directory is used.
 *
 * @param argv Raw argument vector, normally `process.argv`.
 * @returns The path passed via `--root`, or `process.cwd()` as a fallback.
 */
function resolveRepoRoot(argv: readonly string[]): string {
  const flag = '--root';

  const flagIndex = argv.indexOf(flag);
  const spacedValue = flagIndex === -1 ? undefined : argv[flagIndex + 1];
  if (spacedValue) {
    return spacedValue;
  }

  const inlinePrefix = `${flag}=`;
  const inlineArg = argv.find((arg) => arg.startsWith(inlinePrefix));
  const inlineValue = inlineArg?.slice(inlinePrefix.length);

  // `||` rather than `??`: an empty `--root=` should also fall back to cwd.
  return inlineValue || process.cwd();
}

/**
 * Collects the eval inventory for the selected repo root and prints the
 * formatted report to stdout. Exits with status 1 when no eval files are
 * found.
 */
async function main(): Promise<void> {
  const repoRoot = resolveRepoRoot(process.argv);

  const result = await collectInventory(repoRoot);

  if (result.totalFiles === 0) {
    console.error('No eval files found under evals/.');
    process.exit(1);
  }

  console.log(formatInventoryReport(result));
}

// Any rejection from main() is reported and turned into a non-zero exit code.
main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
|
||||
@@ -0,0 +1,185 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import path from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
collectInventory,
|
||||
formatInventoryReport,
|
||||
type InventoryResult,
|
||||
} from '../utils/eval-inventory.js';
|
||||
import type { EvalCaseRecord } from '../utils/eval-analysis.js';
|
||||
|
||||
/**
 * Builds an EvalCaseRecord fixture for the formatter tests.
 *
 * Starts from a fixed set of default field values and lets the caller
 * replace any subset of them.
 *
 * @param overrides Fields that should differ from the defaults.
 * @returns The defaults with `overrides` applied on top.
 */
function makeCaseRecord(
  overrides: Partial<EvalCaseRecord> = {},
): EvalCaseRecord {
  const defaults: EvalCaseRecord = {
    filePath: '/repo/evals/test.eval.ts',
    relativePath: 'evals/test.eval.ts',
    helperName: 'evalTest',
    baseHelperName: 'evalTest',
    policy: 'USUALLY_PASSES',
    name: 'test case',
    hasFiles: false,
    hasPrompt: true,
    location: { line: 1, column: 1 },
  };

  // Later spread wins, so caller-supplied fields replace the defaults.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('eval-inventory', () => {
  /**
   * Builds an InventoryResult fixture with an empty `files` list.
   * `totalCases` is taken from the number of cases supplied.
   */
  const inventoryOf = (
    totalFiles: number,
    cases: InventoryResult['cases'],
    diagnostics: InventoryResult['diagnostics'] = [],
  ): InventoryResult => ({
    totalFiles,
    totalCases: cases.length,
    files: [],
    cases,
    diagnostics,
  });

  describe('collectInventory', () => {
    it('discovers eval files from the real evals directory', async () => {
      // Two levels up from this test file's directory is used as the repo root.
      const { totalFiles, totalCases, files, cases } = await collectInventory(
        path.resolve(import.meta.dirname, '../../'),
      );

      expect(totalFiles).toBeGreaterThanOrEqual(36);
      expect(totalCases).toBeGreaterThanOrEqual(90);
      expect(files.length).toBe(totalFiles);
      expect(cases.length).toBe(totalCases);

      for (const { name, relativePath } of cases) {
        expect(name).toBeTruthy();
        expect(relativePath).toBeTruthy();
        expect(relativePath).toMatch(/^evals\//);
      }
    });

    it('returns zero counts for a directory with no eval files', async () => {
      const inventory = await collectInventory(import.meta.dirname);

      expect(inventory.totalFiles).toBe(0);
      expect(inventory.totalCases).toBe(0);
      expect(inventory.files).toEqual([]);
      expect(inventory.cases).toEqual([]);
    });
  });

  describe('formatInventoryReport', () => {
    it('includes summary line with correct counts', () => {
      const report = formatInventoryReport(
        inventoryOf(2, [
          makeCaseRecord({ policy: 'ALWAYS_PASSES', name: 'case-1' }),
          makeCaseRecord({ policy: 'USUALLY_PASSES', name: 'case-2' }),
          makeCaseRecord({ policy: 'USUALLY_PASSES', name: 'case-3' }),
        ]),
      );

      expect(report).toContain('2 files · 3 cases · 0 diagnostics');
    });

    it('groups cases by policy', () => {
      const stable = makeCaseRecord({
        policy: 'ALWAYS_PASSES',
        name: 'stable test',
      });
      const flaky = makeCaseRecord({
        policy: 'USUALLY_PASSES',
        name: 'flaky test',
      });

      const report = formatInventoryReport(inventoryOf(1, [stable, flaky]));

      expect(report).toContain('By Policy');
      expect(report).toContain('ALWAYS_PASSES (1 cases)');
      expect(report).toContain('USUALLY_PASSES (1 cases)');
      expect(report).toContain('• stable test');
      expect(report).toContain('• flaky test');
    });

    it('groups cases by suite name', () => {
      const withSuite = makeCaseRecord({
        suiteName: 'default',
        name: 'suite-test',
      });
      const withoutSuite = makeCaseRecord({ name: 'no-suite-test' });

      const report = formatInventoryReport(
        inventoryOf(1, [withSuite, withoutSuite]),
      );

      expect(report).toContain('By Suite');
      expect(report).toContain('default (1 cases)');
      expect(report).toContain('(no suite) (1 cases)');
    });

    it('shows diagnostics section when diagnostics exist', () => {
      const report = formatInventoryReport(
        inventoryOf(
          1,
          [],
          [
            {
              severity: 'warning',
              message: 'Could not resolve policy',
              filePath: '/repo/evals/bad.eval.ts',
              location: { line: 5, column: 3 },
            },
          ],
        ),
      );

      expect(report).toContain('Diagnostics');
      expect(report).toContain('1 diagnostics');
      expect(report).toContain(
        '⚠ /repo/evals/bad.eval.ts:5:3 — Could not resolve policy',
      );
    });

    it('omits diagnostics section when there are none', () => {
      const report = formatInventoryReport(inventoryOf(1, [makeCaseRecord()]));

      expect(report).not.toContain('Diagnostics');
      expect(report).not.toContain('⚠');
    });

    it('includes helper name in case listing', () => {
      const custom = makeCaseRecord({
        helperName: 'customHelper',
        name: 'custom test',
      });

      const report = formatInventoryReport(inventoryOf(1, [custom]));

      expect(report).toContain('• custom test [customHelper]');
    });
  });
});
|
||||
@@ -6,7 +6,10 @@
|
||||
|
||||
import { vi } from 'vitest';
|
||||
|
||||
// Partially mock `fs`: keep the real module's exports and replace only
// `appendFileSync` with a mock function. `vi.importActual` returns a Promise,
// so the factory has to be async and await it; spreading the un-awaited
// Promise would copy none of the real exports into the mock.
vi.mock('fs', async () => {
  const actual = await vi.importActual<typeof import('fs')>('fs');
  return {
    ...actual,
    appendFileSync: vi.fn(),
  };
});
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { glob } from 'glob';
|
||||
|
||||
import {
|
||||
analyzeEvalSource,
|
||||
type EvalCaseRecord,
|
||||
type EvalFileAnalysis,
|
||||
type EvalAnalysisDiagnostic,
|
||||
type EvalPolicy,
|
||||
} from './eval-analysis.js';
|
||||
|
||||
/**
 * Aggregated outcome of an eval inventory scan, as produced by
 * `collectInventory` and consumed by `formatInventoryReport`.
 */
export interface InventoryResult {
  /** Number of eval files included in the scan. */
  totalFiles: number;
  /** Number of eval cases found across all scanned files. */
  totalCases: number;
  /** Per-file analysis results. */
  files: EvalFileAnalysis[];
  /** All eval cases from all files, combined into a single list. */
  cases: ReadonlyArray<EvalCaseRecord>;
  /** All analyzer diagnostics from all files, combined into a single list. */
  diagnostics: ReadonlyArray<EvalAnalysisDiagnostic>;
}
|
||||
|
||||
/**
 * Scans `<repoRoot>/evals` for `*.eval.ts` / `*.eval.tsx` files, runs the
 * static analyzer over each file's source text, and aggregates the outcome.
 *
 * Files are processed one at a time in sorted path order, so the combined
 * case and diagnostic lists follow that same order.
 *
 * @param repoRoot Directory that contains the `evals/` folder.
 * @returns Per-file analyses plus the combined cases, diagnostics and counts.
 */
export async function collectInventory(
  repoRoot: string,
): Promise<InventoryResult> {
  const discovered = await glob('**/*.eval.{ts,tsx}', {
    cwd: path.join(repoRoot, 'evals'),
    absolute: true,
    nodir: true,
  });
  discovered.sort();

  const analyses: EvalFileAnalysis[] = [];
  for (const evalPath of discovered) {
    const contents = await fs.promises.readFile(evalPath, 'utf-8');
    analyses.push(
      analyzeEvalSource(contents, { filePath: evalPath, repoRoot }),
    );
  }

  // Flatten the per-file lists while keeping the file order intact.
  const cases: EvalCaseRecord[] = analyses.flatMap((analysis) => [
    ...analysis.cases,
  ]);
  const diagnostics: EvalAnalysisDiagnostic[] = analyses.flatMap(
    (analysis) => [...analysis.diagnostics],
  );

  return {
    totalFiles: analyses.length,
    totalCases: cases.length,
    files: analyses,
    cases,
    diagnostics,
  };
}
|
||||
|
||||
/**
 * Formats an InventoryResult into a human-readable report string.
 *
 * The report consists of, in order:
 *  - a title and a one-line summary of file, case and diagnostic counts;
 *  - a "By Policy" section listing cases per policy, sub-grouped by file;
 *  - a "By Suite" section listing cases per suite name, with cases that
 *    have no suite collected under "(no suite)" and placed last;
 *  - a "Diagnostics" section, emitted only when diagnostics exist.
 *
 * Policies appear in a fixed order. Any policy value found on a case that is
 * not part of that fixed order is appended afterwards (sorted), so every case
 * counted in the summary is also listed under "By Policy".
 *
 * @param result The inventory to render.
 * @returns The report as a single newline-joined string.
 */
export function formatInventoryReport(result: InventoryResult): string {
  const lines: string[] = [];

  lines.push('Eval Inventory');
  lines.push('══════════════');
  lines.push('');
  lines.push(
    `${result.totalFiles} files · ${result.totalCases} cases · ${result.diagnostics.length} diagnostics`,
  );
  lines.push('');

  // --- By Policy ---
  lines.push('By Policy');
  lines.push('─────────');

  const byPolicy = groupBy(result.cases, (c) => c.policy);
  const policyOrder: EvalPolicy[] = [
    'ALWAYS_PASSES',
    'USUALLY_PASSES',
    'USUALLY_FAILS',
    'unknown',
  ];

  // Policies present in the data but missing from policyOrder would otherwise
  // be dropped from this section; list them after the known ones.
  const listedPolicies = new Set<string>(policyOrder);
  const unlistedPolicies = [...byPolicy.keys()]
    .filter((policy) => !listedPolicies.has(policy))
    .sort();

  for (const policy of [...policyOrder, ...unlistedPolicies]) {
    const cases = byPolicy.get(policy);
    if (!cases || cases.length === 0) {
      continue;
    }

    lines.push(`${policy} (${cases.length} cases)`);

    const byFile = groupBy(cases, (c) => c.relativePath);
    for (const [filePath, fileCases] of byFile) {
      lines.push(` ${filePath}`);
      for (const evalCase of fileCases) {
        lines.push(` • ${evalCase.name} [${evalCase.helperName}]`);
      }
    }
    lines.push('');
  }

  // --- By Suite ---
  lines.push('By Suite');
  lines.push('────────');

  const bySuite = groupBy(result.cases, (c) => c.suiteName ?? '(no suite)');
  // Alphabetical by suite name, with the "(no suite)" bucket forced last.
  const suiteNames = [...bySuite.keys()].sort((a, b) => {
    if (a === b) return 0;
    if (a === '(no suite)') return 1;
    if (b === '(no suite)') return -1;
    return a.localeCompare(b, 'en');
  });

  for (const suite of suiteNames) {
    // Every name comes from bySuite.keys(), so the lookup always succeeds.
    const cases = bySuite.get(suite) ?? [];
    lines.push(`${suite} (${cases.length} cases)`);

    for (const evalCase of cases) {
      lines.push(
        ` • ${evalCase.name} [${evalCase.relativePath}] (${evalCase.policy})`,
      );
    }
    lines.push('');
  }

  // --- Diagnostics ---
  if (result.diagnostics.length > 0) {
    // Map each analyzed file's path to its relative path for display.
    const filePaths = new Map<string, string>();
    for (const f of result.files) {
      filePaths.set(f.filePath, f.relativePath);
    }

    lines.push('Diagnostics');
    lines.push('───────────');
    for (const diagnostic of result.diagnostics) {
      // '<inline>' is shown verbatim; other paths fall back to the raw
      // diagnostic path when the file is not present in result.files.
      const displayPath =
        diagnostic.filePath === '<inline>'
          ? diagnostic.filePath
          : (filePaths.get(diagnostic.filePath) ?? diagnostic.filePath);
      lines.push(
        `⚠ ${displayPath}:${diagnostic.location.line}:${diagnostic.location.column} — ${diagnostic.message}`,
      );
    }
    lines.push('');
  }

  return lines.join('\n');
}
|
||||
|
||||
/**
 * Buckets `items` by the string key that `keyFn` returns for each one.
 *
 * Keys appear in the returned Map in the order they are first seen, and the
 * items inside each bucket keep their input order.
 *
 * @param items Values to group; the array itself is not modified.
 * @param keyFn Computes the bucket key for an item.
 * @returns A Map from key to the items that produced that key.
 */
function groupBy<T>(
  items: readonly T[],
  keyFn: (item: T) => string,
): Map<string, T[]> {
  const buckets = new Map<string, T[]>();

  for (const entry of items) {
    const bucketKey = keyFn(entry);
    const bucket = buckets.get(bucketKey) ?? [];
    // A stored bucket always holds at least one item, so an empty one is new.
    if (bucket.length === 0) {
      buckets.set(bucketKey, bucket);
    }
    bucket.push(entry);
  }

  return buckets;
}
|
||||
Reference in New Issue
Block a user