feat: add eval:inventory CLI command and reporting logic (#28009)

2026-06-22 09:17:33 -07:00 · 2026-06-19 23:31:01 +05:30
parent 6613e129de
commit c22137ea0a
5 changed files with 412 additions and 4 deletions
@@ -32,6 +32,7 @@
    "schema:settings": "tsx ./scripts/generate-settings-schema.ts",
    "docs:settings": "tsx ./scripts/generate-settings-doc.ts",
    "docs:keybindings": "tsx ./scripts/generate-keybindings-doc.ts",
+    "eval:inventory": "tsx ./scripts/eval-inventory-cli.ts",
    "build": "node scripts/build.js",
    "build-and-start": "npm run build && npm run start --",
    "build:vscode": "node scripts/build_vscode_companion.js",
@@ -0,0 +1,46 @@
+#!/usr/bin/env tsx
+
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @fileoverview CLI entry point for the eval inventory command.
+ *
+ * Scans all eval source files, runs the static analyzer on each,
+ * and prints a human-readable inventory report grouped by policy,
+ * file, and suite.
+ *
+ * Usage:
+ *   npm run eval:inventory
+ *   npm run eval:inventory -- --root /path/to/repo
+ */
+
+import {
+  collectInventory,
+  formatInventoryReport,
+} from './utils/eval-inventory.js';
+
+/**
+ * CLI entry point: resolves the repository root, collects the eval
+ * inventory, and prints the formatted report to stdout.
+ *
+ * The root defaults to the current working directory and can be overridden
+ * with `--root <path>`. The process exits with status 1 when `--root` is
+ * passed without a value or when no eval files are found.
+ */
+async function main(): Promise<void> {
+  const rootFlagIndex = process.argv.indexOf('--root');
+  let repoRoot = process.cwd();
+
+  if (rootFlagIndex !== -1) {
+    const rootValue = process.argv[rootFlagIndex + 1];
+    if (!rootValue) {
+      // A dangling `--root` must not silently fall back to the cwd: that
+      // would report on a different checkout than the caller asked for.
+      console.error(
+        'Missing value for --root. Usage: npm run eval:inventory -- --root /path/to/repo',
+      );
+      process.exit(1);
+    }
+    repoRoot = rootValue;
+  }
+
+  const result = await collectInventory(repoRoot);
+
+  if (result.totalFiles === 0) {
+    console.error('No eval files found under evals/.');
+    process.exit(1);
+  }
+
+  console.log(formatInventoryReport(result));
+}
+
+// Run the CLI; any rejection that escapes main() is logged and turned into
+// a non-zero exit status.
+main().catch((err) => {
+  console.error('Fatal error:', err);
+  process.exit(1);
+});
@@ -0,0 +1,185 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import path from 'node:path';
+import { describe, expect, it } from 'vitest';
+import {
+  collectInventory,
+  formatInventoryReport,
+  type InventoryResult,
+} from '../utils/eval-inventory.js';
+import type { EvalCaseRecord } from '../utils/eval-analysis.js';
+
+/**
+ * Builds an EvalCaseRecord fixture for the formatter tests.
+ *
+ * @param overrides - Fields that replace the fixture defaults.
+ * @returns A record with the defaults below, overlaid with `overrides`.
+ */
+function makeCaseRecord(
+  overrides: Partial<EvalCaseRecord> = {},
+): EvalCaseRecord {
+  const defaults: EvalCaseRecord = {
+    filePath: '/repo/evals/test.eval.ts',
+    relativePath: 'evals/test.eval.ts',
+    helperName: 'evalTest',
+    baseHelperName: 'evalTest',
+    policy: 'USUALLY_PASSES',
+    name: 'test case',
+    hasFiles: false,
+    hasPrompt: true,
+    location: { line: 1, column: 1 },
+  };
+
+  // Later spreads win, so caller-supplied fields take precedence.
+  return { ...defaults, ...overrides };
+}
+
+// Test suite for the eval inventory utilities: collectInventory is run
+// against real directories, while formatInventoryReport is fed hand-built
+// InventoryResult objects.
+describe('eval-inventory', () => {
+  describe('collectInventory', () => {
+    it('discovers eval files from the real evals directory', async () => {
+      // The root is resolved two directories above this test file.
+      const repoRoot = path.resolve(import.meta.dirname, '../../');
+      const result = await collectInventory(repoRoot);
+
+      // Lower bounds only, so adding evals does not break the test.
+      // NOTE(review): 36/90 presumably reflect the eval count when this was
+      // written — confirm before tightening.
+      expect(result.totalFiles).toBeGreaterThanOrEqual(36);
+      expect(result.totalCases).toBeGreaterThanOrEqual(90);
+      expect(result.files.length).toBe(result.totalFiles);
+      expect(result.cases.length).toBe(result.totalCases);
+
+      for (const evalCase of result.cases) {
+        expect(evalCase.name).toBeTruthy();
+        expect(evalCase.relativePath).toBeTruthy();
+        expect(evalCase.relativePath).toMatch(/^evals\//);
+      }
+    });
+
+    it('returns zero counts for a directory with no eval files', async () => {
+      // Uses this test file's own directory as the root; presumably it has
+      // no `evals/` subdirectory containing eval files.
+      const result = await collectInventory(import.meta.dirname);
+
+      expect(result.totalFiles).toBe(0);
+      expect(result.totalCases).toBe(0);
+      expect(result.files).toEqual([]);
+      expect(result.cases).toEqual([]);
+    });
+  });
+
+  describe('formatInventoryReport', () => {
+    it('includes summary line with correct counts', () => {
+      // The summary is expected to echo totalFiles/totalCases as given,
+      // even though `files` is left empty here.
+      const result: InventoryResult = {
+        totalFiles: 2,
+        totalCases: 3,
+        files: [],
+        cases: [
+          makeCaseRecord({ policy: 'ALWAYS_PASSES', name: 'case-1' }),
+          makeCaseRecord({ policy: 'USUALLY_PASSES', name: 'case-2' }),
+          makeCaseRecord({ policy: 'USUALLY_PASSES', name: 'case-3' }),
+        ],
+        diagnostics: [],
+      };
+
+      const report = formatInventoryReport(result);
+
+      expect(report).toContain('2 files · 3 cases · 0 diagnostics');
+    });
+
+    it('groups cases by policy', () => {
+      const result: InventoryResult = {
+        totalFiles: 1,
+        totalCases: 2,
+        files: [],
+        cases: [
+          makeCaseRecord({
+            policy: 'ALWAYS_PASSES',
+            name: 'stable test',
+          }),
+          makeCaseRecord({
+            policy: 'USUALLY_PASSES',
+            name: 'flaky test',
+          }),
+        ],
+        diagnostics: [],
+      };
+
+      const report = formatInventoryReport(result);
+
+      expect(report).toContain('By Policy');
+      expect(report).toContain('ALWAYS_PASSES (1 cases)');
+      expect(report).toContain('USUALLY_PASSES (1 cases)');
+      expect(report).toContain('• stable test');
+      expect(report).toContain('• flaky test');
+    });
+
+    it('groups cases by suite name', () => {
+      // The second case has no suiteName and is expected under the
+      // '(no suite)' heading.
+      const result: InventoryResult = {
+        totalFiles: 1,
+        totalCases: 2,
+        files: [],
+        cases: [
+          makeCaseRecord({ suiteName: 'default', name: 'suite-test' }),
+          makeCaseRecord({ name: 'no-suite-test' }),
+        ],
+        diagnostics: [],
+      };
+
+      const report = formatInventoryReport(result);
+
+      expect(report).toContain('By Suite');
+      expect(report).toContain('default (1 cases)');
+      expect(report).toContain('(no suite) (1 cases)');
+    });
+
+    it('shows diagnostics section when diagnostics exist', () => {
+      // `files` is empty, so the diagnostic's filePath is expected to be
+      // printed exactly as given.
+      const result: InventoryResult = {
+        totalFiles: 1,
+        totalCases: 0,
+        files: [],
+        cases: [],
+        diagnostics: [
+          {
+            severity: 'warning',
+            message: 'Could not resolve policy',
+            filePath: '/repo/evals/bad.eval.ts',
+            location: { line: 5, column: 3 },
+          },
+        ],
+      };
+
+      const report = formatInventoryReport(result);
+
+      expect(report).toContain('Diagnostics');
+      expect(report).toContain('1 diagnostics');
+      expect(report).toContain(
+        '⚠ /repo/evals/bad.eval.ts:5:3 — Could not resolve policy',
+      );
+    });
+
+    it('omits diagnostics section when there are none', () => {
+      const result: InventoryResult = {
+        totalFiles: 1,
+        totalCases: 1,
+        files: [],
+        cases: [makeCaseRecord()],
+        diagnostics: [],
+      };
+
+      const report = formatInventoryReport(result);
+
+      // The summary line reads '0 diagnostics' (lowercase), so the
+      // capitalised heading check below is not tripped by it.
+      expect(report).not.toContain('Diagnostics');
+      expect(report).not.toContain('⚠');
+    });
+
+    it('includes helper name in case listing', () => {
+      const result: InventoryResult = {
+        totalFiles: 1,
+        totalCases: 1,
+        files: [],
+        cases: [
+          makeCaseRecord({
+            helperName: 'customHelper',
+            name: 'custom test',
+          }),
+        ],
+        diagnostics: [],
+      };
+
+      const report = formatInventoryReport(result);
+
+      expect(report).toContain('• custom test [customHelper]');
+    });
+  });
+});
@@ -6,7 +6,10 @@

 import { vi } from 'vitest';

-vi.mock('fs', () => ({
-  ...vi.importActual('fs'),
-  appendFileSync: vi.fn(),
-}));
+// Partially mock `fs`: keep the real module's exports and stub out only
+// `appendFileSync`. `vi.importActual` is awaited so that the module's
+// exports, rather than a pending promise, are what gets spread.
+vi.mock('fs', async () => ({
+  ...(await vi.importActual<typeof import('fs')>('fs')),
+  appendFileSync: vi.fn(),
+}));
@@ -0,0 +1,173 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { glob } from 'glob';
+
+import {
+  analyzeEvalSource,
+  type EvalCaseRecord,
+  type EvalFileAnalysis,
+  type EvalAnalysisDiagnostic,
+  type EvalPolicy,
+} from './eval-analysis.js';
+
+/**
+ * Aggregated output of an inventory scan; produced by collectInventory and
+ * rendered by formatInventoryReport.
+ */
+export interface InventoryResult {
+  /** Number of eval files; collectInventory sets this to `files.length`. */
+  totalFiles: number;
+  /** Number of eval cases; collectInventory sets this to `cases.length`. */
+  totalCases: number;
+  /** Per-file analysis results, one entry per analyzed file. */
+  files: EvalFileAnalysis[];
+  /** Cases from every analyzed file, concatenated in file order. */
+  cases: readonly EvalCaseRecord[];
+  /** Diagnostics from every analyzed file, concatenated in file order. */
+  diagnostics: readonly EvalAnalysisDiagnostic[];
+}
+
+/**
+ * Finds every `*.eval.ts` / `*.eval.tsx` file below `<repoRoot>/evals`,
+ * runs the static analyzer on each one, and aggregates the results.
+ *
+ * Files are processed in sorted path order and read one at a time.
+ *
+ * @param repoRoot - Directory that contains the `evals/` folder.
+ * @returns Per-file analyses plus the flattened cases and diagnostics.
+ */
+export async function collectInventory(
+  repoRoot: string,
+): Promise<InventoryResult> {
+  const matches = await glob('**/*.eval.{ts,tsx}', {
+    cwd: path.join(repoRoot, 'evals'),
+    absolute: true,
+    nodir: true,
+  });
+
+  // Sort in place so the report order does not depend on traversal order.
+  matches.sort();
+
+  // First pass: analyze each file, in order.
+  const files: EvalFileAnalysis[] = [];
+  for (const filePath of matches) {
+    const sourceText = await fs.promises.readFile(filePath, 'utf-8');
+    files.push(analyzeEvalSource(sourceText, { filePath, repoRoot }));
+  }
+
+  // Second pass: flatten per-file cases and diagnostics, keeping file order.
+  const cases: EvalCaseRecord[] = [];
+  const diagnostics: EvalAnalysisDiagnostic[] = [];
+  for (const analysis of files) {
+    cases.push(...analysis.cases);
+    diagnostics.push(...analysis.diagnostics);
+  }
+
+  return {
+    totalFiles: files.length,
+    totalCases: cases.length,
+    files,
+    cases,
+    diagnostics,
+  };
+}
+
+/**
+ * Formats an InventoryResult into a human-readable report string.
+ *
+ * The report consists of a title, a summary line, a "By Policy" section
+ * (cases grouped by policy, then by file), a "By Suite" section (cases
+ * without a suite are listed last under "(no suite)"), and — only when at
+ * least one diagnostic exists — a "Diagnostics" section.
+ *
+ * @param result - Aggregated inventory to render.
+ * @returns The report as a single newline-joined string.
+ */
+export function formatInventoryReport(result: InventoryResult): string {
+  const lines: string[] = [];
+
+  lines.push('Eval Inventory');
+  lines.push('══════════════');
+  lines.push('');
+  lines.push(
+    `${result.totalFiles} files · ${result.totalCases} cases · ${result.diagnostics.length} diagnostics`,
+  );
+  lines.push('');
+
+  // --- By Policy ---
+  lines.push('By Policy');
+  lines.push('─────────');
+
+  const byPolicy = groupBy(result.cases, (c) => c.policy);
+  const preferredOrder: EvalPolicy[] = [
+    'ALWAYS_PASSES',
+    'USUALLY_PASSES',
+    'USUALLY_FAILS',
+    'unknown',
+  ];
+  const preferred = new Set<string>(preferredOrder);
+  // Policies missing from the preferred order are appended in first-seen
+  // order, so every case counted in the summary also appears in this section.
+  const policyOrder: string[] = [
+    ...preferredOrder,
+    ...[...byPolicy.keys()].filter((policy) => !preferred.has(policy)),
+  ];
+
+  for (const policy of policyOrder) {
+    const cases = byPolicy.get(policy);
+    if (!cases || cases.length === 0) {
+      continue;
+    }
+
+    lines.push(`${policy} (${cases.length} cases)`);
+
+    const byFile = groupBy(cases, (c) => c.relativePath);
+    for (const [filePath, fileCases] of byFile) {
+      lines.push(`  ${filePath}`);
+      for (const evalCase of fileCases) {
+        lines.push(`    • ${evalCase.name} [${evalCase.helperName}]`);
+      }
+    }
+    lines.push('');
+  }
+
+  // --- By Suite ---
+  lines.push('By Suite');
+  lines.push('────────');
+
+  const bySuite = groupBy(result.cases, (c) => c.suiteName ?? '(no suite)');
+  // Named suites sort alphabetically; the "(no suite)" bucket always sorts last.
+  const suiteNames = [...bySuite.keys()].sort((a, b) => {
+    if (a === b) return 0;
+    if (a === '(no suite)') return 1;
+    if (b === '(no suite)') return -1;
+    return a.localeCompare(b, 'en');
+  });
+
+  for (const suite of suiteNames) {
+    // Every name comes from bySuite.keys(), so the fallback is never used;
+    // it only avoids a non-null assertion.
+    const cases = bySuite.get(suite) ?? [];
+    lines.push(`${suite} (${cases.length} cases)`);
+
+    for (const evalCase of cases) {
+      lines.push(
+        `  • ${evalCase.name} [${evalCase.relativePath}] (${evalCase.policy})`,
+      );
+    }
+    lines.push('');
+  }
+
+  // --- Diagnostics ---
+  if (result.diagnostics.length > 0) {
+    // Map absolute file paths to their repo-relative form for display.
+    const filePaths = new Map<string, string>();
+    for (const f of result.files) {
+      filePaths.set(f.filePath, f.relativePath);
+    }
+
+    lines.push('Diagnostics');
+    lines.push('───────────');
+    for (const diagnostic of result.diagnostics) {
+      // Fall back to the raw path when the file is not in `result.files`.
+      const displayPath =
+        diagnostic.filePath === '<inline>'
+          ? diagnostic.filePath
+          : (filePaths.get(diagnostic.filePath) ?? diagnostic.filePath);
+      lines.push(
+        `⚠ ${displayPath}:${diagnostic.location.line}:${diagnostic.location.column} — ${diagnostic.message}`,
+      );
+    }
+    lines.push('');
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * Buckets `items` by the string key that `keyFn` returns for each item.
+ *
+ * Keys appear in the returned Map in first-seen order, and each bucket
+ * keeps its items in input order.
+ */
+function groupBy<T>(
+  items: readonly T[],
+  keyFn: (item: T) => string,
+): Map<string, T[]> {
+  const buckets = new Map<string, T[]>();
+  for (const entry of items) {
+    const key = keyFn(entry);
+    let bucket = buckets.get(key);
+    if (bucket === undefined) {
+      bucket = [];
+      buckets.set(key, bucket);
+    }
+    bucket.push(entry);
+  }
+  return buckets;
+}