mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-02 15:13:15 -07:00
feat(evals): implement related evaluation system for targeted testing
This commit is contained in:
+167
-26
@@ -3,7 +3,23 @@
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fileoverview Intelligence layer for detecting steering and behavior changes.
|
||||
*
|
||||
* This script identifies if code changes affect model steering (system prompts,
|
||||
* tool definitions, agent instructions) and maps them to relevant evaluation
|
||||
* suites. It supports both CI (GitHub Actions) and local development workflows.
|
||||
*
|
||||
* Detection Methods:
|
||||
* 1. Path-based: Monitors critical steering and tool directories.
|
||||
* 2. Signature-based: Scans diff content for core steering primitives
|
||||
* (e.g., ToolDefinition, inputSchema).
|
||||
* 3. Suite-aware: Uses evals/suites.json to identify related tests for surgical runs.
|
||||
*/
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
|
||||
const CORE_STEERING_PATHS = [
|
||||
'packages/core/src/prompts/',
|
||||
@@ -20,46 +36,132 @@ const STEERING_SIGNATURES = [
|
||||
"kind: 'local'",
|
||||
];
|
||||
|
||||
/**
 * Minimal glob matcher used to map changed file paths to eval suites.
 *
 * Supported syntax (paths are expected to use forward slashes):
 *   - a single star matches any run of characters inside one path segment;
 *   - a double star matches any run of characters, including slashes;
 *   - a double star followed by a slash matches zero or more whole directories;
 *   - a pattern without any star must equal the file path exactly.
 * Every other character, including regex metacharacters, is matched literally.
 *
 * @param {string} file - Repository-relative path of the changed file.
 * @param {string} pattern - Glob pattern to test the path against.
 * @returns {boolean} True when the whole path matches the pattern.
 */
function minimatch(file, pattern) {
  if (!pattern.includes('*')) {
    return file === pattern;
  }

  // Tokenize in a single pass so the expansion of a double star is never
  // re-processed by the single-star rule (the previous chained .replace()
  // calls turned `.*` into `.[^/]*`, which cannot cross a directory boundary).
  const source = pattern
    .split(/(\*\*\/|\*\*|\*)/)
    .map((token) => {
      if (token === '**/') return '(?:.*/)?';
      if (token === '**') return '.*';
      if (token === '*') return '[^/]*';
      // Literal text: escape everything that is special inside a RegExp.
      return token.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
    })
    .join('');

  // Anchoring keeps the trailing slash of a `dir/**` pattern significant, so
  // `packages/core/**` no longer matches `packages/core-utils/...`.
  return new RegExp(`^${source}$`).test(file);
}
|
||||
|
||||
function main() {
|
||||
const targetBranch = process.env.GITHUB_BASE_REF || 'main';
|
||||
const verbose = process.argv.includes('--verbose');
|
||||
const steeringOnly = process.argv.includes('--steering-only');
|
||||
const isRelatedMode = process.argv.includes('--related');
|
||||
const isJsonMode = process.argv.includes('--json');
|
||||
|
||||
try {
|
||||
const remoteUrl = process.env.GITHUB_REPOSITORY
|
||||
? `https://github.com/${process.env.GITHUB_REPOSITORY}.git`
|
||||
: 'origin';
|
||||
|
||||
// Fetch target branch from the remote.
|
||||
execSync(`git fetch ${remoteUrl} ${targetBranch}`, {
|
||||
stdio: 'ignore',
|
||||
});
|
||||
let changedFiles = [];
|
||||
const isCi = !!process.env.GITHUB_ACTIONS;
|
||||
|
||||
// Get changed files using the triple-dot syntax which correctly handles merge commits
|
||||
const head = process.env.PR_HEAD_SHA || 'HEAD';
|
||||
const changedFiles = execSync(`git diff --name-only FETCH_HEAD...${head}`, {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
if (isCi) {
|
||||
try {
|
||||
// 1. Try fetching from remote (CI environment)
|
||||
execSync(`git fetch ${remoteUrl} ${targetBranch}`, {
|
||||
stdio: 'ignore',
|
||||
});
|
||||
|
||||
// Get changed files using the triple-dot syntax which correctly handles merge commits
|
||||
const head = process.env.PR_HEAD_SHA || 'HEAD';
|
||||
changedFiles = execSync(`git diff --name-only FETCH_HEAD...${head}`, {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
} catch (e) {
|
||||
if (verbose)
|
||||
process.stderr.write(
|
||||
`Warning: git fetch failed in CI: ${e.message}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Local fallback or if CI fetch failed: Try diffing against target branch
|
||||
if (changedFiles.length === 0) {
|
||||
try {
|
||||
changedFiles = execSync(`git diff --name-only ${targetBranch}`, {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
} catch {
|
||||
// 3. Last resort: Just diff against HEAD (uncommitted changes only)
|
||||
changedFiles = execSync('git diff --name-only HEAD', {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
// Also include untracked files in local mode
|
||||
const untracked = execSync('git ls-files --others --exclude-standard', {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
changedFiles = [...new Set([...changedFiles, ...untracked])];
|
||||
}
|
||||
|
||||
let detected = false;
|
||||
const reasons = [];
|
||||
const affectedSuites = new Set();
|
||||
const rationales = [];
|
||||
|
||||
// Load suites for --related mode
|
||||
let suitesConfig = null;
|
||||
if (isRelatedMode) {
|
||||
try {
|
||||
suitesConfig = JSON.parse(
|
||||
fs.readFileSync('evals/suites.json', 'utf-8'),
|
||||
);
|
||||
} catch {
|
||||
process.stderr.write(`Warning: Could not load evals/suites.json\n`);
|
||||
}
|
||||
}
|
||||
|
||||
// 1. Path-based detection
|
||||
for (const file of changedFiles) {
|
||||
if (CORE_STEERING_PATHS.some((prefix) => file.startsWith(prefix))) {
|
||||
detected = true;
|
||||
reasons.push(`Matched core steering path: ${file}`);
|
||||
if (!verbose) break;
|
||||
}
|
||||
if (
|
||||
!steeringOnly &&
|
||||
TEST_PATHS.some((prefix) => file.startsWith(prefix))
|
||||
TEST_PATHS.some((prefix) => file.startsWith(prefix)) &&
|
||||
file.endsWith('.eval.ts')
|
||||
) {
|
||||
detected = true;
|
||||
reasons.push(`Matched test path: ${file}`);
|
||||
if (!verbose) break;
|
||||
reasons.push(`Matched test file: ${file}`);
|
||||
}
|
||||
|
||||
// Related suite detection
|
||||
if (suitesConfig) {
|
||||
for (const [suiteName, suite] of Object.entries(suitesConfig)) {
|
||||
if (suiteName === 'allowedOverlaps' || !suite.patterns) continue;
|
||||
|
||||
if (suite.patterns.some((pattern) => minimatch(file, pattern))) {
|
||||
affectedSuites.add(suiteName);
|
||||
rationales.push(
|
||||
`Testing **${suiteName}** because **${file}** was modified.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,15 +172,30 @@ function main() {
|
||||
);
|
||||
if (coreChanges.length > 0) {
|
||||
// Get the actual diff content for core files
|
||||
const diff = execSync(
|
||||
`git diff -U0 FETCH_HEAD...${head} -- packages/core/src/`,
|
||||
{ encoding: 'utf-8' },
|
||||
);
|
||||
// We need to be careful with the diff command depending on if we have FETCH_HEAD
|
||||
let diffCmd = '';
|
||||
try {
|
||||
const head = process.env.PR_HEAD_SHA || 'HEAD';
|
||||
diffCmd = `git diff -U0 FETCH_HEAD...${head} -- packages/core/src/`;
|
||||
execSync('git rev-parse FETCH_HEAD', { stdio: 'ignore' });
|
||||
} catch {
|
||||
diffCmd = `git diff -U0 ${targetBranch} -- packages/core/src/`;
|
||||
}
|
||||
|
||||
const diff = execSync(diffCmd, { encoding: 'utf-8' });
|
||||
for (const sig of STEERING_SIGNATURES) {
|
||||
if (diff.includes(sig)) {
|
||||
detected = true;
|
||||
reasons.push(`Matched steering signature in core: ${sig}`);
|
||||
if (!verbose) break;
|
||||
|
||||
// If we detected a steering signature, mark core_steering suite
|
||||
if (isRelatedMode) {
|
||||
affectedSuites.add('core_steering');
|
||||
rationales.push(
|
||||
`Testing **core_steering** because matched signature '${sig}' in core files.`,
|
||||
);
|
||||
}
|
||||
if (!verbose && !isRelatedMode) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,14 +206,38 @@ function main() {
|
||||
reasons.forEach((r) => process.stderr.write(` - ${r}\n`));
|
||||
}
|
||||
|
||||
process.stdout.write(detected ? 'true' : 'false');
|
||||
if (isJsonMode) {
|
||||
process.stdout.write(
|
||||
JSON.stringify(
|
||||
{
|
||||
detected,
|
||||
reasons,
|
||||
affectedSuites: Array.from(affectedSuites),
|
||||
rationales,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
} else {
|
||||
process.stdout.write(detected ? 'true' : 'false');
|
||||
}
|
||||
} catch (error) {
|
||||
// If anything fails (e.g., no git history), run evals/guidance to be safe
|
||||
process.stderr.write(
|
||||
'Warning: Failed to determine if changes occurred. Defaulting to true.\n',
|
||||
);
|
||||
if (isJsonMode) {
|
||||
process.stdout.write(
|
||||
JSON.stringify({
|
||||
detected: true,
|
||||
reasons: [`Error during detection: ${error.message}`],
|
||||
affectedSuites: ['core_steering'],
|
||||
rationales: [
|
||||
'Error during detection: running all stable evals for safety.',
|
||||
],
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
process.stdout.write('true');
|
||||
}
|
||||
process.stderr.write(String(error) + '\n');
|
||||
process.stdout.write('true');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
* to ensure high-signal validation and minimize noise.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import { fetchNightlyHistory, escapeRegex } from './eval_utils.js';
|
||||
|
||||
const LOOKBACK_COUNT = 6;
|
||||
@@ -25,11 +26,24 @@ const AGGREGATE_PASS_RATE_THRESHOLD = 0.8; // Weekly signal (e.g., 15/18)
|
||||
*/
|
||||
function main() {
|
||||
const targetModel = process.argv[2];
|
||||
if (!targetModel) {
|
||||
if (!targetModel || targetModel.startsWith('--')) {
|
||||
console.error('❌ Error: No target model specified.');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Parse --suites argument
|
||||
const suitesArgIndex = process.argv.indexOf('--suites');
|
||||
let requestedSuites = null;
|
||||
if (suitesArgIndex !== -1 && process.argv[suitesArgIndex + 1]) {
|
||||
requestedSuites = process.argv[suitesArgIndex + 1]
|
||||
.split(',')
|
||||
.map((s) => s.trim());
|
||||
}
|
||||
|
||||
console.error(`🔍 Identifying trustworthy evals for model: ${targetModel}`);
|
||||
if (requestedSuites) {
|
||||
console.error(`📂 Filtering by suites: ${requestedSuites.join(', ')}`);
|
||||
}
|
||||
|
||||
const history = fetchNightlyHistory(LOOKBACK_COUNT);
|
||||
if (history.length === 0) {
|
||||
@@ -37,6 +51,32 @@ function main() {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Load suites configuration
|
||||
let allowedFiles = null;
|
||||
let runAllStable = false;
|
||||
if (requestedSuites) {
|
||||
try {
|
||||
const suitesConfig = JSON.parse(
|
||||
fs.readFileSync('evals/suites.json', 'utf-8'),
|
||||
);
|
||||
allowedFiles = new Set();
|
||||
for (const suiteName of requestedSuites) {
|
||||
const suite = suitesConfig[suiteName];
|
||||
if (suite) {
|
||||
if (suite.evals.includes('ALL_ALWAYS_PASSING')) {
|
||||
runAllStable = true;
|
||||
} else {
|
||||
suite.evals.forEach((file) => allowedFiles.add(file));
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(
|
||||
`⚠️ Warning: Could not load evals/suites.json or match suites: ${e.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate results for the target model across all history
|
||||
const testHistories = {}; // { [testName]: { totalPassed: 0, totalRuns: 0, dailyRates: [], file: string } }
|
||||
|
||||
@@ -83,11 +123,28 @@ function main() {
|
||||
const isAggregateHighSignal = aggregateRate > AGGREGATE_PASS_RATE_THRESHOLD;
|
||||
|
||||
if (isDailyStable && isAggregateHighSignal) {
|
||||
trustworthyTests.push(testName);
|
||||
if (info.file) {
|
||||
const match = info.file.match(/evals\/.*\.eval\.ts/);
|
||||
if (match) {
|
||||
trustworthyFiles.add(match[0]);
|
||||
// Suite filtering logic
|
||||
let isFileAllowed = true;
|
||||
if (requestedSuites && !runAllStable) {
|
||||
if (info.file) {
|
||||
const match = info.file.match(/evals\/.*\.eval\.ts/);
|
||||
if (match && !allowedFiles.has(match[0])) {
|
||||
isFileAllowed = false;
|
||||
} else if (!match) {
|
||||
isFileAllowed = false;
|
||||
}
|
||||
} else {
|
||||
isFileAllowed = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (isFileAllowed) {
|
||||
trustworthyTests.push(testName);
|
||||
if (info.file) {
|
||||
const match = info.file.match(/evals\/.*\.eval\.ts/);
|
||||
if (match) {
|
||||
trustworthyFiles.add(match[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -99,10 +156,14 @@ function main() {
|
||||
`✅ Found ${trustworthyTests.length} trustworthy tests across ${trustworthyFiles.size} files:`,
|
||||
);
|
||||
trustworthyTests.sort().forEach((name) => console.error(` - ${name}`));
|
||||
console.error(`\n⚪ Ignored ${volatileTests.length} volatile tests.`);
|
||||
console.error(
|
||||
`🆕 Ignored ${newTests.length} tests with insufficient history.`,
|
||||
);
|
||||
if (volatileTests.length > 0) {
|
||||
console.error(`\n⚪ Ignored ${volatileTests.length} volatile tests.`);
|
||||
}
|
||||
if (newTests.length > 0) {
|
||||
console.error(
|
||||
`🆕 Ignored ${newTests.length} tests with insufficient history.`,
|
||||
);
|
||||
}
|
||||
|
||||
// Output the list of names as a regex-friendly pattern for vitest -t
|
||||
const pattern = trustworthyTests.map((name) => escapeRegex(name)).join('|');
|
||||
|
||||
@@ -500,6 +500,9 @@ function main() {
|
||||
if (args.includes('--check-github-actions-pinning')) {
|
||||
runGithubActionsPinningLinter();
|
||||
}
|
||||
if (args.includes('--eval-suites')) {
|
||||
runEvalSuiteLinter();
|
||||
}
|
||||
|
||||
if (args.length === 0) {
|
||||
setupLinters();
|
||||
@@ -511,8 +514,18 @@ function main() {
|
||||
runSensitiveKeywordLinter();
|
||||
runTSConfigLinter();
|
||||
runGithubActionsPinningLinter();
|
||||
runEvalSuiteLinter();
|
||||
console.log('\nAll linting checks passed!');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the eval suite validation script as a child process.
 *
 * The child's stdio is inherited, so its own output is shown directly.
 * If the child process fails, this process exits with code 1.
 */
export function runEvalSuiteLinter() {
  const command = 'node scripts/validate_eval_suites.js';
  const options = { stdio: 'inherit' };

  console.log('\nRunning eval suite linter...');

  try {
    execSync(command, options);
  } catch {
    // The validator has already printed its findings via inherited stdio.
    process.exit(1);
  }
}
|
||||
|
||||
main();
|
||||
|
||||
@@ -22,22 +22,62 @@ import fs from 'node:fs';
|
||||
async function main() {
|
||||
const modelList = process.env.MODEL_LIST || 'gemini-3-flash-preview';
|
||||
const models = modelList.split(',').map((m) => m.trim());
|
||||
const isRelatedMode = process.argv.includes('--related');
|
||||
|
||||
let combinedReport = '';
|
||||
let hasRegression = false;
|
||||
let detectionRationale = '';
|
||||
let affectedSuitesStr = '';
|
||||
|
||||
console.log(
|
||||
`🚀 Starting evaluation orchestration for models: ${models.join(', ')}`,
|
||||
);
|
||||
|
||||
if (isRelatedMode) {
|
||||
console.log('🔍 Identifying related evaluations based on changes...');
|
||||
try {
|
||||
const detectionOutput = execSync(
|
||||
`node scripts/changed_prompt.js --related --json`,
|
||||
{ encoding: 'utf-8', stdio: ['inherit', 'pipe', 'inherit'] },
|
||||
).trim();
|
||||
const detection = JSON.parse(detectionOutput);
|
||||
|
||||
if (detection.affectedSuites && detection.affectedSuites.length > 0) {
|
||||
affectedSuitesStr = detection.affectedSuites.join(',');
|
||||
detectionRationale = '### 🧪 Related Evaluation Rationale\n\n';
|
||||
detection.rationales.forEach((r) => {
|
||||
detectionRationale += `- ${r}\n`;
|
||||
});
|
||||
detectionRationale +=
|
||||
'\n_Something missing? [Update evals/suites.json](evals/README.md#related-testing-with-related) to adjust detection logic._\n\n---\n\n';
|
||||
} else if (!detection.detected) {
|
||||
console.log('✅ No related changes detected. Skipping evaluations.');
|
||||
process.exit(0);
|
||||
} else {
|
||||
console.log(
|
||||
'⚠️ Changes detected but no specific suites matched. Running full stable suite for safety.',
|
||||
);
|
||||
detectionRationale =
|
||||
'### 🧪 Related Evaluation Rationale\n\n- No specific suites matched. Running full stable suite for safety.\n\n---\n\n';
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(`❌ Error during suite detection: ${e.message}`);
|
||||
detectionRationale =
|
||||
'### 🧪 Related Evaluation Rationale\n\n- Error during suite detection. Running full stable suite for safety.\n\n---\n\n';
|
||||
}
|
||||
}
|
||||
|
||||
for (const model of models) {
|
||||
console.log(`\n--- Processing Model: ${model} ---`);
|
||||
|
||||
try {
|
||||
// 1. Identify Trustworthy Evals
|
||||
console.log(`🔍 Identifying trustworthy tests for ${model}...`);
|
||||
const suitesFlag = affectedSuitesStr
|
||||
? `--suites ${affectedSuitesStr}`
|
||||
: '';
|
||||
const output = execSync(
|
||||
`node scripts/get_trustworthy_evals.js "${model}"`,
|
||||
`node scripts/get_trustworthy_evals.js "${model}" ${suitesFlag}`,
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
stdio: ['inherit', 'pipe', 'inherit'], // Capture stdout but pass stdin/stderr
|
||||
@@ -83,7 +123,8 @@ async function main() {
|
||||
|
||||
// Always save the combined report to a file so the workflow can capture it cleanly
|
||||
if (combinedReport) {
|
||||
fs.writeFileSync('eval_regression_report.md', combinedReport);
|
||||
const finalReport = detectionRationale + combinedReport;
|
||||
fs.writeFileSync('eval_regression_report.md', finalReport);
|
||||
console.log(
|
||||
'\n📊 Final Markdown report saved to eval_regression_report.md',
|
||||
);
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
const SUITES_PATH = 'evals/suites.json';
|
||||
const EVALS_DIR = 'evals';
|
||||
|
||||
/**
 * Validates the suites configuration against the eval files on disk.
 *
 * Checks performed:
 *   - every file listed in a suite's `evals` exists (the ALL_ALWAYS_PASSING
 *     marker is skipped);
 *   - every listed eval file also appears verbatim in that suite's `patterns`;
 *   - no eval file is listed by more than one suite unless it is named in
 *     `allowedOverlaps`;
 *   - every `.eval.ts` file directly inside the evals directory is listed by
 *     at least one suite.
 *
 * All problems are printed to stderr and the process exits with code 1 when
 * any check fails, or when the configuration is missing or unparseable.
 * On success a confirmation message is logged.
 */
function main() {
  if (!fs.existsSync(SUITES_PATH)) {
    console.error(`❌ Error: ${SUITES_PATH} not found.`);
    process.exit(1);
  }

  let suitesConfig;
  try {
    suitesConfig = JSON.parse(fs.readFileSync(SUITES_PATH, 'utf-8'));
  } catch (e) {
    // Report malformed JSON as a lint failure instead of an uncaught stack trace.
    console.error(`❌ Error: Could not parse ${SUITES_PATH}: ${e.message}`);
    process.exit(1);
  }
  if (suitesConfig === null || typeof suitesConfig !== 'object') {
    console.error(`❌ Error: ${SUITES_PATH} must contain a JSON object.`);
    process.exit(1);
  }

  const allowedOverlaps = new Set(suitesConfig.allowedOverlaps || []);

  // Build paths with forward slashes on every platform: entries in the suites
  // file use '/', so the native separator on Windows would make every eval
  // look orphaned.
  const evalFilesOnDisk = fs
    .readdirSync(EVALS_DIR)
    .filter((f) => f.endsWith('.eval.ts'))
    .map((f) => path.posix.join(EVALS_DIR, f));

  // eval file path -> names of the suites that list it
  const evalToSuiteMap = new Map();
  const errors = [];
  let hasOverlap = false;

  // 1. Map evals to suites and check for overlaps/trigger-coverage
  for (const [suiteName, suite] of Object.entries(suitesConfig)) {
    if (suiteName === 'allowedOverlaps' || !suite || !suite.evals) continue;

    for (const evalFile of suite.evals) {
      if (evalFile === 'ALL_ALWAYS_PASSING') continue;

      if (!fs.existsSync(evalFile)) {
        errors.push(
          `Suite **${suiteName}** references non-existent file: **${evalFile}**`,
        );
        continue;
      }

      // Check if the eval file itself is in the suite's trigger patterns
      if (!suite.patterns || !suite.patterns.includes(evalFile)) {
        errors.push(
          `Trigger coverage missing: **${evalFile}** is in the **${suiteName}** suite but is missing from its **patterns** array. (Changes to the test won't trigger itself correctly).`,
        );
      }

      const owningSuites = evalToSuiteMap.get(evalFile);
      if (owningSuites && !allowedOverlaps.has(evalFile)) {
        hasOverlap = true;
        errors.push(
          `Overlap detected: **${evalFile}** is present in both **${owningSuites.join(', ')}** and **${suiteName}** suites.`,
        );
      } else {
        evalToSuiteMap.set(evalFile, [...(owningSuites || []), suiteName]);
      }
    }
  }

  // 2. Check for orphaned evals (on disk but not in suites.json)
  for (const diskFile of evalFilesOnDisk) {
    if (!evalToSuiteMap.has(diskFile)) {
      errors.push(
        `Orphaned eval detected: **${diskFile}** is not mapped to any suite in ${SUITES_PATH}.`,
      );
    }
  }

  if (errors.length > 0) {
    console.error('\n❌ Eval Suite Validation Failed:');
    errors.forEach((err) => console.error(`  - ${err}`));

    if (hasOverlap) {
      console.error(
        `\n💡 Tip: If this overlap is intentional, add the file path to the 'allowedOverlaps' list in ${SUITES_PATH}.`,
      );
    } else {
      console.error(`\n💡 Tip: Update ${SUITES_PATH} to resolve these issues.`);
    }
    process.exit(1);
  }

  console.log(
    '✅ Eval Suite Validation Passed: All files mapped and no overlaps found.',
  );
}
|
||||
|
||||
main();
|
||||
Reference in New Issue
Block a user