fix(evals): correctly handle new failing test files and fix lint error

This commit is contained in:
Alisa Novikova
2026-04-08 22:25:31 -07:00
parent ce674a5c59
commit 90ff8feb4c
+33 -21
View File
@@ -183,33 +183,45 @@ async function verifyBaseline(testName, results, files, model) {
let baselinePasses = 0;
let baselineTotal = 0;
while (baselinePasses === 0 && baselineTotal < 3) {
baselineTotal++;
console.log(` Baseline Attempt ${baselineTotal}...`);
const baselineRun = runTests(files, escapeRegex(testName), model);
if (findAssertion(baselineRun, testName)?.status === 'passed') {
baselinePasses++;
console.log(` ✅ Baseline Attempt ${baselineTotal} passed.`);
// Determine the primary file for this test
const testFile = results[testName].file
? path.relative(process.cwd(), results[testName].file)
: null;
if (testFile && !fs.existsSync(testFile)) {
console.log(
` ️ Test file **${testFile}** does not exist on 'main'. Marking as NEW CONFIRMED REGRESSION.`,
);
results[testName].status = 'regression';
} else {
while (baselinePasses === 0 && baselineTotal < 3) {
baselineTotal++;
console.log(` Baseline Attempt ${baselineTotal}...`);
const baselineRun = runTests(files, escapeRegex(testName), model);
if (findAssertion(baselineRun, testName)?.status === 'passed') {
baselinePasses++;
console.log(` ✅ Baseline Attempt ${baselineTotal} passed.`);
} else {
console.log(` ❌ Baseline Attempt ${baselineTotal} failed.`);
}
}
if (baselinePasses === 0) {
console.log(
` ️ Test also fails on 'main'. Marking as PRE-EXISTING (Cleared).`,
);
results[testName].status = 'pre-existing';
results[testName].passed = results[testName].total; // Clear for report
} else {
console.log(` ❌ Baseline Attempt ${baselineTotal} failed.`);
console.log(
` ❌ Test passes on 'main' but fails in PR. Marking as CONFIRMED REGRESSION.`,
);
results[testName].status = 'regression';
}
}
execSync('git checkout -', { stdio: 'inherit' });
if (hasStash) execSync('git stash pop', { stdio: 'inherit' });
if (baselinePasses === 0) {
console.log(
` ️ Test also fails on 'main'. Marking as PRE-EXISTING (Cleared).`,
);
results[testName].status = 'pre-existing';
results[testName].passed = results[testName].total; // Clear for report
} else {
console.log(
` ❌ Test passes on 'main' but fails in PR. Marking as CONFIRMED REGRESSION.`,
);
results[testName].status = 'regression';
}
} catch (error) {
console.error(` ❌ Failed to verify baseline: ${error.message}`);