fix(ci): isolate workflow evals, revert unrelated changes, and fix aggregation

This commit is contained in:
cocosheng-g
2026-02-03 23:04:50 -05:00
parent 9da1542071
commit fe50e580fa
2 changed files with 14 additions and 22 deletions

View File

@@ -35,18 +35,6 @@ export * from '@google/gemini-cli-test-utils';
// This may take a really long time and is not recommended.
export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES';
/**
 * Describes one eval case; this is the second argument accepted by
 * `evalTest(policy, evalCase)`.
 */
export interface EvalCase {
/** Test title; `evalTest` uses it as the name when registering the case. */
name: string;
/** NOTE(review): optional free-form parameters — how they are consumed is not visible here; confirm. */
params?: Record<string, any>;
/** Either a single prompt string or a list of prompt strings. NOTE(review): presumably a list means sequential turns — confirm. */
prompt: string | string[];
/** NOTE(review): optional timeout for the case — units not visible here (presumably milliseconds); confirm. */
timeout?: number;
/** NOTE(review): optional string-to-string map, presumably extra environment variables for the run — confirm. */
env?: Record<string, string>;
/** NOTE(review): optional string-to-string map, presumably file path -> file contents to set up before the run — confirm. */
files?: Record<string, string>;
/** Optional approval mode; one of 'default', 'auto_edit', 'yolo' or 'plan'. */
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
/**
 * Optional list of model names this case applies to. When it is set and the
 * `GEMINI_MODEL` environment variable is set to a value not in the list,
 * `evalTest` registers the case as skipped.
 */
targetModels?: string[];
/**
 * Async check for the case; receives the test rig and a result string.
 * NOTE(review): presumably expected to throw/reject on failure — confirm against the `evalTest` body.
 */
assert: (rig: TestRig, result: string) => Promise<void>;
}
export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
const fn = async () => {
const rig = new TestRig() as any;
@@ -169,16 +157,6 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
}
};
const currentModel = process.env.GEMINI_MODEL;
if (
evalCase.targetModels &&
currentModel &&
!evalCase.targetModels.includes(currentModel)
) {
it.skip(`${evalCase.name} (skipped for model ${currentModel})`, fn);
return;
}
if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
it.skip(evalCase.name, fn);
} else {
@@ -192,3 +170,14 @@ async function prepareLogDir(name: string) {
const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
return { logDir, sanitizedName };
}
/**
 * Describes one eval case; this is the second argument accepted by
 * `evalTest(policy, evalCase)`.
 */
export interface EvalCase {
/** Test title; `evalTest` uses it as the name when registering the case. */
name: string;
/** NOTE(review): optional free-form parameters — how they are consumed is not visible here; confirm. */
params?: Record<string, any>;
/** Either a single prompt string or a list of prompt strings. NOTE(review): presumably a list means sequential turns — confirm. */
prompt: string | string[];
/** NOTE(review): optional timeout for the case — units not visible here (presumably milliseconds); confirm. */
timeout?: number;
/** NOTE(review): optional string-to-string map, presumably extra environment variables for the run — confirm. */
env?: Record<string, string>;
/** NOTE(review): optional string-to-string map, presumably file path -> file contents to set up before the run — confirm. */
files?: Record<string, string>;
/** Optional approval mode; one of 'default', 'auto_edit', 'yolo' or 'plan'. */
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
/**
 * Async check for the case; receives the test rig and a result string.
 * NOTE(review): presumably expected to throw/reject on failure — confirm against the `evalTest` body.
 */
assert: (rig: TestRig, result: string) => Promise<void>;
}

View File

@@ -38,6 +38,9 @@ function getModelFromPath(reportPath) {
const artifactDir = parts.find((p) => p.startsWith('eval-logs-'));
if (!artifactDir) return 'unknown';
const matchWorkflow = artifactDir.match(/^eval-logs-workflows-(.+)$/);
if (matchWorkflow) return `${matchWorkflow[1]} (Workflow)`;
const matchNew = artifactDir.match(/^eval-logs-(.+)-(\d+)$/);
if (matchNew) return matchNew[1];