mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-11 22:51:00 -07:00
fix(ci): isolate workflow evals, revert unrelated changes, and fix aggregation
This commit is contained in:
@@ -35,18 +35,6 @@ export * from '@google/gemini-cli-test-utils';
|
||||
// This may take a really long time and is not recommended.
|
||||
export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES';
|
||||
|
||||
export interface EvalCase {
|
||||
name: string;
|
||||
params?: Record<string, any>;
|
||||
prompt: string | string[];
|
||||
timeout?: number;
|
||||
env?: Record<string, string>;
|
||||
files?: Record<string, string>;
|
||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||
targetModels?: string[];
|
||||
assert: (rig: TestRig, result: string) => Promise<void>;
|
||||
}
|
||||
|
||||
export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
const fn = async () => {
|
||||
const rig = new TestRig() as any;
|
||||
@@ -169,16 +157,6 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
}
|
||||
};
|
||||
|
||||
const currentModel = process.env.GEMINI_MODEL;
|
||||
if (
|
||||
evalCase.targetModels &&
|
||||
currentModel &&
|
||||
!evalCase.targetModels.includes(currentModel)
|
||||
) {
|
||||
it.skip(`${evalCase.name} (skipped for model ${currentModel})`, fn);
|
||||
return;
|
||||
}
|
||||
|
||||
if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
|
||||
it.skip(evalCase.name, fn);
|
||||
} else {
|
||||
@@ -192,3 +170,14 @@ async function prepareLogDir(name: string) {
|
||||
const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
||||
return { logDir, sanitizedName };
|
||||
}
|
||||
|
||||
export interface EvalCase {
|
||||
name: string;
|
||||
params?: Record<string, any>;
|
||||
prompt: string | string[];
|
||||
timeout?: number;
|
||||
env?: Record<string, string>;
|
||||
files?: Record<string, string>;
|
||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||
assert: (rig: TestRig, result: string) => Promise<void>;
|
||||
}
|
||||
|
||||
@@ -38,6 +38,9 @@ function getModelFromPath(reportPath) {
|
||||
const artifactDir = parts.find((p) => p.startsWith('eval-logs-'));
|
||||
if (!artifactDir) return 'unknown';
|
||||
|
||||
const matchWorkflow = artifactDir.match(/^eval-logs-workflows-(.+)$/);
|
||||
if (matchWorkflow) return `${matchWorkflow[1]} (Workflow)`;
|
||||
|
||||
const matchNew = artifactDir.match(/^eval-logs-(.+)-(\d+)$/);
|
||||
if (matchNew) return matchNew[1];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user