feat(core): experimental in-progress steering hints (1 of 3) (#19008)

This commit is contained in:
joshualitt
2026-02-17 14:59:33 -08:00
committed by GitHub
parent 5e2f5df62c
commit 55c628e967
20 changed files with 1381 additions and 60 deletions

View File

@@ -47,11 +47,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
// Symlink node modules to reduce the amount of time needed to
// bootstrap test projects.
const rootNodeModules = path.join(process.cwd(), 'node_modules');
const testNodeModules = path.join(rig.testDir || '', 'node_modules');
if (fs.existsSync(rootNodeModules) && !fs.existsSync(testNodeModules)) {
fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
}
symlinkNodeModules(rig.testDir || '');
if (evalCase.files) {
const acknowledgedAgents: Record<string, Record<string, string>> = {};
@@ -159,20 +155,47 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
}
};
runEval(policy, evalCase.name, fn, evalCase.timeout);
}
/**
* Wraps a test function with the appropriate Vitest 'it' or 'it.skip' based on policy.
*/
export function runEval(
policy: EvalPolicy,
name: string,
fn: () => Promise<void>,
timeout?: number,
) {
if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
it.skip(evalCase.name, fn);
it.skip(name, fn);
} else {
it(evalCase.name, fn, evalCase.timeout);
it(name, fn, timeout);
}
}
async function prepareLogDir(name: string) {
export async function prepareLogDir(name: string) {
const logDir = path.resolve(process.cwd(), 'evals/logs');
await fs.promises.mkdir(logDir, { recursive: true });
const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
return { logDir, sanitizedName };
}
/**
* Symlinks node_modules to the test directory to speed up tests that need to run tools.
*/
export function symlinkNodeModules(testDir: string) {
const rootNodeModules = path.join(process.cwd(), 'node_modules');
const testNodeModules = path.join(testDir, 'node_modules');
if (
testDir &&
fs.existsSync(rootNodeModules) &&
!fs.existsSync(testNodeModules)
) {
fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
}
}
export interface EvalCase {
name: string;
params?: Record<string, any>;