Prompt changes.

This commit is contained in:
Christian Gunderman
2026-02-15 13:09:54 -08:00
parent 6d1bfa8da9
commit 9844066de3
2 changed files with 33 additions and 10 deletions
+29 -6
View File
@@ -52,8 +52,18 @@ describe('subagent eval test cases', () => {
});
evalTest('ALWAYS_PASSES', {
name: 'should fix linter errors in multiple projects',
name: 'should fix linter errors in multiple projects using implicit parallelism',
prompt: 'Fix all linter errors.',
timeout: 600000,
params: {
settings: {
agents: {
overrides: {
generalist: { enabled: true },
},
},
},
},
files: {
'project-a/eslint.config.js': `
module.exports = [
@@ -85,18 +95,31 @@ describe('subagent eval test cases', () => {
if (fileA.includes('var x')) {
throw new Error(`project-a/index.js was not fixed. Content:\n${fileA}`);
}
if (fileB.includes('console.log')) {
throw new Error(`project-b/main.js was not fixed. Content:\n${fileB}`);
// Check if console.log is present and NOT commented out or disabled.
const lines = fileB.split('\n');
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
if (line.includes('console.log')) {
const isCommented = line.trim().startsWith('//');
const isDisabled =
(i > 0 && lines[i - 1].includes('eslint-disable')) ||
line.includes('eslint-disable-line');
if (!isCommented && !isDisabled) {
throw new Error(
`project-b/main.js was not fixed (console.log present without disable/comment). Content:\n${fileB}`,
);
}
}
}
// Assert that the agent delegated to a subagent for each project.
const toolLogs = rig.readToolLogs();
const subagentCalls = toolLogs.filter((log) => {
if (log.toolRequest.name === 'codebase_investigator') return true;
if (log.toolRequest.name === 'generalist') return true;
if (log.toolRequest.name === 'delegate_to_agent') {
try {
const args = JSON.parse(log.toolRequest.args);
return args.agent_name === 'codebase_investigator';
return args.agent_name === 'generalist';
} catch {
return false;
}
@@ -106,7 +129,7 @@ describe('subagent eval test cases', () => {
if (subagentCalls.length < 2) {
throw new Error(
`Expected at least 2 codebase_investigator calls, but found ${subagentCalls.length}`,
`Expected at least 2 generalist calls, but found ${subagentCalls.length}`,
);
}
},