fix(plan): sandbox path resolution in Plan Mode to prevent hallucinations (#22737)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Adib234
2026-03-24 09:19:29 -04:00
committed by A.K.M. Adib
parent a3cc1c5682
commit 81675857fd
22 changed files with 271 additions and 110 deletions
+111
View File
@@ -132,6 +132,32 @@ describe('plan_mode', () => {
expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe(
true,
);
const toolLogs = rig.readToolLogs();
const exitPlanCall = toolLogs.find(
(log) => log.toolRequest.name === 'exit_plan_mode',
);
expect(
exitPlanCall,
'Expected to find exit_plan_mode in tool logs',
).toBeDefined();
const args = JSON.parse(exitPlanCall!.toolRequest.args);
expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf(
'string',
);
expect(args.plan_filename, 'plan_filename should end with .md').toMatch(
/\.md$/,
);
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('/');
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('\\');
assertModelHasOutput(result);
},
});
@@ -166,4 +192,89 @@ describe('plan_mode', () => {
assertModelHasOutput(result);
},
});
evalTest('USUALLY_PASSES', {
name: 'should create a plan in plan mode and implement it for a refactoring task',
params: {
settings,
},
files: {
'src/mathUtils.ts':
'export const sum = (a: number, b: number) => a + b;\nexport const multiply = (a: number, b: number) => a * b;',
'src/main.ts':
'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));',
},
prompt:
'I want to refactor our math utilities. Move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts` to use the new file. Please create a detailed implementation plan first, then execute it.',
assert: async (rig, result) => {
const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode');
expect(
enterPlanCalled,
'Expected enter_plan_mode tool to be called',
).toBe(true);
const exitPlanCalled = await rig.waitForToolCall('exit_plan_mode');
expect(exitPlanCalled, 'Expected exit_plan_mode tool to be called').toBe(
true,
);
await rig.waitForTelemetryReady();
const toolLogs = rig.readToolLogs();
const exitPlanCall = toolLogs.find(
(log) => log.toolRequest.name === 'exit_plan_mode',
);
expect(
exitPlanCall,
'Expected to find exit_plan_mode in tool logs',
).toBeDefined();
const args = JSON.parse(exitPlanCall!.toolRequest.args);
expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf(
'string',
);
expect(args.plan_filename, 'plan_filename should end with .md').toMatch(
/\.md$/,
);
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('/');
expect(
args.plan_filename,
'plan_filename should not be a path',
).not.toContain('\\');
// Check if plan was written
const planWrite = toolLogs.find(
(log) =>
log.toolRequest.name === 'write_file' &&
log.toolRequest.args.includes('/plans/'),
);
expect(
planWrite,
'Expected a plan file to be written in the plans directory',
).toBeDefined();
// Check for implementation files
const newFileWrite = toolLogs.find(
(log) =>
log.toolRequest.name === 'write_file' &&
log.toolRequest.args.includes('src/basicMath.ts'),
);
expect(
newFileWrite,
'Expected src/basicMath.ts to be created',
).toBeDefined();
const mainUpdate = toolLogs.find(
(log) =>
['write_file', 'replace'].includes(log.toolRequest.name) &&
log.toolRequest.args.includes('src/main.ts'),
);
expect(mainUpdate, 'Expected src/main.ts to be updated').toBeDefined();
assertModelHasOutput(result);
},
});
});