fix(core): enforce parallel task tracker updates (#24477)

Co-authored-by: anj-s <anjalisridhar@google.com>
This commit is contained in:
anj-s
2026-04-27 16:17:26 -07:00
committed by GitHub
parent 47bca39eeb
commit c17400b830
4 changed files with 56 additions and 10 deletions
+50 -8
View File
@@ -62,11 +62,13 @@ describe('tracker_mode', () => {
'Expected tracker_update_task tool to be called',
).toBe(true);
const updateCall = toolLogs.find(
const updateCalls = toolLogs.filter(
(log) => log.toolRequest.name === TRACKER_UPDATE_TASK_TOOL_NAME,
);
expect(updateCall).toBeDefined();
const updateArgs = JSON.parse(updateCall!.toolRequest.args);
expect(updateCalls.length).toBeGreaterThan(0);
const updateArgs = JSON.parse(
updateCalls[updateCalls.length - 1].toolRequest.args,
);
expect(updateArgs.status).toBe('closed');
const loginContent = fs.readFileSync(
@@ -128,12 +130,52 @@ describe('tracker_mode', () => {
prompt:
'Where is my task tracker storage located? Please provide the absolute path in your response.',
assert: async (rig, result) => {
// The rig sets GEMINI_CLI_HOME to rig.homeDir
const homeDir = rig.homeDir!;
// The response should contain the dynamic path which includes the home directory
// and follows the .gemini/tmp/.../tracker structure.
expect(result).toContain(homeDir);
// The response should contain the dynamic path which follows the .gemini/tmp/.../tracker structure.
expect(result).toMatch(/\.gemini\/tmp\/.*\/tracker/);
},
});
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should update the tracker in the same turn as the task completion to save turns',
params: {
settings: { experimental: { taskTracker: true } },
},
files: FILES,
prompt:
'We have a bug in src/login.js: the password check is missing. Fix this bug. Then, create a new file src/auth.js that exports a simple verifyToken function. Please organize this into tasks and execute them.',
assert: async (rig, result) => {
await rig.waitForToolCall(TRACKER_CREATE_TASK_TOOL_NAME);
await rig.waitForToolCall(TRACKER_UPDATE_TASK_TOOL_NAME);
const toolLogs = rig.readToolLogs();
// Get the prompt ID of the fix for login.js
const loginEditCalls = toolLogs.filter(
(log) =>
(log.toolRequest.name === 'replace' ||
log.toolRequest.name === 'write_file') &&
log.toolRequest.args.includes('login.js'),
);
expect(loginEditCalls.length).toBeGreaterThan(0);
const loginEditPromptId =
loginEditCalls[loginEditCalls.length - 1].toolRequest.prompt_id;
// Verify there is an update to the tracker in the exact same turn
const parallelTrackerUpdates = toolLogs.filter(
(log) =>
log.toolRequest.name === TRACKER_UPDATE_TASK_TOOL_NAME &&
log.toolRequest.prompt_id === loginEditPromptId,
);
expect(
parallelTrackerUpdates.length,
'Expected tracker_update_task to be called in the same turn as the login.js fix',
).toBeGreaterThan(0);
assertModelHasOutput(result);
},
});
});
@@ -2965,6 +2965,7 @@ You are operating with a persistent file-based task tracking system located at \
6. **STATE OVER CHAT**: If the user says "I think we finished that," but the tool says it is 'pending', trust the tool--or verify explicitly before updating.
7. **DEPENDENCY MANAGEMENT**: Respect task topology. Never attempt to execute a task if its dependencies are not marked as 'closed'. If you are blocked, focus only on the leaf nodes of the task graph.
8. **DETAILED TASKS**: Ensure that the tasks created have highly detailed titles and descriptions. The description MUST provide significantly more specific details and technical context than the title.
9. **TURN EFFICIENCY**: Update the tracker immediately when a step is completed. Combine \`tracker_update_task\` calls with other tool calls in the same turn to save turns.
# Operational Guidelines
@@ -3151,6 +3152,7 @@ You are operating with a persistent file-based task tracking system located at \
6. **STATE OVER CHAT**: If the user says "I think we finished that," but the tool says it is 'pending', trust the tool--or verify explicitly before updating.
7. **DEPENDENCY MANAGEMENT**: Respect task topology. Never attempt to execute a task if its dependencies are not marked as 'closed'. If you are blocked, focus only on the leaf nodes of the task graph.
8. **DETAILED TASKS**: Ensure that the tasks created have highly detailed titles and descriptions. The description MUST provide significantly more specific details and technical context than the title.
9. **TURN EFFICIENCY**: Update the tracker immediately when a step is completed. Combine \`tracker_update_task\` calls with other tool calls in the same turn to save turns.
# Operational Guidelines
+2 -1
View File
@@ -510,7 +510,8 @@ You are operating with a persistent file-based task tracking system located at \
5. **VERIFICATION**: Before marking a task as complete, verify the work is actually done (e.g., run the test, check the file existence).
6. **STATE OVER CHAT**: If the user says "I think we finished that," but the tool says it is 'pending', trust the tool--or verify explicitly before updating.
7. **DEPENDENCY MANAGEMENT**: Respect task topology. Never attempt to execute a task if its dependencies are not marked as 'closed'. If you are blocked, focus only on the leaf nodes of the task graph.
8. **DETAILED TASKS**: Ensure that the tasks created have highly detailed titles and descriptions. The description MUST provide significantly more specific details and technical context than the title.`.trim();
8. **DETAILED TASKS**: Ensure that the tasks created have highly detailed titles and descriptions. The description MUST provide significantly more specific details and technical context than the title.
9. **TURN EFFICIENCY**: Update the tracker immediately when a step is completed. Combine \`${TRACKER_UPDATE_TASK_TOOL_NAME}\` calls with other tool calls in the same turn to save turns.`.trim();
}
// --- Leaf Helpers (Strictly strings or simple calls) ---
+2 -1
View File
@@ -577,7 +577,8 @@ You are operating with a persistent file-based task tracking system located at \
5. **VERIFICATION**: Before marking a task as complete, verify the work is actually done (e.g., run the test, check the file existence).
6. **STATE OVER CHAT**: If the user says "I think we finished that," but the tool says it is 'pending', trust the tool--or verify explicitly before updating.
7. **DEPENDENCY MANAGEMENT**: Respect task topology. Never attempt to execute a task if its dependencies are not marked as 'closed'. If you are blocked, focus only on the leaf nodes of the task graph.
8. **DETAILED TASKS**: Ensure that the tasks created have highly detailed titles and descriptions. The description MUST provide significantly more specific details and technical context than the title.`.trim();
8. **DETAILED TASKS**: Ensure that the tasks created have highly detailed titles and descriptions. The description MUST provide significantly more specific details and technical context than the title.
9. **TURN EFFICIENCY**: Update the tracker immediately when a step is completed. Combine ${trackerUpdate} calls with other tool calls in the same turn to save turns.`.trim();
}
export function renderPlanningWorkflow(