From fdb61b470d527dd5aef38c6e2e5e67510cf6031b Mon Sep 17 00:00:00 2001 From: Aishanee Shah Date: Thu, 9 Apr 2026 16:17:12 +0000 Subject: [PATCH] feat(watcher): handle status file writing internally and cleanup on dispose --- packages/core/src/agents/watcher-agent.ts | 41 ++++--------------- packages/core/src/core/client.ts | 37 +++++++++++++++++ packages/core/src/core/client_watcher.test.ts | 19 ++++----- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/packages/core/src/agents/watcher-agent.ts b/packages/core/src/agents/watcher-agent.ts index 1e863bb039..a2c83ea332 100644 --- a/packages/core/src/agents/watcher-agent.ts +++ b/packages/core/src/agents/watcher-agent.ts @@ -123,42 +123,17 @@ Analyze the recent history against the North Star. Actively look for anti-patter ### Standard Operating Procedure: 1. **READ & TRIAGE:** Read the user's prompt, recent history, and the existing memory file at \`${statusFilePath}\`. Determine the state: Standalone Short-Horizon, Active Long-Horizon, or Task Transition. -2. **ANALYZE & OVERWRITE:** - * *If Standalone Short-Horizon (No active macro-task):* Overwrite \`${statusFilePath}\` with a single line: \`EMPTY\`. +2. **ANALYZE:** + * *If Standalone Short-Horizon (No active macro-task):* Note that the status should be empty. * *If Task Transition / Abort:* Purge old data, initialize a fresh state for the new task, or leave empty if no new task is given. - * *If Active Long-Horizon (Even if current turn is a tactical question):* Compare history against the file, update progress, log dead ends, and track trajectory. Overwrite \`${statusFilePath}\` using the exact Markdown format below. -3. **REPORT:** Call the \`complete_task\` tool. Formulate sharp, direct feedback to snap the main agent out of loops, or stay silent if things are on track. - + * *If Active Long-Horizon (Even if current turn is a tactical question):* Compare history against the file, update progress, log dead ends, and track trajectory. +3. **REPORT:** Call the \`complete_task\` tool with the updated state and sharp, direct feedback to snap the main agent out of loops, or stay silent if things are on track. + --- -### Status File Format (Overwrite \`${statusFilePath}\` with this structure): +### Output JSON Format (Provide this to \`complete_task\`): -*(Note: If this is a Standalone Short-Horizon task with no ongoing goal, just write \`EMPTY\` and omit the rest.)* - -\`\`\`md -# Watcher Memory State - -## 1. The North Star (Primary Goal) -[A concise, 1-2 sentence description of the final desired outcome. Purge and replace ONLY if the user starts a completely new task or aborts.] - -## 2. Strategic Updates -[Bullet points of any major architectural/requirements changes since this specific task started. Ignore debugging steps.] - -## 3. Progress Snapshot -[ Short summary to provide quick context to watcher sub-agent.] - -## 4. Failed Strategies & Dead Ends -* [Attempted X to solve Y, resulted in Z error. DO NOT RETRY without a substantial change in approach.] - -## 5. Current Trajectory Evaluation -[State: ON TRACK / DEVIATING / STUCK / LOOPING] -[One sentence justifying the state based on the last few turns.] - -\`\`\` - -### Output Execution: - -When your file update is complete, you MUST call the \`complete_task\` tool with a JSON report. +*(Note: If this is a Standalone Short-Horizon task with no ongoing goal, just set all fields to "EMPTY" or "N/A" and omit feedback.)* * \`userDirections\`: Any parsed _strategic_ changes, or note if the user transitioned/aborted tasks. * \`progressSummary\`: Brief text of what was achieved, or "N/A" for short-horizon. @@ -167,7 +142,7 @@ When your file update is complete, you MUST call the \`complete_task\` tool with * **If Short-Horizon or ON_TRACK**: Leave empty. * **If DEVIATING/STUCK/LOOPING**: Provide a strong, authoritative directive to the main agent. (e.g., _"WARNING: You are in a loop trying to fix test_utils.py. The original goal is to build the API endpoint. Revert your last change, ignore the test warning for now, and return to the API endpoint."_) -`, +You MUST call \`complete_task\` with a JSON report containing \`userDirections\`, \`progressSummary\`, \`evaluation\`, and optional \`feedback\`.`, }, }; }; diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 031ff3857b..4bccd72818 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -12,6 +12,8 @@ import { type Tool, type GenerateContentResponse, } from '@google/genai'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; import { partListUnionToString } from './geminiRequest.js'; import { getDirectoryContextString, @@ -320,6 +322,20 @@ export class GeminiClient { dispose() { coreEvents.off(CoreEvent.ModelChanged, this.handleModelChanged); coreEvents.off(CoreEvent.MemoryChanged, this.handleMemoryChanged); + + // Clean up Watcher status file + try { + const projectTempDir = this.config.storage.getProjectTempDir(); + const statusFilePath = path.join(projectTempDir, 'watcher_status.md'); + if (fs.existsSync(statusFilePath)) { + fs.unlinkSync(statusFilePath); + } + } catch (e) { + debugLogger.warn( + 'Failed to clean up watcher status file during dispose', + e, + ); + } } async resumeChat( @@ -1357,6 +1373,27 @@ export class GeminiClient { try { const contentString = partListUnionToString(result.llmContent); const parsed = WatcherReportSchema.parse(JSON.parse(contentString)); + + // Internally write the status report to avoid requiring user permission + const projectTempDir = this.config.storage.getProjectTempDir(); + const statusFilePath = path.join(projectTempDir, 'watcher_status.md'); + const reportLines = [ + '# Watcher Memory State', + '', + '## 1. User Directions', + parsed.userDirections, + '', + '## 2. Progress Summary', + parsed.progressSummary, + '', + '## 3. Current Trajectory Evaluation', + `State: ${parsed.evaluation}`, + '', + '## 4. Feedback', + parsed.feedback ?? 'N/A', + ]; + fs.writeFileSync(statusFilePath, reportLines.join('\n'), 'utf-8'); + return parsed as WatcherProgress; } catch (e) { debugLogger.warn('Failed to parse watcher output', e); diff --git a/packages/core/src/core/client_watcher.test.ts b/packages/core/src/core/client_watcher.test.ts index f9a3a2b4c2..7a546e1604 100644 --- a/packages/core/src/core/client_watcher.test.ts +++ b/packages/core/src/core/client_watcher.test.ts @@ -324,16 +324,6 @@ describe('GeminiClient Watcher Integration', () => { // Simulate 11 turns for (let i = 1; i <= 11; i++) { clientAccess.sessionTurnCount = i - 1; // Will become i inside processTurn - // In a real scenario, the subagent would write this file via WRITE_FILE_TOOL. - // We simulate this side effect here when the watcher is triggered. - if (i % interval === 0) { - const projectTempDir = config.storage.getProjectTempDir(); - const statusFilePath = path.join(projectTempDir, 'watcher_status.md'); - fs.writeFileSync( - statusFilePath, - '# Watcher Status Update\nDummy status', - ); - } const generator = clientAccess.processTurn( [{ text: `turn ${i}` }], @@ -350,11 +340,16 @@ describe('GeminiClient Watcher Integration', () => { // With interval 5, it should trigger at turn 1, turn 5 and turn 10 expect(mockWatcherTool.build).toHaveBeenCalledTimes(3); - // Verify the status file exists + // Verify the status file exists (written by GeminiClient internally) const projectTempDir = config.storage.getProjectTempDir(); const statusFilePath = path.join(projectTempDir, 'watcher_status.md'); expect(fs.existsSync(statusFilePath)).toBe(true); const content = fs.readFileSync(statusFilePath, 'utf-8'); - expect(content).toContain('Watcher Status Update'); + expect(content).toContain('# Watcher Memory State'); + expect(content).toContain('Keep testing'); + + // Verify cleanup in dispose + client.dispose(); + expect(fs.existsSync(statusFilePath)).toBe(false); }); });