From fdb61b470d527dd5aef38c6e2e5e67510cf6031b Mon Sep 17 00:00:00 2001
From: Aishanee Shah <aishaneeshah@google.com>
Date: Thu, 9 Apr 2026 16:17:12 +0000
Subject: [PATCH] feat(watcher): handle status file writing internally and
 cleanup on dispose

---
 packages/core/src/agents/watcher-agent.ts     | 41 ++++---------------
 packages/core/src/core/client.ts              | 37 +++++++++++++++++
 packages/core/src/core/client_watcher.test.ts | 19 ++++-----
 3 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/packages/core/src/agents/watcher-agent.ts b/packages/core/src/agents/watcher-agent.ts
index 1e863bb039..a2c83ea332 100644
--- a/packages/core/src/agents/watcher-agent.ts
+++ b/packages/core/src/agents/watcher-agent.ts
@@ -123,42 +123,17 @@ Analyze the recent history against the North Star. Actively look for anti-patter
 
 ### Standard Operating Procedure:
 1.  **READ & TRIAGE:** Read the user's prompt, recent history, and the existing memory file at \`${statusFilePath}\`. Determine the state: Standalone Short-Horizon, Active Long-Horizon, or Task Transition.
-2.  **ANALYZE & OVERWRITE:** 
-    *   *If Standalone Short-Horizon (No active macro-task):* Overwrite \`${statusFilePath}\` with a single line: \`EMPTY\`.
+2.  **ANALYZE:** 
+    *   *If Standalone Short-Horizon (No active macro-task):* Note that the status should be empty.
     *   *If Task Transition / Abort:* Purge old data, initialize a fresh state for the new task, or leave empty if no new task is given.
-    *   *If Active Long-Horizon (Even if current turn is a tactical question):* Compare history against the file, update progress, log dead ends, and track trajectory. Overwrite \`${statusFilePath}\` using the exact Markdown format below.
-3.  **REPORT:** Call the \`complete_task\` tool. Formulate sharp, direct feedback to snap the main agent out of loops, or stay silent if things are on track.
-      
+    *   *If Active Long-Horizon (Even if current turn is a tactical question):* Compare history against the file, update progress, log dead ends, and track trajectory.
+3.  **REPORT:** Call the \`complete_task\` tool with the updated state and sharp, direct feedback to snap the main agent out of loops, or stay silent if things are on track.
+
 ---
 
-### Status File Format (Overwrite \`${statusFilePath}\` with this structure):
+### Output JSON Format (Provide this to \`complete_task\`):
 
-*(Note: If this is a Standalone Short-Horizon task with no ongoing goal, just write \`EMPTY\` and omit the rest.)*
-
-\`\`\`md
-# Watcher Memory State
-
-## 1. The North Star (Primary Goal)
-[A concise, 1-2 sentence description of the final desired outcome. Purge and replace ONLY if the user starts a completely new task or aborts.]
-
-## 2. Strategic Updates
-[Bullet points of any major architectural/requirements changes since this specific task started. Ignore debugging steps.]
-
-## 3. Progress Snapshot
-[ Short summary to provide quick context to watcher sub-agent.]
-
-## 4. Failed Strategies & Dead Ends
-*   [Attempted X to solve Y, resulted in Z error. DO NOT RETRY without a substantial change in approach.]
-
-## 5. Current Trajectory Evaluation
-[State: ON TRACK / DEVIATING / STUCK / LOOPING]
-[One sentence justifying the state based on the last few turns.]
-
-\`\`\`
-
-### Output Execution:
-
-When your file update is complete, you MUST call the \`complete_task\` tool with a JSON report.
+*(Note: If this is a Standalone Short-Horizon task with no ongoing goal, just set all fields to "EMPTY" or "N/A" and omit feedback.)*
 
 * \`userDirections\`: Any parsed _strategic_ changes, or note if the user transitioned/aborted tasks.
 * \`progressSummary\`: Brief text of what was achieved, or "N/A" for short-horizon.
@@ -167,7 +142,7 @@ When your file update is complete, you MUST call the \`complete_task\` tool with
     * **If Short-Horizon or ON_TRACK**: Leave empty.
     * **If DEVIATING/STUCK/LOOPING**: Provide a strong, authoritative directive to the main agent. (e.g., _"WARNING: You are in a loop trying to fix test_utils.py. The original goal is to build the API endpoint. Revert your last change, ignore the test warning for now, and return to the API endpoint."_)
 
-`,
+You MUST call \`complete_task\` with a JSON report containing \`userDirections\`, \`progressSummary\`, \`evaluation\`, and optional \`feedback\`.`,
     },
   };
 };
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 031ff3857b..4bccd72818 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -12,6 +12,8 @@ import {
   type Tool,
   type GenerateContentResponse,
 } from '@google/genai';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
 import { partListUnionToString } from './geminiRequest.js';
 import {
   getDirectoryContextString,
@@ -320,6 +322,20 @@ export class GeminiClient {
   dispose() {
     coreEvents.off(CoreEvent.ModelChanged, this.handleModelChanged);
     coreEvents.off(CoreEvent.MemoryChanged, this.handleMemoryChanged);
+
+    // Clean up the Watcher status file. rmSync with `force: true` treats a
+    // missing file as a no-op, so no separate existsSync check (and no
+    // check-then-delete race) is needed; other errors are only logged.
+    try {
+      const projectTempDir = this.config.storage.getProjectTempDir();
+      const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
+      fs.rmSync(statusFilePath, { force: true });
+    } catch (e) {
+      debugLogger.warn(
+        'Failed to clean up watcher status file during dispose',
+        e,
+      );
+    }
   }
 
   async resumeChat(
@@ -1357,6 +1373,27 @@ export class GeminiClient {
         try {
           const contentString = partListUnionToString(result.llmContent);
           const parsed = WatcherReportSchema.parse(JSON.parse(contentString));
+
+          // Persist the report internally so no user permission is required.
+          // The write is best-effort: an I/O failure must not discard the valid
+          // parsed report or be misreported by the outer catch as a parse error.
+          try {
+            const tempDir = this.config.storage.getProjectTempDir();
+            const statusFilePath = path.join(tempDir, 'watcher_status.md');
+            const report = [
+              '# Watcher Memory State',
+              `## 1. User Directions\n${parsed.userDirections}`,
+              `## 2. Progress Summary\n${parsed.progressSummary}`,
+              `## 3. Current Trajectory Evaluation\nState: ${parsed.evaluation}`,
+              `## 4. Feedback\n${parsed.feedback ?? 'N/A'}`,
+            ].join('\n\n');
+            // The temp dir may not exist yet; recursive mkdir is a no-op if it does.
+            fs.mkdirSync(tempDir, { recursive: true });
+            fs.writeFileSync(statusFilePath, report, 'utf-8');
+          } catch (writeError) {
+            debugLogger.warn('Failed to write watcher status file', writeError);
+          }
+
           return parsed as WatcherProgress;
         } catch (e) {
           debugLogger.warn('Failed to parse watcher output', e);
diff --git a/packages/core/src/core/client_watcher.test.ts b/packages/core/src/core/client_watcher.test.ts
index f9a3a2b4c2..7a546e1604 100644
--- a/packages/core/src/core/client_watcher.test.ts
+++ b/packages/core/src/core/client_watcher.test.ts
@@ -324,16 +324,6 @@ describe('GeminiClient Watcher Integration', () => {
     // Simulate 11 turns
     for (let i = 1; i <= 11; i++) {
       clientAccess.sessionTurnCount = i - 1; // Will become i inside processTurn
-      // In a real scenario, the subagent would write this file via WRITE_FILE_TOOL.
-      // We simulate this side effect here when the watcher is triggered.
-      if (i % interval === 0) {
-        const projectTempDir = config.storage.getProjectTempDir();
-        const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
-        fs.writeFileSync(
-          statusFilePath,
-          '# Watcher Status Update\nDummy status',
-        );
-      }
 
       const generator = clientAccess.processTurn(
         [{ text: `turn ${i}` }],
@@ -350,11 +340,16 @@ describe('GeminiClient Watcher Integration', () => {
     // With interval 5, it should trigger at turn 1, turn 5 and turn 10
     expect(mockWatcherTool.build).toHaveBeenCalledTimes(3);
 
-    // Verify the status file exists
+    // Verify the status file exists (written by GeminiClient internally)
     const projectTempDir = config.storage.getProjectTempDir();
     const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
     expect(fs.existsSync(statusFilePath)).toBe(true);
     const content = fs.readFileSync(statusFilePath, 'utf-8');
-    expect(content).toContain('Watcher Status Update');
+    expect(content).toContain('# Watcher Memory State');
+    expect(content).toContain('Keep testing');
+
+    // Verify cleanup in dispose
+    client.dispose();
+    expect(fs.existsSync(statusFilePath)).toBe(false);
   });
 });