feat(watcher): handle status file writing internally and cleanup on dispose

2026-05-20 08:42:39 -07:00 · 2026-04-09 16:17:12 +00:00
parent 705214ed5b
commit fdb61b470d
3 changed files with 52 additions and 45 deletions
@@ -123,42 +123,17 @@ Analyze the recent history against the North Star. Actively look for anti-patter

 ### Standard Operating Procedure:
 1.  **READ & TRIAGE:** Read the user's prompt, recent history, and the existing memory file at \`${statusFilePath}\`. Determine the state: Standalone Short-Horizon, Active Long-Horizon, or Task Transition.
-2.  **ANALYZE & OVERWRITE:** 
-    *   *If Standalone Short-Horizon (No active macro-task):* Overwrite \`${statusFilePath}\` with a single line: \`EMPTY\`.
+2.  **ANALYZE:** 
+    *   *If Standalone Short-Horizon (No active macro-task):* Note that the status should be empty.
    *   *If Task Transition / Abort:* Purge old data, initialize a fresh state for the new task, or leave empty if no new task is given.
-    *   *If Active Long-Horizon (Even if current turn is a tactical question):* Compare history against the file, update progress, log dead ends, and track trajectory. Overwrite \`${statusFilePath}\` using the exact Markdown format below.
-3.  **REPORT:** Call the \`complete_task\` tool. Formulate sharp, direct feedback to snap the main agent out of loops, or stay silent if things are on track.
-      
+    *   *If Active Long-Horizon (Even if current turn is a tactical question):* Compare history against the file, update progress, log dead ends, and track trajectory.
+3.  **REPORT:** Call the \`complete_task\` tool with the updated state and sharp, direct feedback to snap the main agent out of loops, or stay silent if things are on track.
+
 ---

-### Status File Format (Overwrite \`${statusFilePath}\` with this structure):
+### Output JSON Format (Provide this to \`complete_task\`):

-*(Note: If this is a Standalone Short-Horizon task with no ongoing goal, just write \`EMPTY\` and omit the rest.)*
-
-\`\`\`md
-# Watcher Memory State
-
-## 1. The North Star (Primary Goal)
-[A concise, 1-2 sentence description of the final desired outcome. Purge and replace ONLY if the user starts a completely new task or aborts.]
-
-## 2. Strategic Updates
-[Bullet points of any major architectural/requirements changes since this specific task started. Ignore debugging steps.]
-
-## 3. Progress Snapshot
-[ Short summary to provide quick context to watcher sub-agent.]
-
-## 4. Failed Strategies & Dead Ends
-*   [Attempted X to solve Y, resulted in Z error. DO NOT RETRY without a substantial change in approach.]
-
-## 5. Current Trajectory Evaluation
-[State: ON TRACK / DEVIATING / STUCK / LOOPING]
-[One sentence justifying the state based on the last few turns.]
-
-\`\`\`
-
-### Output Execution:
-
-When your file update is complete, you MUST call the \`complete_task\` tool with a JSON report.
+*(Note: If this is a Standalone Short-Horizon task with no ongoing goal, just set all fields to "EMPTY" or "N/A" and omit feedback.)*

 * \`userDirections\`: Any parsed _strategic_ changes, or note if the user transitioned/aborted tasks.
 * \`progressSummary\`: Brief text of what was achieved, or "N/A" for short-horizon.
@@ -167,7 +142,7 @@ When your file update is complete, you MUST call the \`complete_task\` tool with
    * **If Short-Horizon or ON_TRACK**: Leave empty.
    * **If DEVIATING/STUCK/LOOPING**: Provide a strong, authoritative directive to the main agent. (e.g., _"WARNING: You are in a loop trying to fix test_utils.py. The original goal is to build the API endpoint. Revert your last change, ignore the test warning for now, and return to the API endpoint."_)

-`,
+You MUST call \`complete_task\` with a JSON report containing \`userDirections\`, \`progressSummary\`, \`evaluation\`, and optional \`feedback\`.`,
    },
  };
 };
@@ -12,6 +12,8 @@ import {
  type Tool,
  type GenerateContentResponse,
 } from '@google/genai';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
 import { partListUnionToString } from './geminiRequest.js';
 import {
  getDirectoryContextString,
@@ -320,6 +322,20 @@ export class GeminiClient {
  dispose() {
    coreEvents.off(CoreEvent.ModelChanged, this.handleModelChanged);
    coreEvents.off(CoreEvent.MemoryChanged, this.handleMemoryChanged);
+
+    // Best-effort removal of the Watcher status file from the project temp
+    // dir. `force: true` makes a missing file a no-op, so there is no
+    // exists-then-unlink race; any other failure is logged, never thrown.
+    try {
+      const projectTempDir = this.config.storage.getProjectTempDir();
+      const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
+      fs.rmSync(statusFilePath, { force: true });
+    } catch (e) {
+      debugLogger.warn(
+        'Failed to clean up watcher status file during dispose',
+        e,
+      );
+    }
  }

  async resumeChat(
@@ -1357,6 +1373,27 @@ export class GeminiClient {
        try {
          const contentString = partListUnionToString(result.llmContent);
          const parsed = WatcherReportSchema.parse(JSON.parse(contentString));
+
+          // Internally write the status report to avoid requiring user permission
+          const projectTempDir = this.config.storage.getProjectTempDir();
+          const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
+          const reportLines = [
+            '# Watcher Memory State',
+            '',
+            '## 1. User Directions',
+            parsed.userDirections,
+            '',
+            '## 2. Progress Summary',
+            parsed.progressSummary,
+            '',
+            '## 3. Current Trajectory Evaluation',
+            `State: ${parsed.evaluation}`,
+            '',
+            '## 4. Feedback',
+            parsed.feedback ?? 'N/A',
+          ];
+          fs.writeFileSync(statusFilePath, reportLines.join('\n'), 'utf-8');
+
          return parsed as WatcherProgress;
        } catch (e) {
          debugLogger.warn('Failed to parse watcher output', e);
@@ -324,16 +324,6 @@ describe('GeminiClient Watcher Integration', () => {
    // Simulate 11 turns
    for (let i = 1; i <= 11; i++) {
      clientAccess.sessionTurnCount = i - 1; // Will become i inside processTurn
-      // In a real scenario, the subagent would write this file via WRITE_FILE_TOOL.
-      // We simulate this side effect here when the watcher is triggered.
-      if (i % interval === 0) {
-        const projectTempDir = config.storage.getProjectTempDir();
-        const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
-        fs.writeFileSync(
-          statusFilePath,
-          '# Watcher Status Update\nDummy status',
-        );
-      }

      const generator = clientAccess.processTurn(
        [{ text: `turn ${i}` }],
@@ -350,11 +340,16 @@ describe('GeminiClient Watcher Integration', () => {
    // With interval 5, it should trigger at turn 1, turn 5 and turn 10
    expect(mockWatcherTool.build).toHaveBeenCalledTimes(3);

-    // Verify the status file exists
+    // Verify the status file exists (written by GeminiClient internally)
    const projectTempDir = config.storage.getProjectTempDir();
    const statusFilePath = path.join(projectTempDir, 'watcher_status.md');
    expect(fs.existsSync(statusFilePath)).toBe(true);
    const content = fs.readFileSync(statusFilePath, 'utf-8');
-    expect(content).toContain('Watcher Status Update');
+    expect(content).toContain('# Watcher Memory State');
+    expect(content).toContain('Keep testing');
+
+    // Verify cleanup in dispose
+    client.dispose();
+    expect(fs.existsSync(statusFilePath)).toBe(false);
  });
 });