From 26602f97fb7649ed070da31f16721e1856b2f41a Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 14 May 2026 23:26:02 +0000 Subject: [PATCH] fix(core): Resolve context loss and token calculator thrashing This commit addresses the massive context loss and 0% utilization issue by fixing three related issues in the context management system: 1. **Masked Tool Retention Bug:** Removed the brittle 'in-flight' protection logic from `getProtectedNodeIds`. Previously, this logic failed to match `MASKED_TOOL` nodes, causing it to perpetually hoard `functionCall` nodes inside the protected list. This hoarding rapidly exhausted the context budget, forcing the GC backstop into an aggressive death spiral. Now, only the `recent_turn` and explicit tasks are protected, allowing standard GC to naturally prune old tool calls (and the `HistoryHardener` handles the rest). 2. **Adaptive Calculator Overhead Bug:** The `AdaptiveTokenCalculator` was calculating ratio drift by dividing the API's `actualTokens` (which includes the static overhead of the System Instruction and Tools) by the graph's `baseUnits` (which only counts history). We now inject a `getOverheadTokens` callback during initialization, allowing the calculator to subtract the overhead from the Gemini API count before dividing by the base units, achieving an accurate apples-to-apples ratio. 3. **Violent Oscillation Dampening:** Introduced strict dampening bounds (`maxStep` limit and a hard `targetWeight` clamp) to the Adaptive Token Calculator's EMA. This acts like a learning-rate limit in machine learning, ensuring that a single massive outlier reading cannot violently thrash the `learnedWeight` multiplier in a single turn. Includes new test coverage verifying the maxStep logic and overhead subtraction. Also removed an unsafe `as unknown` cast in the `snapshotGenerator` that failed `eslint`. 
--- packages/core/src/context/contextManager.ts | 4 +- .../utils/adaptiveTokenCalculator.test.ts | 42 +++++++++++++- .../context/utils/adaptiveTokenCalculator.ts | 39 ++++++++++--- packages/core/src/core/geminiChat.test.ts | 55 +++++++++++++++++++ packages/core/src/core/geminiChat.ts | 45 +++++++++------ 5 files changed, 157 insertions(+), 28 deletions(-) diff --git a/packages/core/src/context/contextManager.ts b/packages/core/src/context/contextManager.ts index 116de68cfc..6e9f53f7fd 100644 --- a/packages/core/src/context/contextManager.ts +++ b/packages/core/src/context/contextManager.ts @@ -147,11 +147,11 @@ export class ContextManager { const agedOutNodes = new Set(); let rollingTokens = 0; - // Identify active tool calls that must NEVER be truncated + // Identify nodes that must NEVER be truncated const protectedIds = this.getProtectedNodeIds(this.buffer.nodes); if (protectedIds.size > 0) { debugLogger.log( - `[ContextManager] Pinning ${protectedIds.size} active tool call nodes to prevent truncation.`, + `[ContextManager] Pinning ${protectedIds.size} nodes (recent_turn or external_active_task) to prevent truncation.`, ); } diff --git a/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts b/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts index f396f87d94..12e258cf07 100644 --- a/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts +++ b/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts @@ -138,7 +138,8 @@ describe('AdaptiveTokenCalculator', () => { // Simulate an event where the API reported 100 tokens, and our base units were 100 // But overhead is 40. 
// actualGraphTokens = 100 - 40 = 60 - // targetWeight = 60 / 100 = 0.6 + // rawTargetWeight = 60 / 100 = 0.6 + // targetWeight = Math.max(0.5, 0.6) = 0.6 // newWeight = 1.0 * 0.8 + 0.6 * 0.2 = 0.8 + 0.12 = 0.92 eventBus.emitTokenGroundTruth({ actualTokens: 100, @@ -147,4 +148,43 @@ describe('AdaptiveTokenCalculator', () => { expect(calculator.getLearnedWeight()).toBeCloseTo(0.92, 5); }); + + it('should enforce the maxStep limit to prevent violent oscillation from massive outliers', () => { + const eventBus = new ContextEventBus(); + const maxStep = 0.05; // Tight limit + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + undefined, + { maxStep }, + ); + + // Initial state: weight = 1.0 + + // Simulate a massive outlier where the API reports 10,000 tokens for 100 base units. + // rawTargetWeight = 100 + // targetWeight = Math.min(100, 1.0 * 2.0) = 2.0 + // emaWeight = 1.0 * 0.8 + 2.0 * 0.2 = 1.2 + // BUT maxStep is 0.05, so the actual step is clamped. 
+ // finalWeight = 1.0 + 0.05 = 1.05 + eventBus.emitTokenGroundTruth({ + actualTokens: 10000, + promptBaseUnits: 100, + }); + + expect(calculator.getLearnedWeight()).toBeCloseTo(1.05, 5); + + // Simulate a massive under-estimation + // rawTargetWeight = 0 + // targetWeight = Math.max(0, 1.05 * 0.5) = 0.525 + // emaWeight = 1.05 * 0.8 + 0.525 * 0.2 = 0.84 + 0.105 = 0.945 + // BUT maxStep is 0.05, so step is clamped: 1.05 - 0.05 = 1.0 + eventBus.emitTokenGroundTruth({ + actualTokens: 0, + promptBaseUnits: 100, + }); + + expect(calculator.getLearnedWeight()).toBeCloseTo(1.0, 5); + }); }); diff --git a/packages/core/src/context/utils/adaptiveTokenCalculator.ts b/packages/core/src/context/utils/adaptiveTokenCalculator.ts index 8204116409..7cf7984109 100644 --- a/packages/core/src/context/utils/adaptiveTokenCalculator.ts +++ b/packages/core/src/context/utils/adaptiveTokenCalculator.ts @@ -14,6 +14,13 @@ import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; import type { ContextEventBus, TokenGroundTruthEvent } from '../eventBus.js'; import { debugLogger } from '../../utils/debugLogger.js'; +export interface AdaptiveLearningConfig { + /** The momentum factor for the Exponential Moving Average (EMA). Defaults to 0.2. */ + learningRate?: number; + /** The absolute maximum change allowed to the weight in a single turn. Defaults to 0.15. 
*/ + maxStep?: number; +} + /** * An Adaptive Token Calculator that dynamically learns the true token cost of the user's * conversation by applying an Exponential Moving Average (EMA) gradient descent to @@ -26,13 +33,18 @@ import { debugLogger } from '../../utils/debugLogger.js'; export class AdaptiveTokenCalculator implements AdvancedTokenCalculator { private learnedWeight = 1.0; private readonly baseCalculator: StaticTokenCalculator; + private readonly learningRate: number; + private readonly maxStep: number; constructor( charsPerToken: number, registry: NodeBehaviorRegistry, eventBus: ContextEventBus, private readonly getOverheadTokens?: () => number, + config?: AdaptiveLearningConfig, ) { + this.learningRate = config?.learningRate ?? 0.2; + this.maxStep = config?.maxStep ?? 0.15; this.baseCalculator = new StaticTokenCalculator(charsPerToken, registry); eventBus.onTokenGroundTruth((event: TokenGroundTruthEvent) => { this.handleGroundTruth(event.actualTokens, event.promptBaseUnits); @@ -53,20 +65,33 @@ export class AdaptiveTokenCalculator implements AdvancedTokenCalculator { const actualGraphTokens = Math.max(0, actualTokens - overheadTokens); // Determine what ratio we should have used - const targetWeight = actualGraphTokens / promptBaseUnits; + const rawTargetWeight = actualGraphTokens / promptBaseUnits; const oldWeight = this.learnedWeight; - // Apply Momentum (Learning Rate) - const learningRate = 0.2; - const newWeight = - oldWeight * (1 - learningRate) + targetWeight * learningRate; + // Dampen extreme outliers *before* applying the EMA by capping the target weight + // to a reasonable multiple of the current weight. This prevents a single massive + // anomaly from destroying the running average. 
+ const targetWeight = Math.max( + oldWeight * 0.5, + Math.min(rawTargetWeight, oldWeight * 2.0), + ); - // Clamp to reasonable safety bounds to prevent rogue metadata poisoning the system + // Apply Momentum (Learning Rate) + let newWeight = + oldWeight * (1 - this.learningRate) + targetWeight * this.learningRate; + + // Hard limit the maximum step size per turn to prevent violent oscillation + if (newWeight > oldWeight + this.maxStep) + newWeight = oldWeight + this.maxStep; + if (newWeight < oldWeight - this.maxStep) + newWeight = oldWeight - this.maxStep; + + // Clamp to reasonable absolute safety bounds this.learnedWeight = Math.max(0.5, Math.min(newWeight, 2.0)); debugLogger.log( `[AdaptiveTokenCalculator] Learned weight updated to ${this.learnedWeight.toFixed(3)} ` + - `(API Tokens: ${actualTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`, + `(API Tokens: ${actualTokens}, Overhead: ${overheadTokens}, Graph Tokens: ${actualGraphTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`, ); } diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 0ecaae8491..a663289fef 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -41,6 +41,7 @@ import { makeResolvedModelConfig } from '../services/modelConfigServiceTestUtils import type { HookSystem } from '../hooks/hookSystem.js'; import { LlmRole } from '../telemetry/types.js'; import { BINARY_INJECTION_KEY } from '../utils/generateContentResponseUtilities.js'; +import type { ResumedSessionData } from '../services/chatRecordingTypes.js'; // Mock fs module to prevent actual file system operations during tests const mockFileSystem = new Map(); @@ -250,6 +251,60 @@ describe('GeminiChat', () => { const chatEmpty = new GeminiChat(mockConfig); expect(chatEmpty.getLastPromptTokenCount()).toBe(0); }); + + it('should prioritize in-memory history over 
resumedSessionData', () => { + // This test simulates a "hot restart" after a context management operation + // like compression, where the in-memory history is shorter and more up-to-date + // than the session data that might be on disk. + + // 1. A stale, longer history from a persisted session record + const resumedSessionData = { + conversation: { + messages: [ + { + id: 'a', + type: 'user', + content: [{ text: 'turn 1' }], + create_time: new Date(), + }, + { + id: 'b', + type: 'gemini', + content: [{ text: 'turn 2' }], + create_time: new Date(), + }, + { + id: 'c', + type: 'user', + content: [{ text: 'turn 3' }], + create_time: new Date(), + }, + ], + }, + } as unknown as ResumedSessionData; + + // 2. A fresh, compressed in-memory history + const compressedHistory: HistoryTurn[] = [ + { + id: 'summary-1', + content: { role: 'user', parts: [{ text: 'summary of turns 1-3' }] }, + }, + ]; + + // 3. Instantiate the chat, providing both. + const chat = new GeminiChat( + mockConfig, + '', + [], + compressedHistory, // This should be prioritized + resumedSessionData, // This should be ignored + ); + + // 4. Assert that the shorter, in-memory history was used. + const finalHistory = chat.getHistoryTurns(); + expect(finalHistory).toHaveLength(1); + expect(finalHistory[0].id).toBe('summary-1'); + }); }); describe('setHistory', () => { diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 2aa813142b..30b33d3c2e 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -283,24 +283,33 @@ export class GeminiChat { private readonly onModelChanged?: (modelId: string) => Promise, ) { validateHistory(history); - const initialHistory: HistoryTurn[] = resumedSessionData - ? resumedSessionData.conversation.messages - .filter((m) => m.type === 'user' || m.type === 'gemini') - .map((m) => ({ - id: m.id, - content: { - role: m.type === 'user' ? 'user' : 'model', - parts: Array.isArray(m.content) - ? 
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - (m.content as Part[]) - : [{ text: String(m.content) }], - }, - })) - : history.map((item) => - 'id' in item && 'content' in item - ? item - : { id: randomUUID(), content: item }, - ); + + let initialHistory: HistoryTurn[]; + // If history is passed, it is the most up-to-date in-memory state and takes precedence. + // This is critical for hot-restarts after operations like context compression. + if (history.length > 0) { + initialHistory = history.map((item) => + 'id' in item && 'content' in item + ? item + : { id: randomUUID(), content: item }, + ); + } else if (resumedSessionData) { + // Otherwise, if resuming from disk, build from the persisted record. + initialHistory = resumedSessionData.conversation.messages + .filter((m) => m.type === 'user' || m.type === 'gemini') + .map((m) => ({ + id: m.id, + content: { + role: m.type === 'user' ? 'user' : 'model', + parts: Array.isArray(m.content) + ? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + (m.content as Part[]) + : [{ text: String(m.content) }], + }, + })); + } else { + initialHistory = []; + } this.agentHistory = new AgentChatHistory(initialHistory); this.chatRecordingService = new ChatRecordingService(context);