fix(core): Resolve context loss and token calculator thrashing

This commit addresses the massive context loss and 0% utilization issue by fixing two related bugs in the context management system:

1.  **Masked Tool Retention Bug:** Removed the brittle 'in-flight' protection logic from `getProtectedNodeIds`. Previously, this logic failed to match `MASKED_TOOL` nodes, causing it to perpetually hoard `functionCall` nodes inside the protected list. This hoarding rapidly exhausted the context budget, forcing the GC backstop into an aggressive death spiral. Now, only the `recent_turn` and explicit tasks are protected, allowing standard GC to naturally prune old tool calls (and the `HistoryHardener` handles the rest).
2.  **Adaptive Calculator Overhead Bug:** The `AdaptiveTokenCalculator` was calculating ratio drift by dividing the API's `actualTokens` (which includes the static overhead of the System Instruction and Tools) by the graph's `baseUnits` (which only counts history). We now inject a `getOverheadTokens` callback during initialization, allowing the calculator to subtract the overhead from the Gemini API count before dividing by the base units, achieving an accurate apples-to-apples ratio.
3.  **Violent Oscillation Dampening:** Introduced strict dampening bounds (`maxStep` limit and a hard `targetWeight` clamp) to the Adaptive Token Calculator's EMA. This acts like a learning-rate limit in machine learning, ensuring that a single massive outlier reading cannot violently thrash the `learnedWeight` multiplier in a single turn.

Includes new test coverage verifying the maxStep logic and overhead subtraction. Also removed an unsafe `as unknown` cast in the `snapshotGenerator` that failed `eslint`.
This commit is contained in:
Your Name
2026-05-14 23:26:02 +00:00
parent 665b5ca6c7
commit 26602f97fb
5 changed files with 157 additions and 28 deletions
+2 -2
View File
@@ -147,11 +147,11 @@ export class ContextManager {
const agedOutNodes = new Set<string>();
let rollingTokens = 0;
// Identify active tool calls that must NEVER be truncated
// Identify nodes that must NEVER be truncated
const protectedIds = this.getProtectedNodeIds(this.buffer.nodes);
if (protectedIds.size > 0) {
debugLogger.log(
`[ContextManager] Pinning ${protectedIds.size} active tool call nodes to prevent truncation.`,
`[ContextManager] Pinning ${protectedIds.size} nodes (recent_turn or external_active_task) to prevent truncation.`,
);
}
@@ -138,7 +138,8 @@ describe('AdaptiveTokenCalculator', () => {
// Simulate an event where the API reported 100 tokens, and our base units were 100
// But overhead is 40.
// actualGraphTokens = 100 - 40 = 60
// targetWeight = 60 / 100 = 0.6
// rawTargetWeight = 60 / 100 = 0.6
// targetWeight = Math.max(0.5, 0.6) = 0.6
// newWeight = 1.0 * 0.8 + 0.6 * 0.2 = 0.8 + 0.12 = 0.92
eventBus.emitTokenGroundTruth({
actualTokens: 100,
@@ -147,4 +148,43 @@ describe('AdaptiveTokenCalculator', () => {
expect(calculator.getLearnedWeight()).toBeCloseTo(0.92, 5);
});
it('should enforce the maxStep limit to prevent violent oscillation from massive outliers', () => {
const eventBus = new ContextEventBus();
const maxStep = 0.05; // Tight limit
const calculator = new AdaptiveTokenCalculator(
charsPerToken,
registry,
eventBus,
undefined,
{ maxStep },
);
// Initial state: weight = 1.0
// Simulate a massive outlier where the API reports 10,000 tokens for 100 base units.
// rawTargetWeight = 100
// targetWeight = Math.min(100, 1.0 * 2.0) = 2.0
// emaWeight = 1.0 * 0.8 + 2.0 * 0.2 = 1.2
// BUT maxStep is 0.05, so the actual step is clamped.
// finalWeight = 1.0 + 0.05 = 1.05
eventBus.emitTokenGroundTruth({
actualTokens: 10000,
promptBaseUnits: 100,
});
expect(calculator.getLearnedWeight()).toBeCloseTo(1.05, 5);
// Simulate a massive under-estimation
// rawTargetWeight = 0
// targetWeight = Math.max(0, 1.05 * 0.5) = 0.525
// emaWeight = 1.05 * 0.8 + 0.525 * 0.2 = 0.84 + 0.105 = 0.945
// BUT maxStep is 0.05, so step is clamped: 1.05 - 0.05 = 1.0
eventBus.emitTokenGroundTruth({
actualTokens: 0,
promptBaseUnits: 100,
});
expect(calculator.getLearnedWeight()).toBeCloseTo(1.0, 5);
});
});
@@ -14,6 +14,13 @@ import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';
import type { ContextEventBus, TokenGroundTruthEvent } from '../eventBus.js';
import { debugLogger } from '../../utils/debugLogger.js';
export interface AdaptiveLearningConfig {
/** The momentum factor for the Exponential Moving Average (EMA). Defaults to 0.2. */
learningRate?: number;
/** The absolute maximum change allowed to the weight in a single turn. Defaults to 0.15. */
maxStep?: number;
}
/**
* An Adaptive Token Calculator that dynamically learns the true token cost of the user's
* conversation by applying an Exponential Moving Average (EMA) gradient descent to
@@ -26,13 +33,18 @@ import { debugLogger } from '../../utils/debugLogger.js';
export class AdaptiveTokenCalculator implements AdvancedTokenCalculator {
private learnedWeight = 1.0;
private readonly baseCalculator: StaticTokenCalculator;
private readonly learningRate: number;
private readonly maxStep: number;
constructor(
charsPerToken: number,
registry: NodeBehaviorRegistry,
eventBus: ContextEventBus,
private readonly getOverheadTokens?: () => number,
config?: AdaptiveLearningConfig,
) {
this.learningRate = config?.learningRate ?? 0.2;
this.maxStep = config?.maxStep ?? 0.15;
this.baseCalculator = new StaticTokenCalculator(charsPerToken, registry);
eventBus.onTokenGroundTruth((event: TokenGroundTruthEvent) => {
this.handleGroundTruth(event.actualTokens, event.promptBaseUnits);
@@ -53,20 +65,33 @@ export class AdaptiveTokenCalculator implements AdvancedTokenCalculator {
const actualGraphTokens = Math.max(0, actualTokens - overheadTokens);
// Determine what ratio we should have used
const targetWeight = actualGraphTokens / promptBaseUnits;
const rawTargetWeight = actualGraphTokens / promptBaseUnits;
const oldWeight = this.learnedWeight;
// Apply Momentum (Learning Rate)
const learningRate = 0.2;
const newWeight =
oldWeight * (1 - learningRate) + targetWeight * learningRate;
// Dampen extreme outliers *before* applying the EMA by capping the target weight
// to a reasonable multiple of the current weight. This prevents a single massive
// anomaly from destroying the running average.
const targetWeight = Math.max(
oldWeight * 0.5,
Math.min(rawTargetWeight, oldWeight * 2.0),
);
// Clamp to reasonable safety bounds to prevent rogue metadata poisoning the system
// Apply Momentum (Learning Rate)
let newWeight =
oldWeight * (1 - this.learningRate) + targetWeight * this.learningRate;
// Hard limit the maximum step size per turn to prevent violent oscillation
if (newWeight > oldWeight + this.maxStep)
newWeight = oldWeight + this.maxStep;
if (newWeight < oldWeight - this.maxStep)
newWeight = oldWeight - this.maxStep;
// Clamp to reasonable absolute safety bounds
this.learnedWeight = Math.max(0.5, Math.min(newWeight, 2.0));
debugLogger.log(
`[AdaptiveTokenCalculator] Learned weight updated to ${this.learnedWeight.toFixed(3)} ` +
`(API Tokens: ${actualTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`,
`(API Tokens: ${actualTokens}, Overhead: ${overheadTokens}, Graph Tokens: ${actualGraphTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`,
);
}
+55
View File
@@ -41,6 +41,7 @@ import { makeResolvedModelConfig } from '../services/modelConfigServiceTestUtils
import type { HookSystem } from '../hooks/hookSystem.js';
import { LlmRole } from '../telemetry/types.js';
import { BINARY_INJECTION_KEY } from '../utils/generateContentResponseUtilities.js';
import type { ResumedSessionData } from '../services/chatRecordingTypes.js';
// Mock fs module to prevent actual file system operations during tests
const mockFileSystem = new Map<string, string>();
@@ -250,6 +251,60 @@ describe('GeminiChat', () => {
const chatEmpty = new GeminiChat(mockConfig);
expect(chatEmpty.getLastPromptTokenCount()).toBe(0);
});
it('should prioritize in-memory history over resumedSessionData', () => {
// This test simulates a "hot restart" after a context management operation
// like compression, where the in-memory history is shorter and more up-to-date
// than the session data that might be on disk.
// 1. A stale, longer history from a persisted session record
const resumedSessionData = {
conversation: {
messages: [
{
id: 'a',
type: 'user',
content: [{ text: 'turn 1' }],
create_time: new Date(),
},
{
id: 'b',
type: 'gemini',
content: [{ text: 'turn 2' }],
create_time: new Date(),
},
{
id: 'c',
type: 'user',
content: [{ text: 'turn 3' }],
create_time: new Date(),
},
],
},
} as unknown as ResumedSessionData;
// 2. A fresh, compressed in-memory history
const compressedHistory: HistoryTurn[] = [
{
id: 'summary-1',
content: { role: 'user', parts: [{ text: 'summary of turns 1-3' }] },
},
];
// 3. Instantiate the chat, providing both.
const chat = new GeminiChat(
mockConfig,
'',
[],
compressedHistory, // This should be prioritized
resumedSessionData, // This should be ignored
);
// 4. Assert that the shorter, in-memory history was used.
const finalHistory = chat.getHistoryTurns();
expect(finalHistory).toHaveLength(1);
expect(finalHistory[0].id).toBe('summary-1');
});
});
describe('setHistory', () => {
+27 -18
View File
@@ -283,24 +283,33 @@ export class GeminiChat {
private readonly onModelChanged?: (modelId: string) => Promise<Tool[]>,
) {
validateHistory(history);
const initialHistory: HistoryTurn[] = resumedSessionData
? resumedSessionData.conversation.messages
.filter((m) => m.type === 'user' || m.type === 'gemini')
.map((m) => ({
id: m.id,
content: {
role: m.type === 'user' ? 'user' : 'model',
parts: Array.isArray(m.content)
? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
(m.content as Part[])
: [{ text: String(m.content) }],
},
}))
: history.map((item) =>
'id' in item && 'content' in item
? item
: { id: randomUUID(), content: item },
);
let initialHistory: HistoryTurn[];
// If history is passed, it is the most up-to-date in-memory state and takes precedence.
// This is critical for hot-restarts after operations like context compression.
if (history.length > 0) {
initialHistory = history.map((item) =>
'id' in item && 'content' in item
? item
: { id: randomUUID(), content: item },
);
} else if (resumedSessionData) {
// Otherwise, if resuming from disk, build from the persisted record.
initialHistory = resumedSessionData.conversation.messages
.filter((m) => m.type === 'user' || m.type === 'gemini')
.map((m) => ({
id: m.id,
content: {
role: m.type === 'user' ? 'user' : 'model',
parts: Array.isArray(m.content)
? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
(m.content as Part[])
: [{ text: String(m.content) }],
},
}));
} else {
initialHistory = [];
}
this.agentHistory = new AgentChatHistory(initialHistory);
this.chatRecordingService = new ChatRecordingService(context);