mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
fix(core): Resolve context loss and token calculator thrashing
This commit addresses the massive context loss and 0% utilization issue by fixing two related bugs in the context management system and adding a dampening safeguard: 1. **Masked Tool Retention Bug:** Removed the brittle 'in-flight' protection logic from `getProtectedNodeIds`. Previously, this logic failed to match `MASKED_TOOL` nodes, causing it to perpetually hoard `functionCall` nodes inside the protected list. This hoarding rapidly exhausted the context budget, forcing the GC backstop into an aggressive death spiral. Now, only the `recent_turn` and explicit tasks are protected, allowing standard GC to naturally prune old tool calls (and the `HistoryHardener` handles the rest). 2. **Adaptive Calculator Overhead Bug:** The `AdaptiveTokenCalculator` was calculating ratio drift by dividing the API's `actualTokens` (which includes the static overhead of the System Instruction and Tools) by the graph's `baseUnits` (which only counts history). We now inject a `getOverheadTokens` callback during initialization, allowing the calculator to subtract the overhead from the Gemini API count before dividing by the base units, achieving an accurate apples-to-apples ratio. 3. **Violent Oscillation Dampening:** Introduced strict dampening bounds (a `maxStep` limit and a hard `targetWeight` clamp) to the Adaptive Token Calculator's EMA. This acts like a learning-rate limit in machine learning, ensuring that a single massive outlier reading cannot violently thrash the `learnedWeight` multiplier in a single turn. Includes new test coverage verifying the `maxStep` logic and overhead subtraction. Also removed an unsafe `as unknown` cast in the `snapshotGenerator` that failed `eslint`.
This commit is contained in:
@@ -147,11 +147,11 @@ export class ContextManager {
|
||||
const agedOutNodes = new Set<string>();
|
||||
let rollingTokens = 0;
|
||||
|
||||
// Identify active tool calls that must NEVER be truncated
|
||||
// Identify nodes that must NEVER be truncated
|
||||
const protectedIds = this.getProtectedNodeIds(this.buffer.nodes);
|
||||
if (protectedIds.size > 0) {
|
||||
debugLogger.log(
|
||||
`[ContextManager] Pinning ${protectedIds.size} active tool call nodes to prevent truncation.`,
|
||||
`[ContextManager] Pinning ${protectedIds.size} nodes (recent_turn or external_active_task) to prevent truncation.`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -138,7 +138,8 @@ describe('AdaptiveTokenCalculator', () => {
|
||||
// Simulate an event where the API reported 100 tokens, and our base units were 100
|
||||
// But overhead is 40.
|
||||
// actualGraphTokens = 100 - 40 = 60
|
||||
// targetWeight = 60 / 100 = 0.6
|
||||
// rawTargetWeight = 60 / 100 = 0.6
|
||||
// targetWeight = Math.max(0.5, 0.6) = 0.6
|
||||
// newWeight = 1.0 * 0.8 + 0.6 * 0.2 = 0.8 + 0.12 = 0.92
|
||||
eventBus.emitTokenGroundTruth({
|
||||
actualTokens: 100,
|
||||
@@ -147,4 +148,43 @@ describe('AdaptiveTokenCalculator', () => {
|
||||
|
||||
expect(calculator.getLearnedWeight()).toBeCloseTo(0.92, 5);
|
||||
});
|
||||
|
||||
it('should enforce the maxStep limit to prevent violent oscillation from massive outliers', () => {
|
||||
const eventBus = new ContextEventBus();
|
||||
const maxStep = 0.05; // Tight limit
|
||||
const calculator = new AdaptiveTokenCalculator(
|
||||
charsPerToken,
|
||||
registry,
|
||||
eventBus,
|
||||
undefined,
|
||||
{ maxStep },
|
||||
);
|
||||
|
||||
// Initial state: weight = 1.0
|
||||
|
||||
// Simulate a massive outlier where the API reports 10,000 tokens for 100 base units.
|
||||
// rawTargetWeight = 100
|
||||
// targetWeight = Math.min(100, 1.0 * 2.0) = 2.0
|
||||
// emaWeight = 1.0 * 0.8 + 2.0 * 0.2 = 1.2
|
||||
// BUT maxStep is 0.05, so the actual step is clamped.
|
||||
// finalWeight = 1.0 + 0.05 = 1.05
|
||||
eventBus.emitTokenGroundTruth({
|
||||
actualTokens: 10000,
|
||||
promptBaseUnits: 100,
|
||||
});
|
||||
|
||||
expect(calculator.getLearnedWeight()).toBeCloseTo(1.05, 5);
|
||||
|
||||
// Simulate a massive under-estimation
|
||||
// rawTargetWeight = 0
|
||||
// targetWeight = Math.max(0, 1.05 * 0.5) = 0.525
|
||||
// emaWeight = 1.05 * 0.8 + 0.525 * 0.2 = 0.84 + 0.105 = 0.945
|
||||
// BUT maxStep is 0.05, so step is clamped: 1.05 - 0.05 = 1.0
|
||||
eventBus.emitTokenGroundTruth({
|
||||
actualTokens: 0,
|
||||
promptBaseUnits: 100,
|
||||
});
|
||||
|
||||
expect(calculator.getLearnedWeight()).toBeCloseTo(1.0, 5);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -14,6 +14,13 @@ import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';
|
||||
import type { ContextEventBus, TokenGroundTruthEvent } from '../eventBus.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
|
||||
export interface AdaptiveLearningConfig {
|
||||
/** The momentum factor for the Exponential Moving Average (EMA). Defaults to 0.2. */
|
||||
learningRate?: number;
|
||||
/** The absolute maximum change allowed to the weight in a single turn. Defaults to 0.15. */
|
||||
maxStep?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* An Adaptive Token Calculator that dynamically learns the true token cost of the user's
|
||||
* conversation by applying an Exponential Moving Average (EMA) gradient descent to
|
||||
@@ -26,13 +33,18 @@ import { debugLogger } from '../../utils/debugLogger.js';
|
||||
export class AdaptiveTokenCalculator implements AdvancedTokenCalculator {
|
||||
private learnedWeight = 1.0;
|
||||
private readonly baseCalculator: StaticTokenCalculator;
|
||||
private readonly learningRate: number;
|
||||
private readonly maxStep: number;
|
||||
|
||||
constructor(
|
||||
charsPerToken: number,
|
||||
registry: NodeBehaviorRegistry,
|
||||
eventBus: ContextEventBus,
|
||||
private readonly getOverheadTokens?: () => number,
|
||||
config?: AdaptiveLearningConfig,
|
||||
) {
|
||||
this.learningRate = config?.learningRate ?? 0.2;
|
||||
this.maxStep = config?.maxStep ?? 0.15;
|
||||
this.baseCalculator = new StaticTokenCalculator(charsPerToken, registry);
|
||||
eventBus.onTokenGroundTruth((event: TokenGroundTruthEvent) => {
|
||||
this.handleGroundTruth(event.actualTokens, event.promptBaseUnits);
|
||||
@@ -53,20 +65,33 @@ export class AdaptiveTokenCalculator implements AdvancedTokenCalculator {
|
||||
const actualGraphTokens = Math.max(0, actualTokens - overheadTokens);
|
||||
|
||||
// Determine what ratio we should have used
|
||||
const targetWeight = actualGraphTokens / promptBaseUnits;
|
||||
const rawTargetWeight = actualGraphTokens / promptBaseUnits;
|
||||
const oldWeight = this.learnedWeight;
|
||||
|
||||
// Apply Momentum (Learning Rate)
|
||||
const learningRate = 0.2;
|
||||
const newWeight =
|
||||
oldWeight * (1 - learningRate) + targetWeight * learningRate;
|
||||
// Dampen extreme outliers *before* applying the EMA by capping the target weight
|
||||
// to a reasonable multiple of the current weight. This prevents a single massive
|
||||
// anomaly from destroying the running average.
|
||||
const targetWeight = Math.max(
|
||||
oldWeight * 0.5,
|
||||
Math.min(rawTargetWeight, oldWeight * 2.0),
|
||||
);
|
||||
|
||||
// Clamp to reasonable safety bounds to prevent rogue metadata poisoning the system
|
||||
// Apply Momentum (Learning Rate)
|
||||
let newWeight =
|
||||
oldWeight * (1 - this.learningRate) + targetWeight * this.learningRate;
|
||||
|
||||
// Hard limit the maximum step size per turn to prevent violent oscillation
|
||||
if (newWeight > oldWeight + this.maxStep)
|
||||
newWeight = oldWeight + this.maxStep;
|
||||
if (newWeight < oldWeight - this.maxStep)
|
||||
newWeight = oldWeight - this.maxStep;
|
||||
|
||||
// Clamp to reasonable absolute safety bounds
|
||||
this.learnedWeight = Math.max(0.5, Math.min(newWeight, 2.0));
|
||||
|
||||
debugLogger.log(
|
||||
`[AdaptiveTokenCalculator] Learned weight updated to ${this.learnedWeight.toFixed(3)} ` +
|
||||
`(API Tokens: ${actualTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`,
|
||||
`(API Tokens: ${actualTokens}, Overhead: ${overheadTokens}, Graph Tokens: ${actualGraphTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ import { makeResolvedModelConfig } from '../services/modelConfigServiceTestUtils
|
||||
import type { HookSystem } from '../hooks/hookSystem.js';
|
||||
import { LlmRole } from '../telemetry/types.js';
|
||||
import { BINARY_INJECTION_KEY } from '../utils/generateContentResponseUtilities.js';
|
||||
import type { ResumedSessionData } from '../services/chatRecordingTypes.js';
|
||||
|
||||
// Mock fs module to prevent actual file system operations during tests
|
||||
const mockFileSystem = new Map<string, string>();
|
||||
@@ -250,6 +251,60 @@ describe('GeminiChat', () => {
|
||||
const chatEmpty = new GeminiChat(mockConfig);
|
||||
expect(chatEmpty.getLastPromptTokenCount()).toBe(0);
|
||||
});
|
||||
|
||||
it('should prioritize in-memory history over resumedSessionData', () => {
|
||||
// This test simulates a "hot restart" after a context management operation
|
||||
// like compression, where the in-memory history is shorter and more up-to-date
|
||||
// than the session data that might be on disk.
|
||||
|
||||
// 1. A stale, longer history from a persisted session record
|
||||
const resumedSessionData = {
|
||||
conversation: {
|
||||
messages: [
|
||||
{
|
||||
id: 'a',
|
||||
type: 'user',
|
||||
content: [{ text: 'turn 1' }],
|
||||
create_time: new Date(),
|
||||
},
|
||||
{
|
||||
id: 'b',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'turn 2' }],
|
||||
create_time: new Date(),
|
||||
},
|
||||
{
|
||||
id: 'c',
|
||||
type: 'user',
|
||||
content: [{ text: 'turn 3' }],
|
||||
create_time: new Date(),
|
||||
},
|
||||
],
|
||||
},
|
||||
} as unknown as ResumedSessionData;
|
||||
|
||||
// 2. A fresh, compressed in-memory history
|
||||
const compressedHistory: HistoryTurn[] = [
|
||||
{
|
||||
id: 'summary-1',
|
||||
content: { role: 'user', parts: [{ text: 'summary of turns 1-3' }] },
|
||||
},
|
||||
];
|
||||
|
||||
// 3. Instantiate the chat, providing both.
|
||||
const chat = new GeminiChat(
|
||||
mockConfig,
|
||||
'',
|
||||
[],
|
||||
compressedHistory, // This should be prioritized
|
||||
resumedSessionData, // This should be ignored
|
||||
);
|
||||
|
||||
// 4. Assert that the shorter, in-memory history was used.
|
||||
const finalHistory = chat.getHistoryTurns();
|
||||
expect(finalHistory).toHaveLength(1);
|
||||
expect(finalHistory[0].id).toBe('summary-1');
|
||||
});
|
||||
});
|
||||
|
||||
describe('setHistory', () => {
|
||||
|
||||
@@ -283,24 +283,33 @@ export class GeminiChat {
|
||||
private readonly onModelChanged?: (modelId: string) => Promise<Tool[]>,
|
||||
) {
|
||||
validateHistory(history);
|
||||
const initialHistory: HistoryTurn[] = resumedSessionData
|
||||
? resumedSessionData.conversation.messages
|
||||
.filter((m) => m.type === 'user' || m.type === 'gemini')
|
||||
.map((m) => ({
|
||||
id: m.id,
|
||||
content: {
|
||||
role: m.type === 'user' ? 'user' : 'model',
|
||||
parts: Array.isArray(m.content)
|
||||
? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
(m.content as Part[])
|
||||
: [{ text: String(m.content) }],
|
||||
},
|
||||
}))
|
||||
: history.map((item) =>
|
||||
'id' in item && 'content' in item
|
||||
? item
|
||||
: { id: randomUUID(), content: item },
|
||||
);
|
||||
|
||||
let initialHistory: HistoryTurn[];
|
||||
// If history is passed, it is the most up-to-date in-memory state and takes precedence.
|
||||
// This is critical for hot-restarts after operations like context compression.
|
||||
if (history.length > 0) {
|
||||
initialHistory = history.map((item) =>
|
||||
'id' in item && 'content' in item
|
||||
? item
|
||||
: { id: randomUUID(), content: item },
|
||||
);
|
||||
} else if (resumedSessionData) {
|
||||
// Otherwise, if resuming from disk, build from the persisted record.
|
||||
initialHistory = resumedSessionData.conversation.messages
|
||||
.filter((m) => m.type === 'user' || m.type === 'gemini')
|
||||
.map((m) => ({
|
||||
id: m.id,
|
||||
content: {
|
||||
role: m.type === 'user' ? 'user' : 'model',
|
||||
parts: Array.isArray(m.content)
|
||||
? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
(m.content as Part[])
|
||||
: [{ text: String(m.content) }],
|
||||
},
|
||||
}));
|
||||
} else {
|
||||
initialHistory = [];
|
||||
}
|
||||
|
||||
this.agentHistory = new AgentChatHistory(initialHistory);
|
||||
this.chatRecordingService = new ChatRecordingService(context);
|
||||
|
||||
Reference in New Issue
Block a user