From 07792f98cdd43a791e5b31af019934d4a2e9824f Mon Sep 17 00:00:00 2001 From: joshualitt Date: Tue, 12 May 2026 08:51:20 -0700 Subject: [PATCH] feat(context): Introduce adaptive token calculator to more accurately calculate content sizes. (#26888) --- .../context/contextManager.barrier.test.ts | 8 +- .../context/contextManager.hotstart.test.ts | 92 ++++++++++ packages/core/src/context/contextManager.ts | 114 ++++++++++-- packages/core/src/context/eventBus.ts | 13 ++ .../core/src/context/graph/render.test.ts | 54 +++++- packages/core/src/context/graph/render.ts | 44 ++++- packages/core/src/context/initializer.ts | 18 +- .../context/pipeline/environmentImpl.test.ts | 6 + .../src/context/pipeline/environmentImpl.ts | 15 +- .../blobDegradationProcessor.test.ts | 4 +- .../stateSnapshotAsyncProcessor.test.ts | 4 +- .../lifecycle.golden.test.ts.snap | 100 ++++------- .../context/system-tests/hysteresis.test.ts | 57 +++--- .../context/system-tests/simulationHarness.ts | 14 +- .../src/context/testing/contextTestUtils.ts | 43 ++++- .../utils/adaptiveTokenCalculator.test.ts | 125 ++++++++++++++ .../context/utils/adaptiveTokenCalculator.ts | 163 ++++++++++++++++++ .../utils/contextTokenCalculator.test.ts | 16 +- .../context/utils/contextTokenCalculator.ts | 64 ++++++- .../context/utils/snapshotGenerator.test.ts | 3 + packages/core/src/core/baseLlmClient.ts | 4 + packages/core/src/core/client.test.ts | 8 +- packages/core/src/core/client.ts | 25 ++- packages/core/src/core/geminiChat.test.ts | 2 +- .../core/src/utils/tokenCalculation.test.ts | 20 +++ packages/core/src/utils/tokenCalculation.ts | 4 +- 26 files changed, 856 insertions(+), 164 deletions(-) create mode 100644 packages/core/src/context/contextManager.hotstart.test.ts create mode 100644 packages/core/src/context/utils/adaptiveTokenCalculator.test.ts create mode 100644 packages/core/src/context/utils/adaptiveTokenCalculator.ts diff --git a/packages/core/src/context/contextManager.barrier.test.ts b/packages/core/src/context/contextManager.barrier.test.ts index 438f9d3230..e46637d7d8 100644 --- a/packages/core/src/context/contextManager.barrier.test.ts +++ b/packages/core/src/context/contextManager.barrier.test.ts @@ -37,8 +37,8 @@ describe('ContextManager Sync Pressure Barrier Tests', () => { ]); // 3. Add massive history that blows past the 150k maxTokens limit - // 20 turns * 10,000 tokens/turn = ~200,000 tokens - const massiveHistory = createSyntheticHistory(20, 35000); + // 20 turns * ~20,000 tokens/turn (10k user + 10k model) = ~400,000 tokens + const massiveHistory = createSyntheticHistory(20, 10000); chatHistory.set([...chatHistory.get(), ...massiveHistory]); // 4. 
Add the Latest Turn (Protected) @@ -60,8 +60,8 @@ describe('ContextManager Sync Pressure Barrier Tests', () => { // Verify Episode 0 (System) was pruned, so we now start with a sentinel due to role alternation expect(projection[0].role).toBe('user'); - expect(projection[0].parts![0].text).toContain('User turn 17'); - + const projectionString = JSON.stringify(projection); + expect(projectionString).toContain('User turn 17'); // Filter out synthetic Yield nodes (they are model responses without actual tool/text bodies) const contentNodes = projection.filter( (p) => diff --git a/packages/core/src/context/contextManager.hotstart.test.ts b/packages/core/src/context/contextManager.hotstart.test.ts new file mode 100644 index 0000000000..5d0d848267 --- /dev/null +++ b/packages/core/src/context/contextManager.hotstart.test.ts @@ -0,0 +1,92 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { + createMockContextConfig, + setupContextComponentTest, + createMockLlmClient, +} from './testing/contextTestUtils.js'; +import { stressTestProfile } from './config/profiles.js'; + +describe('ContextManager - Hot Start Calibration', () => { + it('should not perform calibration if the buffer is empty', async () => { + const mockLlm = createMockLlmClient(); + const config = createMockContextConfig(undefined, mockLlm); + const { contextManager } = setupContextComponentTest( + config, + stressTestProfile, + ); + + // We can spy on the underlying mock LLM client countTokens + const countTokensSpy = vi.spyOn(mockLlm, 'countTokens'); + + // Render an empty graph + await contextManager.renderHistory(); + + expect(countTokensSpy).not.toHaveBeenCalled(); + }); + + it('should perform calibration exactly once when rendering with existing nodes', async () => { + const mockLlm = createMockLlmClient(); + const countTokensSpy = vi + .spyOn(mockLlm, 'countTokens') + .mockResolvedValue({ totalTokens: 42 }); + + const config = createMockContextConfig(undefined, mockLlm); + const { contextManager, chatHistory } = setupContextComponentTest( + config, + stressTestProfile, + ); + + // We need to access the env's eventBus inside the contextManager + const env = Reflect.get(contextManager, 'env'); + const emitGroundTruthSpy = vi.spyOn(env.eventBus, 'emitTokenGroundTruth'); + + // Add a node to make the buffer non-empty + chatHistory.set([{ role: 'user', parts: [{ text: 'Hello' }] }]); + + // First render should trigger calibration + await contextManager.renderHistory(); + + expect(countTokensSpy).toHaveBeenCalledTimes(1); + expect(emitGroundTruthSpy).toHaveBeenCalledTimes(1); + expect(emitGroundTruthSpy).toHaveBeenCalledWith( + expect.objectContaining({ + actualTokens: 42, + promptBaseUnits: 10, + }), + ); + + // Second render should skip calibration + await contextManager.renderHistory(); + expect(countTokensSpy).toHaveBeenCalledTimes(1); + // emit hasn't been called again + expect(emitGroundTruthSpy).toHaveBeenCalledTimes(1); + }); + + it('should silently swallow errors if countTokens API fails', async () => { + const mockLlm = createMockLlmClient(); + const countTokensSpy = vi + .spyOn(mockLlm, 'countTokens') + .mockRejectedValue(new Error('API failure')); + + const config = createMockContextConfig(undefined, mockLlm); + const { contextManager, chatHistory } = setupContextComponentTest( + config, + stressTestProfile, + ); + + // Add a node + chatHistory.set([{ role: 'user', parts: [{ text: 'Hello' }] }]); + + // Render should 
succeed without throwing + const result = await contextManager.renderHistory(); + + expect(result.history).toBeDefined(); + expect(countTokensSpy).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/core/src/context/contextManager.ts b/packages/core/src/context/contextManager.ts index e949090cc1..f161e0903b 100644 --- a/packages/core/src/context/contextManager.ts +++ b/packages/core/src/context/contextManager.ts @@ -18,6 +18,7 @@ import { ContextWorkingBufferImpl } from './pipeline/contextWorkingBuffer.js'; import { debugLogger } from '../utils/debugLogger.js'; import { hardenHistory } from '../utils/historyHardening.js'; import { checkContextInvariants } from './utils/invariantChecker.js'; +import type { AdvancedTokenCalculator } from './utils/contextTokenCalculator.js'; export class ContextManager { // The master state containing the pristine graph and current active graph. @@ -36,15 +37,22 @@ export class ContextManager { // Cache for Anomaly 3 (Redundant Renders) private lastRenderCache?: { nodesHash: string; - result: { history: Content[]; didApplyManagement: boolean }; + result: { + history: Content[]; + didApplyManagement: boolean; + baseUnits: number; + }; }; + private hasPerformedHotStart = false; + constructor( private readonly sidecar: ContextProfile, private readonly env: ContextEnvironment, private readonly tracer: ContextTracer, orchestrator: PipelineOrchestrator, chatHistory: AgentChatHistory, + private readonly advancedTokenCalculator: AdvancedTokenCalculator, private readonly headerProvider?: () => Promise, ) { this.eventBus = env.eventBus; @@ -260,6 +268,10 @@ export class ContextManager { return [...this.buffer.nodes]; } + getEnvironment(): ContextEnvironment { + return this.env; + } + /** * Executes the final 'gc_backstop' pipeline if necessary, enforcing the token budget, * and maps the Episodic Context Graph back into a raw Gemini Content[] array for transmission. @@ -268,22 +280,44 @@ export class ContextManager { async renderHistory( pendingRequest?: Content, activeTaskIds: Set = new Set(), - ): Promise<{ history: Content[]; didApplyManagement: boolean }> { + abortSignal?: AbortSignal, + ): Promise<{ + history: Content[]; + didApplyManagement: boolean; + baseUnits: number; + }> { this.tracer.logEvent('ContextManager', 'Starting rendering of LLM context'); + let previewNodes: ConcreteNode[] = []; + if (pendingRequest) { + previewNodes = this.env.graphMapper.applyEvent({ + type: 'PUSH', + payload: [pendingRequest], + }); + } + + // --- Hot Start Calibration --- + // If we are resuming a session with history, we don't want the adaptive token calculator + // to fly blind on its first GC pass. We do a one-time API calibration. + const hotStartPromise = (async () => { + if (!this.hasPerformedHotStart) { + this.hasPerformedHotStart = true; + if (this.buffer.nodes.length > 0) { + const nodesForHotStart = [...this.buffer.nodes, ...previewNodes]; + await this.performHotStartCalibration(nodesForHotStart, abortSignal); + } + } + })(); + // 1. Synchronous Pressure Barrier: Wait for background management pipelines to finish. - // This ensures that the render sees the results of recent pushes (Anomaly 2). - await this.orchestrator.waitForPipelines(); + // We run hot start calibration in parallel to hide the network latency. + await Promise.all([this.orchestrator.waitForPipelines(), hotStartPromise]); let nodes = this.buffer.nodes; const previewNodeIds = new Set(); - // If we have a pending request, we need to build a 'preview' graph for this render. 
- if (pendingRequest) { - const previewNodes = this.env.graphMapper.applyEvent({ - type: 'PUSH', - payload: [pendingRequest], - }); + // Apply the preview nodes to the final graph + if (previewNodes.length > 0) { for (const n of previewNodes) { previewNodeIds.add(n.id); } @@ -294,9 +328,6 @@ export class ContextManager { const header = this.headerProvider ? await this.headerProvider() : undefined; - const headerTokens = header - ? this.env.tokenCalculator.calculateContentTokens(header) - : 0; // 3. Cache Check (Anomaly 3): If nodes haven't changed, return previous result. // We combine the graph hash with a hash of the header to ensure total freshness. @@ -314,14 +345,19 @@ export class ContextManager { const protectionReasons = this.getProtectedNodeIds(nodes, activeTaskIds); // Apply final GC Backstop pressure barrier synchronously before mapping - const { history: renderedHistory, didApplyManagement } = await render( + const { + history: renderedHistory, + didApplyManagement, + baseUnits, + } = await render( nodes, this.orchestrator, this.sidecar, this.tracer, this.env, + this.advancedTokenCalculator, protectionReasons, - headerTokens, + header, previewNodeIds, ); @@ -339,10 +375,58 @@ export class ContextManager { sentinels: this.sidecar.sentinels, }), didApplyManagement, + baseUnits, }; // Update cache this.lastRenderCache = { nodesHash: totalHash, result }; return result; } + + private async performHotStartCalibration( + nodes: readonly ConcreteNode[], + abortSignal?: AbortSignal, + ) { + try { + this.tracer.logEvent( + 'ContextManager', + 'Performing Hot Start Token Calibration', + ); + + const contents = this.env.graphMapper.fromGraph(nodes); + const header = this.headerProvider + ? await this.headerProvider() + : undefined; + const combinedHistory = header ? [header, ...contents] : contents; + + const baseUnits = + this.advancedTokenCalculator.getRawBaseUnits(nodes) + + (header + ? this.advancedTokenCalculator.getRawBaseUnitsForContent(header) + : 0); + + // We only make the network call if we have actual contents to send, + // avoiding 400 Bad Request errors from the API. + if (combinedHistory.length > 0) { + const result = await this.env.llmClient.countTokens({ + contents: combinedHistory, + abortSignal, + }); + if (result.totalTokens > 0) { + this.env.eventBus.emitTokenGroundTruth({ + actualTokens: result.totalTokens, + promptBaseUnits: baseUnits, + }); + } + } + } catch (error) { + // Hot start calibration is purely an optimization. If the network fails or auth is weird, + // we silently swallow and fallback to the un-calibrated 1.0 ratio heuristic. 
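+      // (For reference: AdaptiveTokenCalculator starts at learnedWeight = 1.0,
+      // so until a TOKEN_GROUND_TRUTH event arrives its estimates are exactly
+      // the raw base-unit heuristic.)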
+ this.tracer.logEvent( + 'ContextManager', + 'Hot Start Token Calibration Failed (Ignored)', + { error }, + ); + } + } } diff --git a/packages/core/src/context/eventBus.ts b/packages/core/src/context/eventBus.ts index 82e4d45e67..407dc4d6fc 100644 --- a/packages/core/src/context/eventBus.ts +++ b/packages/core/src/context/eventBus.ts @@ -29,7 +29,20 @@ export interface ChunkReceivedEvent { targetNodeIds: Set; } +export interface TokenGroundTruthEvent { + actualTokens: number; + promptBaseUnits: number; +} + export class ContextEventBus extends EventEmitter { + emitTokenGroundTruth(event: TokenGroundTruthEvent) { + this.emit('TOKEN_GROUND_TRUTH', event); + } + + onTokenGroundTruth(listener: (event: TokenGroundTruthEvent) => void) { + this.on('TOKEN_GROUND_TRUTH', listener); + } + emitPristineHistoryUpdated(event: PristineHistoryUpdatedEvent) { this.emit('PRISTINE_HISTORY_UPDATED', event); } diff --git a/packages/core/src/context/graph/render.test.ts b/packages/core/src/context/graph/render.test.ts index e3890ae437..1f862ff768 100644 --- a/packages/core/src/context/graph/render.test.ts +++ b/packages/core/src/context/graph/render.test.ts @@ -8,6 +8,7 @@ import { describe, it, expect, vi } from 'vitest'; import { render } from './render.js'; import type { ConcreteNode } from './types.js'; import { NodeType } from './types.js'; +import type { AdvancedTokenCalculator } from '../utils/contextTokenCalculator.js'; import type { ContextEnvironment } from '../pipeline/environment.js'; import type { ContextTracer } from '../tracer.js'; import type { ContextProfile } from '../config/profiles.js'; @@ -37,7 +38,20 @@ describe('render', () => { const orchestrator = {} as PipelineOrchestrator; const sidecar = { config: {} } as ContextProfile; // No budget + const mockAdvancedTokenCalculator = { + calculateTokensAndBaseUnits: vi.fn().mockReturnValue({ + tokens: 100, + baseUnits: 100, + }), + getRawBaseUnits: vi.fn().mockReturnValue(100), + getRawBaseUnitsForContent: vi.fn().mockReturnValue(0), + }; + const env = { + tokenCalculator: { + calculateConcreteListTokens: vi.fn().mockReturnValue(100), + calculateTokenBreakdown: vi.fn().mockReturnValue({}), + }, graphMapper: { fromGraph: vi.fn((nodes: readonly ConcreteNode[]) => nodes.map((n) => ({ text: n.id })), @@ -54,12 +68,14 @@ describe('render', () => { sidecar, tracer, env, + mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, new Map(), - 0, + undefined, previewNodeIds, ); expect(result.history).toEqual([{ text: '1' }, { text: '2' }]); + expect(result.baseUnits).toBe(100); }); it('simulates the boundary knapsack problem (loose boundary policy)', async () => { @@ -108,12 +124,24 @@ describe('render', () => { const currentTokens = 160000; + const mockAdvancedTokenCalculator = { + calculateTokensAndBaseUnits: vi.fn((nodes: readonly ConcreteNode[]) => { + const tokens = + nodes.length === 1 ? 
tokenMap[nodes[0].id] : currentTokens; + return { tokens, baseUnits: tokens }; + }), + getRawBaseUnits: vi.fn((nodes: readonly ConcreteNode[]) => { + if (nodes.length === 1) return tokenMap[nodes[0].id]; + return currentTokens; + }), + }; + const env = { llmClient: { countTokens: vi.fn().mockResolvedValue({ totalTokens: 1000 }), }, tokenCalculator: { - calculateConcreteListTokens: vi.fn((nodes) => { + calculateConcreteListTokens: vi.fn((nodes: readonly ConcreteNode[]) => { if (nodes.length === 1) return tokenMap[nodes[0].id]; return currentTokens; }), @@ -136,8 +164,9 @@ describe('render', () => { sidecar, tracer, env, + mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, new Map(), - 0, + undefined, new Set(), ); @@ -147,6 +176,7 @@ describe('render', () => { // Adding C pushes rolling total (70k) above retainedTokens (65k). // Under loose policy, C survives. D is strictly older and drops. expect(surviving).toEqual(['C', 'B', 'A']); // D is dropped + expect(result.baseUnits).toBe(160000); }); it('drops nodes that are STRICTLY older than the boundary node', async () => { @@ -188,12 +218,24 @@ describe('render', () => { const currentTokens = 160000; + const mockAdvancedTokenCalculator = { + calculateTokensAndBaseUnits: vi.fn((nodes: readonly ConcreteNode[]) => { + const tokens = + nodes.length === 1 ? tokenMap[nodes[0].id] : currentTokens; + return { tokens, baseUnits: tokens }; + }), + getRawBaseUnits: vi.fn((nodes: readonly ConcreteNode[]) => { + if (nodes.length === 1) return tokenMap[nodes[0].id]; + return currentTokens; + }), + }; + const env = { llmClient: { countTokens: vi.fn().mockResolvedValue({ totalTokens: 1000 }), }, tokenCalculator: { - calculateConcreteListTokens: vi.fn((nodes) => { + calculateConcreteListTokens: vi.fn((nodes: readonly ConcreteNode[]) => { if (nodes.length === 1) return tokenMap[nodes[0].id]; return currentTokens; }), @@ -216,8 +258,9 @@ describe('render', () => { sidecar, tracer, env, + mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, new Map(), - 0, + undefined, new Set(), ); @@ -225,5 +268,6 @@ describe('render', () => { const surviving = result.history.map((c: any) => c.text); // C(40k), B(40k). Adding B pushes total to 80k. B is the boundary node and survives. A drops. expect(surviving).toEqual(['B', 'C']); // A is dropped + expect(result.baseUnits).toBe(160000); }); }); diff --git a/packages/core/src/context/graph/render.ts b/packages/core/src/context/graph/render.ts index 5c0fa3df0e..e16beb4f38 100644 --- a/packages/core/src/context/graph/render.ts +++ b/packages/core/src/context/graph/render.ts @@ -11,6 +11,7 @@ import type { ContextProfile } from '../config/profiles.js'; import type { PipelineOrchestrator } from '../pipeline/orchestrator.js'; import type { ContextEnvironment } from '../pipeline/environment.js'; import { performCalibration } from '../utils/tokenCalibration.js'; +import type { AdvancedTokenCalculator } from '../utils/contextTokenCalculator.js'; /** * Maps the Episodic Context Graph back into a raw Gemini Content[] array for transmission. 
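For reference, the hunk below reworks render()'s contract: it now receives the adaptive calculator and the raw header Content (instead of a pre-computed headerTokens number), and returns baseUnits alongside the projected history. A minimal sketch of a call, mirroring the mocks in render.test.ts above:

    const { history, didApplyManagement, baseUnits } = await render(
      nodes,                    // readonly ConcreteNode[]
      orchestrator,
      sidecar,                  // ContextProfile; no budget => no GC backstop
      tracer,
      env,
      advancedTokenCalculator,  // AdvancedTokenCalculator
      new Map(),                // protectionReasons
      undefined,                // header?: Content (replaces headerTokens)
      new Set(),                // previewNodeIds
    );
    // The caller can later pair baseUnits with real API usage via
    // eventBus.emitTokenGroundTruth({ actualTokens, promptBaseUnits: baseUnits }).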
@@ -22,21 +23,43 @@ export async function render( sidecar: ContextProfile, tracer: ContextTracer, env: ContextEnvironment, + advancedTokenCalculator: AdvancedTokenCalculator, protectionReasons: Map = new Map(), - headerTokens: number = 0, + header?: Content, previewNodeIds: ReadonlySet = new Set(), -): Promise<{ history: Content[]; didApplyManagement: boolean }> { +): Promise<{ + history: Content[]; + didApplyManagement: boolean; + baseUnits: number; +}> { + let headerTokens = 0; + let headerBaseUnits = 0; + if (header) { + const costs = + advancedTokenCalculator.calculateContentTokensAndBaseUnits(header); + headerTokens = costs.tokens; + headerBaseUnits = costs.baseUnits; + } + if (!sidecar.config.budget) { const visibleNodes = nodes.filter((n) => !previewNodeIds.has(n.id)); const contents = env.graphMapper.fromGraph(visibleNodes); tracer.logEvent('Render', 'Render Context to LLM (No Budget)', { renderedContext: contents, }); - return { history: contents, didApplyManagement: false }; + + // In all cases, retrieve raw base units from the token calculator interface + const baseUnits = + advancedTokenCalculator.getRawBaseUnits(nodes) + headerBaseUnits; + + return { history: contents, didApplyManagement: false, baseUnits }; } const maxTokens = sidecar.config.budget.maxTokens; - const graphTokens = env.tokenCalculator.calculateConcreteListTokens(nodes); + + const { tokens: graphTokens, baseUnits: graphBaseUnits } = + advancedTokenCalculator.calculateTokensAndBaseUnits(nodes); + const currentTokens = graphTokens + headerTokens; const protectedIds = new Set(protectionReasons.keys()); @@ -70,7 +93,11 @@ export async function render( renderedContext: contents, }); performCalibration(env, visibleNodes, contents); - return { history: contents, didApplyManagement: false }; + return { + history: contents, + didApplyManagement: false, + baseUnits: graphBaseUnits + headerBaseUnits, + }; } const targetDelta = currentTokens - sidecar.config.budget.retainedTokens; tracer.logEvent( @@ -119,5 +146,10 @@ export async function render( renderedContextSanitized: contents, }); performCalibration(env, visibleNodes, contents); - return { history: contents, didApplyManagement: true }; + return { + history: contents, + didApplyManagement: true, + baseUnits: + advancedTokenCalculator.getRawBaseUnits(visibleNodes) + headerBaseUnits, + }; } diff --git a/packages/core/src/context/initializer.ts b/packages/core/src/context/initializer.ts index 3b37d2bac7..3916210bea 100644 --- a/packages/core/src/context/initializer.ts +++ b/packages/core/src/context/initializer.ts @@ -23,6 +23,9 @@ import { StateSnapshotProcessorOptionsSchema } from './processors/stateSnapshotP import { StateSnapshotAsyncProcessorOptionsSchema } from './processors/stateSnapshotAsyncProcessor.js'; import { RollingSummaryProcessorOptionsSchema } from './processors/rollingSummaryProcessor.js'; import { getEnvironmentContext } from '../utils/environmentContext.js'; +import { AdaptiveTokenCalculator } from './utils/adaptiveTokenCalculator.js'; +import { NodeBehaviorRegistry } from './graph/behaviorRegistry.js'; +import { registerBuiltInBehaviors } from './graph/builtinBehaviors.js'; export async function initializeContextManager( config: Config, @@ -85,6 +88,16 @@ export async function initializeContextManager( const eventBus = new ContextEventBus(); + const charsPerToken = 3; + const behaviorRegistry = new NodeBehaviorRegistry(); + registerBuiltInBehaviors(behaviorRegistry); + + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + 
behaviorRegistry, + eventBus, + ); + const env = new ContextEnvironmentImpl( () => config.getBaseLlmClient(), config.getSessionId(), @@ -92,8 +105,10 @@ export async function initializeContextManager( logDir, projectTempDir, tracer, - 4, + charsPerToken, eventBus, + calculator, + behaviorRegistry, { calibrateTokenCalculation: !!process.env['GEMINI_CONTEXT_CALIBRATE_TOKEN_CALCULATIONS'], @@ -114,6 +129,7 @@ export async function initializeContextManager( tracer, orchestrator, chat.agentHistory, + calculator, async () => { const parts = await getEnvironmentContext(config); return { role: 'user', parts }; diff --git a/packages/core/src/context/pipeline/environmentImpl.test.ts b/packages/core/src/context/pipeline/environmentImpl.test.ts index c1dc0c2f16..d6f2835b89 100644 --- a/packages/core/src/context/pipeline/environmentImpl.test.ts +++ b/packages/core/src/context/pipeline/environmentImpl.test.ts @@ -8,12 +8,16 @@ import { ContextEnvironmentImpl } from './environmentImpl.js'; import { ContextTracer } from '../tracer.js'; import { ContextEventBus } from '../eventBus.js'; import { createMockLlmClient } from '../testing/contextTestUtils.js'; +import { StaticTokenCalculator } from '../utils/contextTokenCalculator.js'; +import { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; describe('ContextEnvironmentImpl', () => { it('should initialize with defaults correctly', () => { const tracer = new ContextTracer({ targetDir: '/tmp', sessionId: 'mock' }); const eventBus = new ContextEventBus(); const mockLlmClient = createMockLlmClient(); + const behaviorRegistry = new NodeBehaviorRegistry(); + const calculator = new StaticTokenCalculator(4, behaviorRegistry); const env = new ContextEnvironmentImpl( () => mockLlmClient, @@ -24,6 +28,8 @@ describe('ContextEnvironmentImpl', () => { tracer, 4, eventBus, + calculator, + behaviorRegistry, ); expect(env.llmClient).toBe(mockLlmClient); diff --git a/packages/core/src/context/pipeline/environmentImpl.ts b/packages/core/src/context/pipeline/environmentImpl.ts index 736792d561..78f1b3dda5 100644 --- a/packages/core/src/context/pipeline/environmentImpl.ts +++ b/packages/core/src/context/pipeline/environmentImpl.ts @@ -8,16 +8,13 @@ import type { BaseLlmClient } from '../../core/baseLlmClient.js'; import type { ContextTracer } from '../tracer.js'; import type { ContextEnvironment, RenderOptions } from './environment.js'; import type { ContextEventBus } from '../eventBus.js'; -import { ContextTokenCalculator } from '../utils/contextTokenCalculator.js'; +import type { ContextTokenCalculator } from '../utils/contextTokenCalculator.js'; import { LiveInbox } from './inbox.js'; -import { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; -import { registerBuiltInBehaviors } from '../graph/builtinBehaviors.js'; +import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; import { ContextGraphMapper } from '../graph/mapper.js'; export class ContextEnvironmentImpl implements ContextEnvironment { - readonly tokenCalculator: ContextTokenCalculator; readonly inbox: LiveInbox; - readonly behaviorRegistry: NodeBehaviorRegistry; readonly graphMapper: ContextGraphMapper; constructor( @@ -29,14 +26,10 @@ export class ContextEnvironmentImpl implements ContextEnvironment { readonly tracer: ContextTracer, readonly charsPerToken: number, readonly eventBus: ContextEventBus, + readonly tokenCalculator: ContextTokenCalculator, + readonly behaviorRegistry: NodeBehaviorRegistry, readonly renderOptions?: RenderOptions, ) { - this.behaviorRegistry = new 
NodeBehaviorRegistry(); - registerBuiltInBehaviors(this.behaviorRegistry); - this.tokenCalculator = new ContextTokenCalculator( - this.charsPerToken, - this.behaviorRegistry, - ); this.inbox = new LiveInbox(); this.graphMapper = new ContextGraphMapper(); } diff --git a/packages/core/src/context/processors/blobDegradationProcessor.test.ts b/packages/core/src/context/processors/blobDegradationProcessor.test.ts index 252c8e4007..5ee97a2d6b 100644 --- a/packages/core/src/context/processors/blobDegradationProcessor.test.ts +++ b/packages/core/src/context/processors/blobDegradationProcessor.test.ts @@ -66,12 +66,12 @@ describe('BlobDegradationProcessor', () => { const node1 = createDummyNode('ep1', NodeType.USER_PROMPT, 100, { payload: { - fileData: { mimeType: 'video/mp4', fileUri: 'gs://test1' }, + fileData: { mimeType: 'image/png', fileUri: 'gs://test1' }, }, }); const node2 = createDummyNode('ep1', NodeType.USER_PROMPT, 100, { payload: { - fileData: { mimeType: 'video/mp4', fileUri: 'gs://test2' }, + fileData: { mimeType: 'image/png', fileUri: 'gs://test2' }, }, }); diff --git a/packages/core/src/context/processors/stateSnapshotAsyncProcessor.test.ts b/packages/core/src/context/processors/stateSnapshotAsyncProcessor.test.ts index 95fee61c6d..c1c29f8137 100644 --- a/packages/core/src/context/processors/stateSnapshotAsyncProcessor.test.ts +++ b/packages/core/src/context/processors/stateSnapshotAsyncProcessor.test.ts @@ -52,7 +52,7 @@ describe('StateSnapshotAsyncProcessor', () => { 'PROPOSED_SNAPSHOT', expect.objectContaining({ newText: - '{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":[]}', + '{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":["Mock LLM summary response"]}', consumedIds: ['node-A', 'node-B'], type: 'point-in-time', }), @@ -107,7 +107,7 @@ describe('StateSnapshotAsyncProcessor', () => { 'PROPOSED_SNAPSHOT', expect.objectContaining({ newText: - '{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":[]}', + '{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":["Mock LLM summary response"]}', consumedIds: ['node-A', 'node-B', 'node-C'], // Aggregated! 
type: 'accumulate', }), diff --git a/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap b/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap index 1f4ba5a7eb..3c36020dbc 100644 --- a/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap +++ b/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap @@ -2,6 +2,7 @@ exports[`System Lifecycle Golden Tests > Scenario 1: Organic Growth with Huge Tool Output & Images 1`] = ` { + "baseUnits": 787, "finalProjection": [ { "parts": [ @@ -126,28 +127,28 @@ exports[`System Lifecycle Golden Tests > Scenario 1: Organic Growth with Huge To ], "tokenTrajectory": [ { - "tokensAfterBackground": 17, - "tokensBeforeBackground": 17, + "tokensAfterBackground": 33, + "tokensBeforeBackground": 33, "turnIndex": 0, }, { - "tokensAfterBackground": 34, - "tokensBeforeBackground": 34, + "tokensAfterBackground": 68, + "tokensBeforeBackground": 68, "turnIndex": 1, }, { - "tokensAfterBackground": 437, - "tokensBeforeBackground": 20172, + "tokensAfterBackground": 497, + "tokensBeforeBackground": 20232, "turnIndex": 2, }, { - "tokensAfterBackground": 526, - "tokensBeforeBackground": 3462, + "tokensAfterBackground": 750, + "tokensBeforeBackground": 3554, "turnIndex": 3, }, { - "tokensAfterBackground": 544, - "tokensBeforeBackground": 544, + "tokensAfterBackground": 787, + "tokensBeforeBackground": 787, "turnIndex": 4, }, ], @@ -156,6 +157,7 @@ exports[`System Lifecycle Golden Tests > Scenario 1: Organic Growth with Huge To exports[`System Lifecycle Golden Tests > Scenario 2: Under Budget (No Modifications) 1`] = ` { + "baseUnits": 68, "finalProjection": [ { "parts": [ @@ -200,13 +202,13 @@ exports[`System Lifecycle Golden Tests > Scenario 2: Under Budget (No Modificati ], "tokenTrajectory": [ { - "tokensAfterBackground": 17, - "tokensBeforeBackground": 17, + "tokensAfterBackground": 33, + "tokensBeforeBackground": 33, "turnIndex": 0, }, { - "tokensAfterBackground": 34, - "tokensBeforeBackground": 34, + "tokensAfterBackground": 68, + "tokensBeforeBackground": 68, "turnIndex": 1, }, ], @@ -215,6 +217,7 @@ exports[`System Lifecycle Golden Tests > Scenario 2: Under Budget (No Modificati exports[`System Lifecycle Golden Tests > Scenario 3: Node Distillation of Large Historical Messages 1`] = ` { + "baseUnits": 5370, "finalProjection": [ { "parts": [ @@ -243,7 +246,7 @@ exports[`System Lifecycle Golden Tests > Scenario 3: Node Distillation of Large { "parts": [ { - "text": 
"DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD", + "text": "Mock response from: utility_compressor, for: {"text":"D...DDDDDDDD"}", }, ], "role": "model", @@ -251,7 +254,7 @@ exports[`System Lifecycle Golden Tests > Scenario 3: Node Distillation of Large { "parts": [ { - "text": "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE", + "text": "Mock response from: utility_compressor, for: {"text":"E...EEEEEEEE"}", }, ], "role": "user", @@ -275,18 +278,18 @@ exports[`System Lifecycle Golden Tests > Scenario 3: Node Distillation of Large ], "tokenTrajectory": [ { - "tokensAfterBackground": 3308, - "tokensBeforeBackground": 3308, + "tokensAfterBackground": 5078, + "tokensBeforeBackground": 10010, "turnIndex": 0, }, { - "tokensAfterBackground": 4989, - 
"tokensBeforeBackground": 6616, + "tokensAfterBackground": 5224, + "tokensBeforeBackground": 15088, "turnIndex": 1, }, { - "tokensAfterBackground": 5043, - "tokensBeforeBackground": 8297, + "tokensAfterBackground": 5370, + "tokensBeforeBackground": 15234, "turnIndex": 2, }, ], @@ -295,44 +298,13 @@ exports[`System Lifecycle Golden Tests > Scenario 3: Node Distillation of Large exports[`System Lifecycle Golden Tests > Scenario 4: Async-Driven Background GC via State Snapshots 1`] = ` { + "baseUnits": 505, "finalProjection": [ { "parts": [ { "text": "{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":[]}", }, - { - "text": "Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 Msg 4 ..................................................", - }, - ], - "role": "user", - }, - { - "parts": [ - { - "text": "Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 Msg 5 ..................................................", - }, - ], - "role": "model", - }, - { - "parts": [ - { - "text": "Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 Msg 6 ..................................................", - }, - ], - "role": "user", - }, - { - "parts": [ - { - "text": "Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 Msg 7 ..................................................", - }, - ], - "role": "model", - }, - { - "parts": [ { "text": "Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 ..................................................", }, @@ -358,28 +330,28 @@ exports[`System Lifecycle Golden Tests > Scenario 4: Async-Driven Background GC ], "tokenTrajectory": [ { - "tokensAfterBackground": 140, - "tokensBeforeBackground": 140, + "tokensAfterBackground": 410, + "tokensBeforeBackground": 410, "turnIndex": 0, }, { - "tokensAfterBackground": 280, - "tokensBeforeBackground": 280, + "tokensAfterBackground": 820, + "tokensBeforeBackground": 820, "turnIndex": 1, }, { - "tokensAfterBackground": 420, - "tokensBeforeBackground": 420, + "tokensAfterBackground": 1230, + "tokensBeforeBackground": 1230, "turnIndex": 2, }, { - "tokensAfterBackground": 560, - "tokensBeforeBackground": 560, + "tokensAfterBackground": 1640, + "tokensBeforeBackground": 1640, "turnIndex": 3, }, { - "tokensAfterBackground": 700, - "tokensBeforeBackground": 700, + "tokensAfterBackground": 2050, + "tokensBeforeBackground": 2050, "turnIndex": 4, }, ], diff --git a/packages/core/src/context/system-tests/hysteresis.test.ts b/packages/core/src/context/system-tests/hysteresis.test.ts index ca335a43fa..cf804a27f6 100644 --- a/packages/core/src/context/system-tests/hysteresis.test.ts +++ b/packages/core/src/context/system-tests/hysteresis.test.ts @@ -9,6 +9,7 @@ import { SimulationHarness } from './simulationHarness.js'; import { createMockLlmClient } from '../testing/contextTestUtils.js'; import type { ContextProfile } from '../config/profiles.js'; import { generalistProfile } from '../config/profiles.js'; +import type { Content } from '@google/genai'; describe('Context Manager Hysteresis Tests', () => { const mockLlmClient = createMockLlmClient(['']); @@ -25,6 
+26,12 @@ describe('Context Manager Hysteresis Tests', () => { }, }); + const getProjectionTokens = (proj: Content[], harness: SimulationHarness) => + proj.reduce( + (sum, c) => sum + harness.env.tokenCalculator.calculateContentTokens(c), + 0, + ); + it('should block consolidation when deficit is below coalescing threshold', async () => { const threshold = 1500; const harness = await SimulationHarness.create( @@ -35,14 +42,14 @@ describe('Context Manager Hysteresis Tests', () => { // Turn 0: INIT await harness.simulateTurn([{ role: 'user', parts: [{ text: 'INIT' }] }]); - // Turn 1: Add 1500 chars (~500 tokens). Total ~500. Under retained (1000). + // Turn 1: Add ~500 tokens await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'A'.repeat(1500) }] }, + { role: 'user', parts: [{ text: 'A'.repeat(500) }] }, ]); - // Turn 2: Add 3000 chars (~1000 tokens). Total ~1500. Deficit ~500 < 1500. + // Turn 2: Add ~1000 tokens. Total ~1500. Deficit ~500 < 1500. await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'B'.repeat(3000) }] }, + { role: 'user', parts: [{ text: 'B'.repeat(1000) }] }, ]); await new Promise((resolve) => setTimeout(resolve, 100)); @@ -54,19 +61,19 @@ describe('Context Manager Hysteresis Tests', () => { ), ).toBe(false); - // Turn 3: Add 9000 chars (~3000 tokens). Total ~4500. + // Turn 3: Add ~3000 tokens. Total ~4500. // Deficit ~3500 > 1500. TRIGGER! await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'C'.repeat(9000) }] }, + { role: 'user', parts: [{ text: 'C'.repeat(3000) }] }, ]); // Give it a moment for the async task to finish await new Promise((resolve) => setTimeout(resolve, 500)); // Exceed maxTokens to force a render that shows the snapshot - // Add 3000 more tokens (9000 chars). Total ~7500 > 5000. + // Add ~3000 tokens. Total ~7500 > 5000. await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'D'.repeat(9000) }] }, + { role: 'user', parts: [{ text: 'D'.repeat(3000) }] }, ]); state = await harness.getGoldenState(); @@ -85,57 +92,51 @@ describe('Context Manager Hysteresis Tests', () => { ); // 1. Trigger first consolidation - // Add ~9000 chars (~3000 tokens). Total ~3000. Deficit ~2000 > 1000. + // Add ~3000 tokens. Total ~3000. Deficit ~2000 > 1000. await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'A'.repeat(9000) }] }, + { role: 'user', parts: [{ text: 'A'.repeat(3000) }] }, ]); await harness.simulateTurn([{ role: 'user', parts: [{ text: 'B' }] }]); // Make eligible await new Promise((resolve) => setTimeout(resolve, 500)); // Exceed maxTokens (5000) to see it await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'X'.repeat(9000) }] }, + { role: 'user', parts: [{ text: 'X'.repeat(3000) }] }, ]); - const state = await harness.getGoldenState(); + // Get baseline tokens + let state = await harness.getGoldenState(); expect( state.finalProjection.some((c) => c.parts?.some((p) => p.text?.includes('')), ), ).toBe(true); - // Get baseline tokens - const baselineTokens = - harness.env.tokenCalculator.calculateConcreteListTokens( - harness.contextManager.getNodes(), - ); + const baselineTokens = getProjectionTokens(state.finalProjection, harness); // 2. Add nodes again, staying below threshold growth - // Add 1500 chars (~500 tokens). Growth ~500 < 1000. + // Add ~500 tokens. Growth ~500 < 1000. 
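+    // (The harness's StaticTokenCalculator is built with charsPerToken = 1,
+    // so 'C'.repeat(500) below costs ~500 base units.)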
await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'C'.repeat(1500) }] }, + { role: 'user', parts: [{ text: 'C'.repeat(500) }] }, ]); await harness.simulateTurn([{ role: 'user', parts: [{ text: 'D' }] }]); // Make eligible await new Promise((resolve) => setTimeout(resolve, 200)); - const currentTokens = - harness.env.tokenCalculator.calculateConcreteListTokens( - harness.contextManager.getNodes(), - ); + state = await harness.getGoldenState(); + const currentTokens = getProjectionTokens(state.finalProjection, harness); // Should not have shrunk further (except for D's small addition) expect(currentTokens).toBeGreaterThanOrEqual(baselineTokens); // 3. Exceed threshold growth - // Add 6000 chars (~2000 tokens). Growth = ~500 + ~2000 = ~2500 > 1000. + // Add ~2000 tokens. Growth = ~500 + ~2000 = ~2500 > 1000. await harness.simulateTurn([ - { role: 'user', parts: [{ text: 'E'.repeat(6000) }] }, + { role: 'user', parts: [{ text: 'E'.repeat(2000) }] }, ]); await harness.simulateTurn([{ role: 'user', parts: [{ text: 'F' }] }]); // Make eligible await new Promise((resolve) => setTimeout(resolve, 500)); - const finalTokens = harness.env.tokenCalculator.calculateConcreteListTokens( - harness.contextManager.getNodes(), - ); + state = await harness.getGoldenState(); + const finalTokens = getProjectionTokens(state.finalProjection, harness); // Now it should have consolidated again (E should be replaced by a snapshot eventually) expect(finalTokens).toBeLessThan(currentTokens + 2000); }); diff --git a/packages/core/src/context/system-tests/simulationHarness.ts b/packages/core/src/context/system-tests/simulationHarness.ts index 303b715273..c15c9fc26b 100644 --- a/packages/core/src/context/system-tests/simulationHarness.ts +++ b/packages/core/src/context/system-tests/simulationHarness.ts @@ -13,6 +13,9 @@ import { ContextTracer } from '../tracer.js'; import { ContextEventBus } from '../eventBus.js'; import { PipelineOrchestrator } from '../pipeline/orchestrator.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; +import { StaticTokenCalculator } from '../utils/contextTokenCalculator.js'; +import { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; +import { registerBuiltInBehaviors } from '../graph/builtinBehaviors.js'; export interface TurnSummary { turnIndex: number; @@ -57,6 +60,11 @@ export class SimulationHarness { targetDir: mockTempDir, sessionId: 'sim-session', }); + + const behaviorRegistry = new NodeBehaviorRegistry(); + registerBuiltInBehaviors(behaviorRegistry); + const calculator = new StaticTokenCalculator(1, behaviorRegistry); + this.env = new ContextEnvironmentImpl( () => mockLlmClient, 'sim-prompt', @@ -66,6 +74,8 @@ export class SimulationHarness { this.tracer, 1, // 1 char per token average for estimation (but estimator uses 0.33) this.eventBus, + calculator, + behaviorRegistry, ); this.orchestrator = new PipelineOrchestrator( @@ -81,6 +91,7 @@ export class SimulationHarness { this.tracer, this.orchestrator, this.chatHistory, + calculator, ); } @@ -111,11 +122,12 @@ export class SimulationHarness { } async getGoldenState() { - const { history: finalProjection } = + const { history: finalProjection, baseUnits } = await this.contextManager.renderHistory(); return { tokenTrajectory: this.tokenTrajectory, finalProjection, + baseUnits, }; } } diff --git a/packages/core/src/context/testing/contextTestUtils.ts b/packages/core/src/context/testing/contextTestUtils.ts index 37355ce291..0ac9bae341 100644 --- 
a/packages/core/src/context/testing/contextTestUtils.ts +++ b/packages/core/src/context/testing/contextTestUtils.ts @@ -30,6 +30,9 @@ import type { ContextProfile } from '../config/profiles.js'; import type { Mock } from 'vitest'; import { ContextWorkingBufferImpl } from '../pipeline/contextWorkingBuffer.js'; import { testTruncateProfile } from './testProfile.js'; +import { StaticTokenCalculator } from '../utils/contextTokenCalculator.js'; +import { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; +import { registerBuiltInBehaviors } from '../graph/builtinBehaviors.js'; /** * Creates a valid mock GenerateContentResponse with the provided text. @@ -134,17 +137,30 @@ export function createMockLlmClient( ); }); - const generateJsonMock = vi.fn().mockImplementation(async () => ({ - active_tasks: [], - discovered_facts: [], - constraints_and_preferences: [], - recent_arc: [], - })); + const generateJsonMock = vi.fn().mockImplementation(async () => { + let mockStr = ''; + if (responses && responses.length > 0) { + const callCount = generateJsonMock.mock.calls.length - 1; + const idx = + callCount < responses.length ? callCount : responses.length - 1; + const res = responses[idx]; + if (typeof res === 'string') { + mockStr = res; + } + } + return { + active_tasks: [], + discovered_facts: [], + constraints_and_preferences: [], + chronological_summary: mockStr, + }; + }); // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion return { generateContent: generateContentMock, generateJson: generateJsonMock, + countTokens: vi.fn().mockResolvedValue({ totalTokens: 100 }), } as unknown as MockLlmClient; } @@ -158,6 +174,9 @@ export function createMockEnvironment( sessionId: 'mock-session', }); const eventBus = new ContextEventBus(); + const behaviorRegistry = new NodeBehaviorRegistry(); + registerBuiltInBehaviors(behaviorRegistry); + const calculator = new StaticTokenCalculator(1, behaviorRegistry); let env = new ContextEnvironmentImpl( () => llmClient as BaseLlmClient, @@ -168,6 +187,8 @@ export function createMockEnvironment( tracer, 1, eventBus, + calculator, + behaviorRegistry, ); if (overrides) { @@ -181,6 +202,8 @@ export function createMockEnvironment( env.tracer, env.charsPerToken, env.eventBus, + calculator, + behaviorRegistry, ); } const { llmClient: _llmClient, ...restOverrides } = overrides; @@ -273,6 +296,10 @@ export function setupContextComponentTest( sessionId: 'test-session', }); const eventBus = new ContextEventBus(); + const behaviorRegistry = new NodeBehaviorRegistry(); + registerBuiltInBehaviors(behaviorRegistry); + const calculator = new StaticTokenCalculator(1, behaviorRegistry); + const env = new ContextEnvironmentImpl( () => config.getBaseLlmClient(), 'test prompt-id', @@ -282,6 +309,8 @@ export function setupContextComponentTest( tracer, 1, eventBus, + calculator, + behaviorRegistry, ); const orchestrator = new PipelineOrchestrator( @@ -298,8 +327,8 @@ export function setupContextComponentTest( tracer, orchestrator, chatHistory, + calculator, ); - // The async async pipeline is now internally managed by ContextManager return { chatHistory, contextManager }; } diff --git a/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts b/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts new file mode 100644 index 0000000000..6e89d1baca --- /dev/null +++ b/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts @@ -0,0 +1,125 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { 
describe, it, expect } from 'vitest'; +import { AdaptiveTokenCalculator } from './adaptiveTokenCalculator.js'; +import { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; +import { registerBuiltInBehaviors } from '../graph/builtinBehaviors.js'; +import { ContextEventBus } from '../eventBus.js'; +import { createDummyNode } from '../testing/contextTestUtils.js'; +import { NodeType } from '../graph/types.js'; + +describe('AdaptiveTokenCalculator', () => { + const registry = new NodeBehaviorRegistry(); + registerBuiltInBehaviors(registry); + const charsPerToken = 1; // Simplifies math + + it('should initialize with a learned weight of 1.0', () => { + const eventBus = new ContextEventBus(); + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + ); + expect(calculator.getLearnedWeight()).toBe(1.0); + }); + + it('should dynamically update learned weight based on token ground truth events', () => { + const eventBus = new ContextEventBus(); + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + ); + + // Initial state: weight = 1.0 + + // Simulate an event where the API reported fewer tokens than our base units + // targetWeight = 50 / 100 = 0.5 + // newWeight = 1.0 * 0.8 + 0.5 * 0.2 = 0.8 + 0.1 = 0.9 + eventBus.emitTokenGroundTruth({ + actualTokens: 50, + promptBaseUnits: 100, + }); + + // JavaScript floating point precision means we should use toBeCloseTo + expect(calculator.getLearnedWeight()).toBeCloseTo(0.9, 5); + + // Simulate another event + // newWeight = 0.9 * 0.8 + (150 / 100) * 0.2 = 0.72 + 0.3 = 1.02 + eventBus.emitTokenGroundTruth({ + actualTokens: 150, + promptBaseUnits: 100, + }); + + expect(calculator.getLearnedWeight()).toBeCloseTo(1.02, 5); + }); + + it('should clamp the learned weight between 0.5 and 2.0', () => { + const eventBus = new ContextEventBus(); + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + ); + + // Push weight up extremely high (API returns 10x tokens) + for (let i = 0; i < 20; i++) { + eventBus.emitTokenGroundTruth({ + actualTokens: 1000, + promptBaseUnits: 100, + }); + } + expect(calculator.getLearnedWeight()).toBe(2.0); + + // Push weight down extremely low (API returns 0 tokens) + for (let i = 0; i < 20; i++) { + eventBus.emitTokenGroundTruth({ actualTokens: 0, promptBaseUnits: 100 }); + } + expect(calculator.getLearnedWeight()).toBe(0.5); + }); + + it('should correctly apply the learned weight to node calculations while keeping raw base units stable', () => { + const eventBus = new ContextEventBus(); + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + ); + + // Decrease the weight to exactly 0.5 + for (let i = 0; i < 20; i++) { + eventBus.emitTokenGroundTruth({ actualTokens: 0, promptBaseUnits: 100 }); + } + + const turn1Id = 'turn-1'; + const node1 = createDummyNode(turn1Id, NodeType.USER_PROMPT); + + // Get raw base units directly + const rawTokens = calculator.calculateTokensAndBaseUnits([node1]).baseUnits; + + // Get adjusted tokens + const adjustedTokens = calculator.calculateConcreteListTokens([node1]); + + expect(adjustedTokens).toBe(Math.round(rawTokens * 0.5)); + }); + + it('should ignore ground truth events with 0 promptBaseUnits to prevent division by zero', () => { + const eventBus = new ContextEventBus(); + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + ); + + eventBus.emitTokenGroundTruth({ + actualTokens: 100, + promptBaseUnits: 0, + 
});
+
+    expect(calculator.getLearnedWeight()).toBe(1.0);
+  });
+});
diff --git a/packages/core/src/context/utils/adaptiveTokenCalculator.ts b/packages/core/src/context/utils/adaptiveTokenCalculator.ts
new file mode 100644
index 0000000000..2ac3825ef5
--- /dev/null
+++ b/packages/core/src/context/utils/adaptiveTokenCalculator.ts
@@ -0,0 +1,163 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Content, Part } from '@google/genai';
+import type { ConcreteNode } from '../graph/types.js';
+import {
+  StaticTokenCalculator,
+  type AdvancedTokenCalculator,
+} from './contextTokenCalculator.js';
+import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';
+import type { ContextEventBus, TokenGroundTruthEvent } from '../eventBus.js';
+import { debugLogger } from '../../utils/debugLogger.js';
+
+/**
+ * An Adaptive Token Calculator that dynamically learns the true token cost of the user's
+ * conversation by applying an Exponential Moving Average (EMA) update driven by
+ * real usage metadata returned from the Gemini API.
+ *
+ * It wraps the deterministic `StaticTokenCalculator` base heuristic to ensure
+ * immutable node cost caching while still surfacing a self-corrected estimate
+ * to the pipeline processors.
+ */
+export class AdaptiveTokenCalculator implements AdvancedTokenCalculator {
+  private learnedWeight = 1.0;
+  private readonly baseCalculator: StaticTokenCalculator;
+
+  constructor(
+    charsPerToken: number,
+    registry: NodeBehaviorRegistry,
+    eventBus: ContextEventBus,
+  ) {
+    this.baseCalculator = new StaticTokenCalculator(charsPerToken, registry);
+    eventBus.onTokenGroundTruth((event: TokenGroundTruthEvent) => {
+      this.handleGroundTruth(event.actualTokens, event.promptBaseUnits);
+    });
+  }
+
+  private handleGroundTruth(actualTokens: number, promptBaseUnits: number) {
+    if (promptBaseUnits <= 0) return;
+
+    // Determine what ratio we should have used
+    const targetWeight = actualTokens / promptBaseUnits;
+    const oldWeight = this.learnedWeight;
+
+    // Blend toward the target ratio with a fixed learning rate (EMA update)
+    const learningRate = 0.2;
+    const newWeight =
+      oldWeight * (1 - learningRate) + targetWeight * learningRate;
+
+    // Clamp to reasonable safety bounds to prevent rogue metadata from poisoning the system
+    this.learnedWeight = Math.max(0.5, Math.min(newWeight, 2.0));
+
+    debugLogger.log(
+      `[AdaptiveTokenCalculator] Learned weight updated to ${this.learnedWeight.toFixed(3)} ` +
+        `(API Tokens: ${actualTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`,
+    );
+  }
+
+  /**
+   * Retrieves the current learned weight multiplier.
+   */
+  getLearnedWeight(): number {
+    return this.learnedWeight;
+  }
+
+  /**
+   * Returns the exact, unweighted Base Heuristic Units for the graph.
+   * This is used exactly once per interaction to capture the baseline sent to the API.
+   */
+  getRawBaseUnits(nodes: readonly ConcreteNode[]): number {
+    return this.baseCalculator.calculateConcreteListTokens(nodes);
+  }
+
+  /**
+   * Returns the exact, unweighted Base Heuristic Units for a raw content chunk.
+   */
+  getRawBaseUnitsForContent(content: Content): number {
+    return this.baseCalculator.calculateContentTokens(content);
+  }
+
+  calculateTokensAndBaseUnits(nodes: readonly ConcreteNode[]): {
+    tokens: number;
+    baseUnits: number;
+  } {
+    const baseUnits = this.baseCalculator.calculateConcreteListTokens(nodes);
+    return {
+      tokens: Math.round(baseUnits * this.learnedWeight),
+      baseUnits,
+    };
+  }
+
+  calculateContentTokensAndBaseUnits(content: Content): {
+    tokens: number;
+    baseUnits: number;
+  } {
+    const baseUnits = this.baseCalculator.calculateContentTokens(content);
+    return {
+      tokens: Math.round(baseUnits * this.learnedWeight),
+      baseUnits,
+    };
+  }
+
+  // --- Delegation and Weighting ---
+
+  garbageCollectCache(liveNodeIds: ReadonlySet<string>): void {
+    this.baseCalculator.garbageCollectCache(liveNodeIds);
+  }
+
+  cacheNodeTokens(node: ConcreteNode): number {
+    return this.baseCalculator.cacheNodeTokens(node);
+  }
+
+  calculateTokenBreakdown(nodes: readonly ConcreteNode[]): {
+    text: number;
+    media: number;
+    tool: number;
+    overhead: number;
+    total: number;
+  } {
+    const raw = this.baseCalculator.calculateTokenBreakdown(nodes);
+    return {
+      text: Math.round(raw.text * this.learnedWeight),
+      media: Math.round(raw.media * this.learnedWeight),
+      tool: Math.round(raw.tool * this.learnedWeight),
+      overhead: Math.round(raw.overhead * this.learnedWeight),
+      total: Math.round(raw.total * this.learnedWeight),
+    };
+  }
+
+  estimateTokensForParts(parts: Part[]): number {
+    const baseUnits = this.baseCalculator.estimateTokensForParts(parts);
+    return Math.round(baseUnits * this.learnedWeight);
+  }
+
+  getTokenCost(node: ConcreteNode): number {
+    const baseUnits = this.baseCalculator.getTokenCost(node);
+    return Math.round(baseUnits * this.learnedWeight);
+  }
+
+  calculateConcreteListTokens(nodes: readonly ConcreteNode[]): number {
+    const baseUnits = this.baseCalculator.calculateConcreteListTokens(nodes);
+    return Math.round(baseUnits * this.learnedWeight);
+  }
+
+  calculateContentTokens(content: Content): number {
+    const baseUnits = this.baseCalculator.calculateContentTokens(content);
+    return Math.round(baseUnits * this.learnedWeight);
+  }
+
+  estimateTokensForString(text: string): number {
+    const baseUnits = this.baseCalculator.estimateTokensForString(text);
+    return Math.round(baseUnits * this.learnedWeight);
+  }
+
+  tokensToChars(tokens: number): number {
+    // If weight is > 1.0 (we are inflating tokens), a single returned token is worth fewer chars.
+    // We reverse the math: convert requested tokens to target base units, then get chars.
+ return this.baseCalculator.tokensToChars(tokens / this.learnedWeight); + } +} diff --git a/packages/core/src/context/utils/contextTokenCalculator.test.ts b/packages/core/src/context/utils/contextTokenCalculator.test.ts index 9d1d79a926..84abfc5f49 100644 --- a/packages/core/src/context/utils/contextTokenCalculator.test.ts +++ b/packages/core/src/context/utils/contextTokenCalculator.test.ts @@ -5,7 +5,7 @@ */ import { describe, it, expect } from 'vitest'; -import { ContextTokenCalculator } from './contextTokenCalculator.js'; +import { StaticTokenCalculator } from './contextTokenCalculator.js'; import { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; import { registerBuiltInBehaviors } from '../graph/builtinBehaviors.js'; import { createDummyNode } from '../testing/contextTestUtils.js'; @@ -16,7 +16,7 @@ describe('ContextTokenCalculator', () => { const registry = new NodeBehaviorRegistry(); registerBuiltInBehaviors(registry); const charsPerToken = 1; // Simplifies math for text nodes in tests - const calculator = new ContextTokenCalculator(charsPerToken, registry); + const calculator = new StaticTokenCalculator(charsPerToken, registry); it('should include structural overhead for each unique turn', () => { const turn1Id = 'turn-1'; @@ -28,16 +28,16 @@ describe('ContextTokenCalculator', () => { const nodes = [node1, node2, node3]; - // Estimated tokens (using 0.33 per ASCII char heuristic): - // node1: floor(17 chars * 0.33) = 5 tokens - // node2: floor(17 chars * 0.33) = 5 tokens - // node3: floor(19 chars * 0.33) = 6 tokens + // Estimated tokens (using charsPerToken = 1): + // node1: 17 chars / 1 = 17 tokens + // node2: 17 chars / 1 = 17 tokens + // node3: 19 chars / 1 = 19 tokens // Turn 1 overhead: 5 tokens // Turn 2 overhead: 5 tokens - // Total: 5 + 5 + 6 + 5 + 5 = 26 + // Total: 17 + 17 + 19 + 5 + 5 = 63 const total = calculator.calculateConcreteListTokens(nodes); - expect(total).toBe(26); + expect(total).toBe(63); }); it('should handle categorical breakdown with overhead', () => { diff --git a/packages/core/src/context/utils/contextTokenCalculator.ts b/packages/core/src/context/utils/contextTokenCalculator.ts index e54bc716a7..cc7018f77a 100644 --- a/packages/core/src/context/utils/contextTokenCalculator.ts +++ b/packages/core/src/context/utils/contextTokenCalculator.ts @@ -17,7 +17,41 @@ import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; * by the Gemini API. We use this as a baseline heuristic for inlineData/fileData. 
 */
-export class ContextTokenCalculator {
+export interface ContextTokenCalculator {
+  estimateTokensForString(text: string): number;
+  tokensToChars(tokens: number): number;
+  garbageCollectCache(liveNodeIds: ReadonlySet<string>): void;
+  cacheNodeTokens(node: ConcreteNode): number;
+  getTokenCost(node: ConcreteNode): number;
+  calculateTokenBreakdown(nodes: readonly ConcreteNode[]): {
+    text: number;
+    media: number;
+    tool: number;
+    overhead: number;
+    total: number;
+  };
+  calculateConcreteListTokens(nodes: readonly ConcreteNode[]): number;
+  calculateContentTokens(content: Content): number;
+  estimateTokensForParts(parts: Part[]): number;
+}
+
+export interface AdvancedTokenCalculator extends ContextTokenCalculator {
+  getRawBaseUnits(nodes: readonly ConcreteNode[]): number;
+  getRawBaseUnitsForContent(content: Content): number;
+  calculateTokensAndBaseUnits(nodes: readonly ConcreteNode[]): {
+    tokens: number;
+    baseUnits: number;
+  };
+  calculateContentTokensAndBaseUnits(content: Content): {
+    tokens: number;
+    baseUnits: number;
+  };
+}
+
+/**
+ * A fast, deterministic token heuristic calculator.
+ */
+export class StaticTokenCalculator implements AdvancedTokenCalculator {
   private readonly tokenCache = new Map<string, number>();
 
   constructor(
@@ -143,6 +177,34 @@ export class ContextTokenCalculator {
     return breakdown;
   }
 
+  /**
+   * For the static calculator, Raw Base Units are exactly the same as the final tokens,
+   * because there is no dynamic learned weight (the multiplier is effectively 1.0).
+   */
+  getRawBaseUnits(nodes: readonly ConcreteNode[]): number {
+    return this.calculateConcreteListTokens(nodes);
+  }
+
+  getRawBaseUnitsForContent(content: Content): number {
+    return this.calculateContentTokens(content);
+  }
+
+  calculateTokensAndBaseUnits(nodes: readonly ConcreteNode[]): {
+    tokens: number;
+    baseUnits: number;
+  } {
+    const baseUnits = this.calculateConcreteListTokens(nodes);
+    return { tokens: baseUnits, baseUnits };
+  }
+
+  calculateContentTokensAndBaseUnits(content: Content): {
+    tokens: number;
+    baseUnits: number;
+  } {
+    const baseUnits = this.calculateContentTokens(content);
+    return { tokens: baseUnits, baseUnits };
+  }
+
   /**
    * Fast calculation for a flat array of ConcreteNodes (The Nodes).
    * It relies entirely on the O(1) sidecar token cache.
diff --git a/packages/core/src/context/utils/snapshotGenerator.test.ts b/packages/core/src/context/utils/snapshotGenerator.test.ts
index c3ac960052..07ccc195f9 100644
--- a/packages/core/src/context/utils/snapshotGenerator.test.ts
+++ b/packages/core/src/context/utils/snapshotGenerator.test.ts
@@ -21,6 +21,9 @@ describe('SnapshotGenerator', () => {
       llmClient: {
         generateJson: mockGenerateJson,
       },
+      advancedTokenCalculator: {
+        getRawBaseUnits: vi.fn().mockReturnValue(100),
+      },
       tokenCalculator: {
         estimateTokensForString: vi.fn().mockReturnValue(100),
       },
diff --git a/packages/core/src/core/baseLlmClient.ts b/packages/core/src/core/baseLlmClient.ts
index 5c1b7f66fa..dbf7539944 100644
--- a/packages/core/src/core/baseLlmClient.ts
+++ b/packages/core/src/core/baseLlmClient.ts
@@ -114,6 +114,7 @@ interface _CommonGenerateOptions {
 export interface CountTokenOptions {
   modelConfigKey?: ModelConfigKey;
   contents: Content[];
+  abortSignal?: AbortSignal;
 }
 
 /**
@@ -240,6 +241,9 @@ export class BaseLlmClient {
     const result = await this.contentGenerator.countTokens({
       model,
       contents: options.contents,
+      config: options.abortSignal ?
{ abortSignal: options.abortSignal } + : undefined, }); return { totalTokens: result.totalTokens || 0 }; } diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 535f751ae7..be85412e0c 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -1517,8 +1517,8 @@ ${JSON.stringify( // A string of length 404 is roughly 101 tokens. const longText = 'a'.repeat(404); const request: Part[] = [{ text: longText }]; - // estimateTextOnlyLength counts only text content (400 chars), not JSON structure - const estimatedRequestTokenCount = Math.floor(longText.length * 0.33); + // estimateTextOnlyLength counts only text content (404 chars), not JSON structure + const estimatedRequestTokenCount = Math.floor(longText.length * 0.25); const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount; // Mock tryCompressChat to not compress @@ -1577,8 +1577,8 @@ ${JSON.stringify( // We need a request > 100 tokens. const longText = 'a'.repeat(404); const request: Part[] = [{ text: longText }]; - // estimateTextOnlyLength counts only text content (400 chars), not JSON structure - const estimatedRequestTokenCount = Math.floor(longText.length * 0.33); + // estimateTextOnlyLength counts only text content (404 chars), not JSON structure + const estimatedRequestTokenCount = Math.floor(longText.length * 0.25); const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount; vi.spyOn(client, 'tryCompressChat').mockResolvedValue({ diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index ce544a0e30..302b89d7f0 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -637,11 +637,22 @@ export class GeminiClient { // Check for context window overflow const modelForLimitCheck = this._getActiveModelForCurrentTurn(); + let currentBaseUnits = 0; + if (this.config.getContextManagementConfig().enabled) { if (this.contextManager) { const pendingRequest = createUserContent(request); - const { history: newHistory, didApplyManagement } = - await this.contextManager.renderHistory(pendingRequest); + const { + history: newHistory, + didApplyManagement, + baseUnits, + } = await this.contextManager.renderHistory( + pendingRequest, + undefined, + signal, + ); + + currentBaseUnits = baseUnits; if (didApplyManagement) { // If the manager pruned history, we update the chat before continuing. 
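The hunk below closes the calibration loop: when a response finishes, the API's real `promptTokenCount` is paired with the heuristic `baseUnits` captured above and broadcast as a ground-truth event. As a minimal standalone sketch of what the EMA listener in `adaptiveTokenCalculator.ts` then does with each event (the learning rate and clamp bounds are copied from that file; the sample events are hypothetical):

```ts
// Clamped EMA update, as driven by TokenGroundTruthEvents.
const LEARNING_RATE = 0.2;
const clampWeight = (w: number): number => Math.max(0.5, Math.min(w, 2.0));

let weight = 1.0; // initial learned weight
for (const { actualTokens, promptBaseUnits } of [
  { actualTokens: 50, promptBaseUnits: 100 }, // target 0.5 -> weight 0.9
  { actualTokens: 150, promptBaseUnits: 100 }, // target 1.5 -> weight 1.02
]) {
  if (promptBaseUnits <= 0) continue; // same division-by-zero guard as the patch
  const target = actualTokens / promptBaseUnits;
  weight = clampWeight(weight * (1 - LEARNING_RATE) + target * LEARNING_RATE);
}
console.log(weight.toFixed(2)); // "1.02", matching the unit tests above
```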
@@ -800,8 +811,16 @@ export class GeminiClient { } yield event; + if (event.type === GeminiEventType.Finished && this.contextManager) { + const usageMetadata = event.value.usageMetadata; + if (usageMetadata && usageMetadata.promptTokenCount !== undefined) { + this.contextManager.getEnvironment().eventBus.emitTokenGroundTruth({ + actualTokens: usageMetadata.promptTokenCount, + promptBaseUnits: currentBaseUnits, + }); + } + } this.updateTelemetryTokenCount(); - if (event.type === GeminiEventType.Error) { isError = true; } diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 49fc72c364..05a27f8bbc 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -242,7 +242,7 @@ describe('GeminiChat', () => { // 'Hello': 5 chars * 0.25 = 1.25 // 'Hi there': 8 chars * 0.25 = 2.0 // Total: 3.25 -> floor(3.25) = 3 - expect(chatWithHistory.getLastPromptTokenCount()).toBe(4); + expect(chatWithHistory.getLastPromptTokenCount()).toBe(3); }); it('should initialize lastPromptTokenCount for empty history', () => { diff --git a/packages/core/src/utils/tokenCalculation.test.ts b/packages/core/src/utils/tokenCalculation.test.ts index e642669708..23a58c1774 100644 --- a/packages/core/src/utils/tokenCalculation.test.ts +++ b/packages/core/src/utils/tokenCalculation.test.ts @@ -280,6 +280,26 @@ describe('tokenCalculation', () => { expect(tokens).toBeLessThan(30); }); + it('should respect the user supplied charsPerToken argument', () => { + const text = 'abcdefghijkl'; // 12 chars + const parts: Part[] = [{ text }]; + + // Default (4 chars/token) -> 12 / 4 = 3 tokens + expect(estimateTokenCountSync(parts)).toBe(3); + + // Override to 3 chars/token -> 12 / 3 = 4 tokens + expect(estimateTokenCountSync(parts, 0, 3)).toBe(4); + + // Override to 2 chars/token -> 12 / 2 = 6 tokens + expect(estimateTokenCountSync(parts, 0, 2)).toBe(6); + + // Verify massive strings also respect the argument + const massiveText = 'a'.repeat(120_000); // Exceeds 100k + const massiveParts: Part[] = [{ text: massiveText }]; + expect(estimateTokenCountSync(massiveParts, 0, 4)).toBe(30_000); + expect(estimateTokenCountSync(massiveParts, 0, 3)).toBe(40_000); + }); + it('should handle empty or nullish inputs gracefully', () => { expect(estimateTokenCountSync([])).toBe(0); expect(estimateTokenCountSync([{ text: '' }])).toBe(0); diff --git a/packages/core/src/utils/tokenCalculation.ts b/packages/core/src/utils/tokenCalculation.ts index 2fc4f8e6fa..6c347c7c0a 100644 --- a/packages/core/src/utils/tokenCalculation.ts +++ b/packages/core/src/utils/tokenCalculation.ts @@ -43,10 +43,12 @@ function estimateTextTokens(text: string, charsPerToken: number): number { } let tokens = 0; + const asciiTokensPerChar = 1 / charsPerToken; + // Optimized loop: charCodeAt is faster than for...of on large strings for (let i = 0; i < text.length; i++) { if (text.charCodeAt(i) <= 127) { - tokens += ASCII_TOKENS_PER_CHAR; + tokens += asciiTokensPerChar; } else { tokens += NON_ASCII_TOKENS_PER_CHAR; }
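For reference, a self-contained sketch of the per-character accumulation this hunk parameterizes. Only the `1 / charsPerToken` ASCII rate comes from the patch; the non-ASCII rate and the final rounding are assumed placeholders (`NON_ASCII_TOKENS_PER_CHAR` and the real rounding live elsewhere in `tokenCalculation.ts`):

```ts
// Sketch: estimate tokens for a string with a configurable charsPerToken.
function estimateTextTokensSketch(
  text: string,
  charsPerToken = 4,
  nonAsciiTokensPerChar = 0.5, // assumed placeholder, not the real constant
): number {
  const asciiTokensPerChar = 1 / charsPerToken;
  let tokens = 0;
  for (let i = 0; i < text.length; i++) {
    // ASCII chars cost 1/charsPerToken tokens each; other chars a flat rate.
    tokens +=
      text.charCodeAt(i) <= 127 ? asciiTokensPerChar : nonAsciiTokensPerChar;
  }
  return Math.round(tokens); // rounding mode is an assumption here
}

console.log(estimateTextTokensSketch('abcdefghijkl')); // 12 chars / 4 -> 3
console.log(estimateTextTokensSketch('abcdefghijkl', 3)); // 12 chars / 3 -> 4
```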