mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-17 23:32:43 -07:00
token calculation service
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { IrMapper } from './ir/mapper.js';
|
||||
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
@@ -17,7 +17,7 @@ describe('ContextManager Barrier Tests', () => {
|
||||
const { chatHistory, contextManager } = setupContextComponentTest(config);
|
||||
|
||||
// 1. Shrink limits: 1 char = 1 token. RetainedTokens = 10. MaxTokens = 100.
|
||||
IrMapper.setConfig({ charsPerToken: 1 });
|
||||
|
||||
|
||||
contextManager['sidecar'].budget.retainedTokens = 5;
|
||||
contextManager['sidecar'].budget.maxTokens = 100;
|
||||
@@ -80,7 +80,6 @@ describe('ContextManager Barrier Tests', () => {
|
||||
// 5. Verify Projection shrinks: 6 original messages replaced by 1 snapshot episode (1 text part) -> length 5.
|
||||
const projection = await contextManager.projectCompressedHistory();
|
||||
expect(projection.length).toBe(5);
|
||||
// console.dir(projection, {depth: null});
|
||||
// projection[0] should be the snapshot yield
|
||||
expect(projection[0].parts![0].text).toBe('<SNAP>');
|
||||
});
|
||||
@@ -90,7 +89,7 @@ describe('ContextManager Barrier Tests', () => {
|
||||
const { chatHistory, contextManager } = setupContextComponentTest(config);
|
||||
|
||||
// 1. Shrink limits: maxTokens = 15.
|
||||
IrMapper.setConfig({ charsPerToken: 1 });
|
||||
|
||||
contextManager['sidecar'].budget.maxTokens = 15;
|
||||
|
||||
// 2. Build history: 2 turns. Total = 24 tokens.
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { IrMapper } from './ir/mapper.js';
|
||||
|
||||
import {
|
||||
createSyntheticHistory,
|
||||
createMockContextConfig,
|
||||
@@ -46,7 +46,7 @@ describe('ContextManager Sync Pressure Barrier Tests', () => {
|
||||
]);
|
||||
|
||||
const rawHistoryLength = chatHistory.get().length;
|
||||
IrMapper.setConfig({ charsPerToken: 1 });
|
||||
|
||||
|
||||
// 5. Project History (Triggers Sync Barrier)
|
||||
const projection = await contextManager.projectCompressedHistory();
|
||||
|
||||
@@ -18,6 +18,7 @@ import { ContextEnvironmentImpl } from './sidecar/environmentImpl.js';
|
||||
import { SidecarLoader } from './sidecar/SidecarLoader.js';
|
||||
import { ContextTracer } from './tracer.js';
|
||||
import { ContextEventBus } from './eventBus.js';
|
||||
import { ContextTokenCalculator } from './utils/contextTokenCalculator.js';
|
||||
|
||||
import type { Content } from '@google/genai';
|
||||
|
||||
@@ -46,6 +47,7 @@ describe('ContextManager Golden Tests', () => {
|
||||
beforeEach(() => {
|
||||
mockConfig = {
|
||||
isContextManagementEnabled: vi.fn().mockReturnValue(true),
|
||||
getExperimentalContextSidecarConfig: vi.fn().mockReturnValue(undefined),
|
||||
getTargetDir: vi.fn().mockReturnValue('/tmp'),
|
||||
getSessionId: vi.fn().mockReturnValue('test-session'),
|
||||
getToolOutputMaskingConfig: vi.fn().mockResolvedValue({
|
||||
@@ -68,7 +70,7 @@ describe('ContextManager Golden Tests', () => {
|
||||
}),
|
||||
};
|
||||
|
||||
const sidecar = SidecarLoader.fromLegacyConfig(mockConfig as any);
|
||||
const sidecar = SidecarLoader.fromConfig(mockConfig as any);
|
||||
const tracer = new ContextTracer('/tmp', 'test-session');
|
||||
const eventBus = new ContextEventBus();
|
||||
const env = new ContextEnvironmentImpl(
|
||||
@@ -118,7 +120,7 @@ describe('ContextManager Golden Tests', () => {
|
||||
const history = createLargeHistory();
|
||||
(contextManager as any).pristineEpisodes = (
|
||||
await import('./ir/mapper.js')
|
||||
).IrMapper.toIr(history);
|
||||
).IrMapper.toIr(history, new ContextTokenCalculator(4));
|
||||
const result = await contextManager.projectCompressedHistory();
|
||||
expect(result).toMatchSnapshot();
|
||||
});
|
||||
@@ -127,7 +129,7 @@ describe('ContextManager Golden Tests', () => {
|
||||
const history = createLargeHistory();
|
||||
(contextManager as any).pristineEpisodes = (
|
||||
await import('./ir/mapper.js')
|
||||
).IrMapper.toIr(history);
|
||||
).IrMapper.toIr(history, new ContextTokenCalculator(4));
|
||||
// In Golden Tests, we just want to ensure the logic doesn't throw or alter unprotected history in weird ways.
|
||||
// Since we're skipping processors due to being under budget, it should equal history.
|
||||
const tracer2 = new ContextTracer('/tmp', 'test2');
|
||||
@@ -153,7 +155,7 @@ describe('ContextManager Golden Tests', () => {
|
||||
|
||||
(contextManager as any).pristineEpisodes = (
|
||||
await import('./ir/mapper.js')
|
||||
).IrMapper.toIr(history);
|
||||
).IrMapper.toIr(history, new ContextTokenCalculator(4));
|
||||
const result = await contextManager.projectCompressedHistory();
|
||||
|
||||
expect(result.length).toEqual(history.length);
|
||||
|
||||
@@ -21,7 +21,7 @@ import type { SidecarConfig } from './sidecar/types.js';
|
||||
import { ProcessorRegistry } from './sidecar/registry.js';
|
||||
import { PipelineOrchestrator } from './sidecar/orchestrator.js';
|
||||
import { HistoryObserver } from './historyObserver.js';
|
||||
import { calculateEpisodeListTokens } from './utils/contextTokenCalculator.js';
|
||||
|
||||
import { generateWorkingBufferView } from './ir/graphUtils.js';
|
||||
|
||||
|
||||
@@ -63,6 +63,11 @@ export class ContextManager {
|
||||
|
||||
this.orchestrator = new PipelineOrchestrator(this.sidecar, this.env, this.eventBus, this.tracer);
|
||||
|
||||
this.eventBus.onPristineHistoryUpdated((event) => {
|
||||
this.pristineEpisodes = event.episodes;
|
||||
this.evaluateTriggers();
|
||||
});
|
||||
|
||||
this.eventBus.onVariantReady((event) => {
|
||||
|
||||
// Find the target episode in the pristine graph
|
||||
@@ -92,6 +97,32 @@ export class ContextManager {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluates if the current working buffer exceeds configured budget thresholds,
|
||||
* firing consolidation events if necessary.
|
||||
*/
|
||||
private evaluateTriggers() {
|
||||
if (!this.sidecar.budget) return;
|
||||
|
||||
const workingBuffer = this.getWorkingBufferView();
|
||||
const currentTokens = this.env.tokenCalculator.calculateEpisodeListTokens(workingBuffer);
|
||||
|
||||
this.tracer.logEvent('ContextManager', 'Evaluated triggers', { currentTokens, retainedTokens: this.sidecar.budget.retainedTokens });
|
||||
|
||||
// 1. Eager Compute Trigger
|
||||
this.eventBus.emitChunkReceived({ episodes: this.pristineEpisodes });
|
||||
|
||||
// 2. Budget Crossed Trigger
|
||||
if (currentTokens > this.sidecar.budget.retainedTokens) {
|
||||
const deficit = currentTokens - this.sidecar.budget.retainedTokens;
|
||||
this.tracer.logEvent('ContextManager', 'Budget crossed. Emitting ConsolidationNeeded', { deficit });
|
||||
this.eventBus.emitConsolidationNeeded({
|
||||
episodes: workingBuffer,
|
||||
targetDeficit: deficit,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Subscribes to the core AgentChatHistory to natively track all message events,
|
||||
* converting them seamlessly into pristine Episodes.
|
||||
@@ -105,10 +136,7 @@ export class ContextManager {
|
||||
chatHistory,
|
||||
this.eventBus,
|
||||
this.tracer,
|
||||
this.sidecar,
|
||||
(episodes) => { this.pristineEpisodes = episodes; },
|
||||
() => this.getWorkingBufferView(),
|
||||
(episodes) => calculateEpisodeListTokens(episodes)
|
||||
this.env.tokenCalculator,
|
||||
);
|
||||
|
||||
this.historyObserver.start();
|
||||
@@ -125,7 +153,8 @@ export class ContextManager {
|
||||
return generateWorkingBufferView(
|
||||
this.pristineEpisodes,
|
||||
this.sidecar.budget.retainedTokens,
|
||||
this.tracer
|
||||
this.tracer,
|
||||
this.env
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@
|
||||
import { EventEmitter } from 'node:events';
|
||||
import type { Episode, Variant } from './ir/types.js';
|
||||
|
||||
export interface PristineHistoryUpdatedEvent {
|
||||
episodes: Episode[];
|
||||
}
|
||||
|
||||
export interface ContextConsolidationEvent {
|
||||
episodes: Episode[];
|
||||
targetDeficit: number;
|
||||
@@ -23,6 +27,14 @@ export interface VariantReadyEvent {
|
||||
}
|
||||
|
||||
export class ContextEventBus extends EventEmitter {
|
||||
emitPristineHistoryUpdated(event: PristineHistoryUpdatedEvent) {
|
||||
this.emit('PRISTINE_HISTORY_UPDATED', event);
|
||||
}
|
||||
|
||||
onPristineHistoryUpdated(listener: (event: PristineHistoryUpdatedEvent) => void) {
|
||||
this.on('PRISTINE_HISTORY_UPDATED', listener);
|
||||
}
|
||||
|
||||
emitChunkReceived(event: IrChunkReceivedEvent) {
|
||||
this.emit('IR_CHUNK_RECEIVED', event);
|
||||
}
|
||||
|
||||
@@ -6,10 +6,9 @@
|
||||
|
||||
import type { AgentChatHistory, HistoryEvent } from '../core/agentChatHistory.js';
|
||||
import { IrMapper } from './ir/mapper.js';
|
||||
import type { ContextTokenCalculator } from './utils/contextTokenCalculator.js';
|
||||
import type { ContextEventBus } from './eventBus.js';
|
||||
import type { ContextTracer } from './tracer.js';
|
||||
import type { SidecarConfig } from './sidecar/types.js';
|
||||
import type { Episode } from './ir/types.js';
|
||||
|
||||
/**
|
||||
* Connects the raw AgentChatHistory to the ContextManager.
|
||||
@@ -23,10 +22,7 @@ export class HistoryObserver {
|
||||
private readonly chatHistory: AgentChatHistory,
|
||||
private readonly eventBus: ContextEventBus,
|
||||
private readonly tracer: ContextTracer,
|
||||
private readonly sidecar: SidecarConfig,
|
||||
private readonly onIrRebuilt: (episodes: Episode[]) => void,
|
||||
private readonly computeWorkingBuffer: () => Episode[],
|
||||
private readonly calculateIrTokens: (episodes: Episode[]) => number,
|
||||
private readonly tokenCalculator: ContextTokenCalculator,
|
||||
) {}
|
||||
|
||||
start() {
|
||||
@@ -36,11 +32,10 @@ export class HistoryObserver {
|
||||
|
||||
this.unsubscribeHistory = this.chatHistory.subscribe((_event: HistoryEvent) => {
|
||||
// Rebuild the pristine IR graph from the full source history on every change.
|
||||
const pristineEpisodes = IrMapper.toIr(this.chatHistory.get(), this.sidecar.tokenCalculator);
|
||||
const pristineEpisodes = IrMapper.toIr(this.chatHistory.get(), this.tokenCalculator);
|
||||
this.tracer.logEvent('HistoryObserver', 'Rebuilt pristine graph from chat history update', { episodeCount: pristineEpisodes.length });
|
||||
|
||||
this.onIrRebuilt(pristineEpisodes);
|
||||
this.checkTriggers(pristineEpisodes);
|
||||
this.eventBus.emitPristineHistoryUpdated({ episodes: pristineEpisodes });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -50,26 +45,4 @@ export class HistoryObserver {
|
||||
this.unsubscribeHistory = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
private checkTriggers(pristineEpisodes: Episode[]) {
|
||||
if (!this.sidecar.budget) return;
|
||||
|
||||
const workingBuffer = this.computeWorkingBuffer();
|
||||
const currentTokens = this.calculateIrTokens(workingBuffer);
|
||||
|
||||
this.tracer.logEvent('HistoryObserver', 'Evaluated triggers', { currentTokens, retainedTokens: this.sidecar.budget.retainedTokens });
|
||||
|
||||
// 1. Eager Compute Trigger
|
||||
this.eventBus.emitChunkReceived({ episodes: pristineEpisodes });
|
||||
|
||||
// 2. Budget Crossed Trigger
|
||||
if (currentTokens > this.sidecar.budget.retainedTokens) {
|
||||
const deficit = currentTokens - this.sidecar.budget.retainedTokens;
|
||||
this.tracer.logEvent('HistoryObserver', 'Budget crossed. Emitting ConsolidationNeeded', { deficit });
|
||||
this.eventBus.emitConsolidationNeeded({
|
||||
episodes: workingBuffer,
|
||||
targetDeficit: deficit,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
import type { Episode } from './types.js';
|
||||
import type { ContextTracer } from '../tracer.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import { calculateEpisodeListTokens } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
|
||||
/**
|
||||
* Generates a computed view of the pristine log.
|
||||
@@ -16,10 +16,13 @@ import { calculateEpisodeListTokens } from '../utils/contextTokenCalculator.js';
|
||||
* (snapshot > summary > masked) instead of the raw text.
|
||||
* Handles N-to-1 variant skipping automatically.
|
||||
*/
|
||||
import type { ContextEnvironment } from "../sidecar/environment.js";
|
||||
|
||||
export function generateWorkingBufferView(
|
||||
pristineEpisodes: Episode[],
|
||||
retainedTokens: number,
|
||||
tracer: ContextTracer,
|
||||
env: ContextEnvironment,
|
||||
): Episode[] {
|
||||
let currentEpisodes: Episode[] = [];
|
||||
let rollingTokens = 0;
|
||||
@@ -72,7 +75,7 @@ export function generateWorkingBufferView(
|
||||
: undefined,
|
||||
};
|
||||
|
||||
const epTokens = calculateEpisodeListTokens([projectedEp]);
|
||||
const epTokens = env.tokenCalculator.calculateEpisodeListTokens([projectedEp]);
|
||||
|
||||
if (rollingTokens > retainedTokens && ep.variants) {
|
||||
const snapshot = ep.variants['snapshot'];
|
||||
@@ -153,7 +156,7 @@ export function generateWorkingBufferView(
|
||||
}
|
||||
|
||||
currentEpisodes.unshift(projectedEp);
|
||||
rollingTokens += calculateEpisodeListTokens([projectedEp]);
|
||||
rollingTokens += env.tokenCalculator.calculateEpisodeListTokens([projectedEp]);
|
||||
}
|
||||
|
||||
return currentEpisodes;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { IrMapper } from './mapper.js';
|
||||
import { ContextTokenCalculator } from '../utils/contextTokenCalculator.js';
|
||||
import type { Content } from '@google/genai';
|
||||
import type { UserPrompt, ToolExecution } from './types.js';
|
||||
|
||||
@@ -80,7 +81,8 @@ describe('IrMapper', () => {
|
||||
{ role: 'model', parts: [{ text: 'Everything is done!' }] },
|
||||
];
|
||||
|
||||
const episodes = IrMapper.toIr(rawHistory);
|
||||
const tokenCalculator = new ContextTokenCalculator(4);
|
||||
const episodes = IrMapper.toIr(rawHistory, tokenCalculator);
|
||||
|
||||
expect(episodes).toHaveLength(1);
|
||||
const ep = episodes[0];
|
||||
@@ -135,7 +137,8 @@ describe('IrMapper', () => {
|
||||
{ role: 'model', parts: [{ text: 'Hi there' }] },
|
||||
];
|
||||
|
||||
const initialIr = IrMapper.toIr(history);
|
||||
const tokenCalculator = new ContextTokenCalculator(4);
|
||||
const initialIr = IrMapper.toIr(history, tokenCalculator);
|
||||
expect(initialIr).toHaveLength(1);
|
||||
|
||||
// Save the uniquely generated deterministic ID for the first episode
|
||||
@@ -146,7 +149,7 @@ describe('IrMapper', () => {
|
||||
history.push({ role: 'user', parts: [{ text: 'How are you?' }] });
|
||||
history.push({ role: 'model', parts: [{ text: 'I am an AI.' }] });
|
||||
|
||||
const updatedIr = IrMapper.toIr(history);
|
||||
const updatedIr = IrMapper.toIr(history, tokenCalculator);
|
||||
expect(updatedIr).toHaveLength(2);
|
||||
|
||||
// 3. Verify ID Stability
|
||||
|
||||
@@ -11,7 +11,7 @@ import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import type { ContextEnvironment, ContextTracer } from '../sidecar/environment.js';
|
||||
import type { PipelineOrchestrator } from '../sidecar/orchestrator.js';
|
||||
import type { SidecarConfig } from '../sidecar/types.js';
|
||||
import { calculateEpisodeListTokens } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
|
||||
export class IrProjector {
|
||||
/**
|
||||
@@ -31,7 +31,7 @@ export class IrProjector {
|
||||
}
|
||||
|
||||
const maxTokens = sidecar.budget.maxTokens;
|
||||
let currentTokens = calculateEpisodeListTokens(workingBuffer);
|
||||
let currentTokens = env.tokenCalculator.calculateEpisodeListTokens(workingBuffer);
|
||||
|
||||
if (currentTokens <= maxTokens) {
|
||||
tracer.logEvent('IrProjector', `View is within maxTokens (${currentTokens} <= ${maxTokens}). Returning view.`);
|
||||
@@ -50,7 +50,7 @@ export class IrProjector {
|
||||
isBudgetSatisfied: currentTokens <= sidecar.budget.maxTokens,
|
||||
});
|
||||
|
||||
const finalTokens = calculateEpisodeListTokens(processedEpisodes);
|
||||
const finalTokens = env.tokenCalculator.calculateEpisodeListTokens(processedEpisodes);
|
||||
tracer.logEvent('IrProjector', `Finished projection. Final token count: ${finalTokens}.`);
|
||||
debugLogger.log(`Context Manager finished. Final actual token count: ${finalTokens}.`);
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
import type { Episode } from '../ir/types.js';
|
||||
import type { ContextAccountingState, ContextProcessor } from '../pipeline.js';
|
||||
import type { ContextEnvironment } from '../sidecar/environment.js';
|
||||
import { estimateContextTokenCountSync as estimateTokenCountSync } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
import { sanitizeFilenamePart } from '../../utils/fileUtils.js';
|
||||
import * as fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
@@ -80,30 +80,28 @@ export class BlobDegradationProcessor implements ContextProcessor {
|
||||
newText = `[Multi-Modal Blob (${part.mimeType}, ${mb}MB) degraded to text to preserve context window. Saved to: ${filePath}]`;
|
||||
|
||||
// Re-calculate tokens. Images are expensive (~258 tokens). The text is cheap (~20 tokens).
|
||||
const oldTokens = estimateTokenCountSync([
|
||||
const oldTokens = this.env.tokenCalculator.estimateTokensForParts([
|
||||
{ inlineData: { mimeType: part.mimeType, data: part.data } },
|
||||
]);
|
||||
const newTokens = estimateTokenCountSync([{ text: newText }]);
|
||||
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: newText }]);
|
||||
tokensSaved = oldTokens - newTokens;
|
||||
} else if (part.type === 'file_data') {
|
||||
newText = `[File Reference (${part.mimeType}) degraded to text to preserve context window. Original URI: ${part.fileUri}]`;
|
||||
const oldTokens = estimateTokenCountSync([
|
||||
const oldTokens = this.env.tokenCalculator.estimateTokensForParts([
|
||||
{ fileData: { mimeType: part.mimeType, fileUri: part.fileUri } },
|
||||
]);
|
||||
const newTokens = estimateTokenCountSync([{ text: newText }]);
|
||||
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: newText }]);
|
||||
tokensSaved = oldTokens - newTokens;
|
||||
} else if (part.type === 'raw_part') {
|
||||
newText = `[Unknown Part degraded to text to preserve context window.]`;
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const oldTokens = estimateTokenCountSync([part.part as Part]);
|
||||
const newTokens = estimateTokenCountSync([{ text: newText }]);
|
||||
const oldTokens = this.env.tokenCalculator.estimateTokensForParts([part.part as Part]);
|
||||
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: newText }]);
|
||||
tokensSaved = oldTokens - newTokens;
|
||||
}
|
||||
|
||||
if (newText && tokensSaved > 0) {
|
||||
const newTokens = estimateTokenCountSync([{ text: newText }], 0, {
|
||||
charsPerToken: this.env.charsPerToken,
|
||||
});
|
||||
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: newText }]);
|
||||
part.presentation = { text: newText, tokens: newTokens };
|
||||
|
||||
ep.trigger.metadata.transformations.push({
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
import type { ContextProcessor, ContextAccountingState } from '../pipeline.js';
|
||||
import type { Episode } from '../ir/types.js';
|
||||
import type { ContextEnvironment } from '../sidecar/environment.js';
|
||||
import { calculateEpisodeListTokens } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
|
||||
export interface EmergencyTruncationProcessorOptions {}
|
||||
|
||||
@@ -19,7 +19,7 @@ export class EmergencyTruncationProcessor implements ContextProcessor {
|
||||
readonly id = 'EmergencyTruncationProcessor';
|
||||
readonly name = 'EmergencyTruncationProcessor';
|
||||
readonly options: EmergencyTruncationProcessorOptions;
|
||||
constructor(_env: ContextEnvironment, options: EmergencyTruncationProcessorOptions) {
|
||||
constructor(private readonly _env: ContextEnvironment, options: EmergencyTruncationProcessorOptions) {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ export class EmergencyTruncationProcessor implements ContextProcessor {
|
||||
|
||||
// We respect the global protected Episode IDs (like the system prompt at index 0)
|
||||
for (const ep of episodes) {
|
||||
const epTokens = calculateEpisodeListTokens([ep]);
|
||||
const epTokens = this._env.tokenCalculator.calculateEpisodeListTokens([ep]);
|
||||
|
||||
if (remainingTokens > targetTokens && !state.protectedEpisodeIds.has(ep.id)) {
|
||||
remainingTokens -= epTokens;
|
||||
|
||||
@@ -10,7 +10,7 @@ import type { ContextEnvironment } from '../sidecar/environment.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import { LlmRole } from '../../telemetry/types.js';
|
||||
import { getResponseText } from '../../utils/partUtils.js';
|
||||
import { estimateContextTokenCountSync } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
|
||||
export class SemanticCompressionProcessor implements ContextProcessor {
|
||||
readonly name = 'SemanticCompression';
|
||||
@@ -62,8 +62,8 @@ export class SemanticCompressionProcessor implements ContextProcessor {
|
||||
part.text,
|
||||
'User Prompt',
|
||||
);
|
||||
const newTokens = estimateContextTokenCountSync([{ text: summary }], 0, { charsPerToken: this.env.charsPerToken });
|
||||
const oldTokens = estimateContextTokenCountSync([{ text: part.text }], 0, { charsPerToken: this.env.charsPerToken });
|
||||
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: summary }]);
|
||||
const oldTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: part.text }]);
|
||||
|
||||
if (newTokens < oldTokens) {
|
||||
part.presentation = { text: summary, tokens: newTokens };
|
||||
@@ -88,8 +88,8 @@ export class SemanticCompressionProcessor implements ContextProcessor {
|
||||
step.text,
|
||||
'Agent Thought',
|
||||
);
|
||||
const newTokens = estimateContextTokenCountSync([{ text: summary }], 0, { charsPerToken: this.env.charsPerToken });
|
||||
const oldTokens = estimateContextTokenCountSync([{ text: step.text }], 0, { charsPerToken: this.env.charsPerToken });
|
||||
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: summary }]);
|
||||
const oldTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: step.text }]);
|
||||
|
||||
if (newTokens < oldTokens) {
|
||||
step.presentation = { text: summary, tokens: newTokens };
|
||||
@@ -130,7 +130,7 @@ export class SemanticCompressionProcessor implements ContextProcessor {
|
||||
// Wrap the summary in an object so the Gemini API accepts it as a valid functionResponse.response
|
||||
const newObsObject = { summary };
|
||||
|
||||
const newObsTokens = estimateContextTokenCountSync([
|
||||
const newObsTokens = this.env.tokenCalculator.estimateTokensForParts([
|
||||
{
|
||||
functionResponse: {
|
||||
name: step.toolName,
|
||||
@@ -138,7 +138,7 @@ export class SemanticCompressionProcessor implements ContextProcessor {
|
||||
id: step.id,
|
||||
},
|
||||
},
|
||||
], 0, { charsPerToken: this.env.charsPerToken });
|
||||
]);
|
||||
|
||||
const oldObsTokens =
|
||||
step.presentation?.tokens.observation ?? step.tokens.observation;
|
||||
|
||||
@@ -7,9 +7,10 @@
|
||||
import type { ContextProcessor, ContextAccountingState } from '../pipeline.js';
|
||||
import type { Episode, ToolExecution } from '../ir/types.js';
|
||||
import type { ContextEnvironment, ContextEventBus } from '../sidecar/environment.js';
|
||||
import { estimateContextTokenCountSync as estimateTokenCountSync } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { LlmRole } from '../../telemetry/llmRole.js';
|
||||
import { debugLogger } from 'src/utils/debugLogger.js';
|
||||
|
||||
export interface StateSnapshotProcessorOptions {
|
||||
model?: string;
|
||||
@@ -48,7 +49,7 @@ export class StateSnapshotProcessor implements ContextProcessor {
|
||||
for (let i = 1; i < episodes.length - 1; i++) {
|
||||
const ep = episodes[i];
|
||||
selectedEpisodes.push(ep);
|
||||
deficitAccumulator += estimateTokenCountSync([
|
||||
deficitAccumulator += this.env.tokenCalculator.estimateTokensForParts([
|
||||
{ text: (ep.trigger as any)?.semanticParts?.[0]?.text ?? '' },
|
||||
{ text: ep.yield?.text ?? '' },
|
||||
]);
|
||||
@@ -116,7 +117,7 @@ Output ONLY the raw factual snapshot, formatted compactly. Do not include markdo
|
||||
|
||||
// Synthesize a new "Episode" representing this compressed block
|
||||
const newId = uuidv4();
|
||||
const contentTokens = estimateTokenCountSync([{ text: snapshotText }]);
|
||||
const contentTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: snapshotText }]);
|
||||
|
||||
return {
|
||||
id: newId,
|
||||
@@ -144,7 +145,7 @@ Output ONLY the raw factual snapshot, formatted compactly. Do not include markdo
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Failed to synthesize snapshot:', error);
|
||||
debugLogger.error('Failed to synthesize snapshot:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
import type { ContextAccountingState, ContextProcessor } from '../pipeline.js';
|
||||
import type { ContextEnvironment } from '../sidecar/environment.js';
|
||||
import { estimateContextTokenCountSync } from '../utils/contextTokenCalculator.js';
|
||||
|
||||
import { sanitizeFilenamePart } from '../../utils/fileUtils.js';
|
||||
import * as fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
@@ -173,7 +173,7 @@ export class ToolMaskingProcessor implements ContextProcessor {
|
||||
step.presentation.observation = obsRes.masked;
|
||||
|
||||
// Recalculate tokens perfectly
|
||||
const newIntentTokens = estimateTokenCountSync([
|
||||
const newIntentTokens = this.env.tokenCalculator.estimateTokensForParts([
|
||||
{
|
||||
functionCall: {
|
||||
name: toolName,
|
||||
@@ -182,7 +182,7 @@ export class ToolMaskingProcessor implements ContextProcessor {
|
||||
},
|
||||
},
|
||||
]);
|
||||
const newObsTokens = estimateTokenCountSync([
|
||||
const newObsTokens = this.env.tokenCalculator.estimateTokensForParts([
|
||||
{
|
||||
functionResponse: {
|
||||
name: toolName,
|
||||
|
||||
@@ -8,23 +8,20 @@ import * as fs from 'node:fs';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import type { SidecarConfig } from './types.js';
|
||||
import { defaultSidecarProfile } from './profiles.js';
|
||||
import { debugLogger } from 'src/utils/debugLogger.js';
|
||||
|
||||
export class SidecarLoader {
|
||||
/**
|
||||
* Generates a Sidecar JSON graph from the experimental config file path or defaults.
|
||||
*/
|
||||
static fromConfig(config: Config): SidecarConfig {
|
||||
const sidecarPath =
|
||||
typeof (config as any).getExperimentalContextSidecarConfig === 'function'
|
||||
? (config as any).getExperimentalContextSidecarConfig()
|
||||
: undefined;
|
||||
|
||||
const sidecarPath = config.getExperimentalContextSidecarConfig()
|
||||
if (sidecarPath && fs.existsSync(sidecarPath)) {
|
||||
try {
|
||||
const fileContent = fs.readFileSync(sidecarPath, 'utf8');
|
||||
return JSON.parse(fileContent) as SidecarConfig;
|
||||
} catch (error) {
|
||||
console.error(
|
||||
debugLogger.error(
|
||||
`Failed to parse Sidecar configuration file at ${sidecarPath}:`,
|
||||
error,
|
||||
);
|
||||
@@ -34,8 +31,4 @@ export class SidecarLoader {
|
||||
|
||||
return defaultSidecarProfile;
|
||||
}
|
||||
|
||||
static fromLegacyConfig(config: Config): SidecarConfig {
|
||||
return SidecarLoader.fromConfig(config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ export function createMockContextConfig(
|
||||
getUsageStatisticsEnabled: vi.fn().mockReturnValue(false),
|
||||
getTargetDir: vi.fn().mockReturnValue('/tmp'),
|
||||
getSessionId: vi.fn().mockReturnValue('test-session'),
|
||||
getExperimentalContextSidecarConfig: vi.fn().mockReturnValue(undefined),
|
||||
};
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
@@ -95,7 +96,7 @@ import type { BaseLlmClient } from 'src/core/baseLlmClient.js';
|
||||
|
||||
export function setupContextComponentTest(config: Config) {
|
||||
const chatHistory = new AgentChatHistory();
|
||||
const sidecar = SidecarLoader.fromLegacyConfig(config);
|
||||
const sidecar = SidecarLoader.fromConfig(config);
|
||||
const tracer = new ContextTracer('/tmp', 'test-session');
|
||||
const eventBus = new ContextEventBus();
|
||||
const env = new ContextEnvironmentImpl(
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import * as fs from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
|
||||
export class ContextTracer {
|
||||
private traceDir: string;
|
||||
@@ -23,7 +24,7 @@ export class ContextTracer {
|
||||
fs.mkdirSync(this.assetsDir, { recursive: true });
|
||||
this.logEvent('SYSTEM', 'Context Tracer Initialized', { sessionId });
|
||||
} catch (e) {
|
||||
console.error('Failed to initialize ContextTracer', e);
|
||||
debugLogger.error('Failed to initialize ContextTracer', e);
|
||||
this.enabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,12 @@ import type { Part } from '@google/genai';
|
||||
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
|
||||
import type { Episode } from '../ir/types.js';
|
||||
|
||||
/**
|
||||
* The flat token cost assigned to a single multi-modal asset (like an image tile)
|
||||
* by the Gemini API. We use this as a baseline heuristic for inlineData/fileData.
|
||||
*/
|
||||
const BASE_MULTIMODAL_TOKEN_COST = 258;
|
||||
|
||||
export class ContextTokenCalculator {
|
||||
constructor(private readonly charsPerToken: number) {}
|
||||
|
||||
@@ -47,21 +53,19 @@ export class ContextTokenCalculator {
|
||||
* Deeply inspects the nested structure and uses the base tokenization math.
|
||||
*/
|
||||
estimateTokensForParts(parts: Part[], depth: number = 0): number {
|
||||
if (this.charsPerToken !== 4) {
|
||||
let totalTokens = 0;
|
||||
for (const part of parts) {
|
||||
if (typeof part.text === 'string') {
|
||||
totalTokens += Math.ceil(part.text.length / this.charsPerToken);
|
||||
} else {
|
||||
totalTokens += Math.ceil(
|
||||
JSON.stringify(part).length / this.charsPerToken,
|
||||
);
|
||||
}
|
||||
let totalTokens = 0;
|
||||
for (const part of parts) {
|
||||
if (typeof part.text === 'string') {
|
||||
totalTokens += Math.ceil(part.text.length / this.charsPerToken);
|
||||
} else if (part.inlineData !== undefined || part.fileData !== undefined) {
|
||||
totalTokens += BASE_MULTIMODAL_TOKEN_COST;
|
||||
} else {
|
||||
totalTokens += Math.ceil(
|
||||
JSON.stringify(part).length / this.charsPerToken,
|
||||
);
|
||||
}
|
||||
return totalTokens;
|
||||
}
|
||||
|
||||
// The baseEstimate no longer accepts config because we forked it!
|
||||
return baseEstimate(parts, depth);
|
||||
// Also include structural overhead
|
||||
return totalTokens + baseEstimate(parts, depth);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user