This commit is contained in:
Your Name
2026-04-08 22:06:09 +00:00
parent 0df3521032
commit b39b74ee09
18 changed files with 62 additions and 184 deletions
+1 -1
View File
@@ -116,7 +116,7 @@ export class ContextManager {
// Walk backwards finding nodes that fall out of the retained budget
for (let i = this.currentShip.length - 1; i >= 0; i--) {
const node = this.currentShip[i];
rollingTokens += node.metadata.currentTokens;
rollingTokens += this.env.tokenCalculator.calculateConcreteListTokens([node]);
if (rollingTokens > this.sidecar.budget.retainedTokens) {
agedOutNodes.add(node.id);
}
+5 -5
View File
@@ -62,7 +62,7 @@ export function fromIr(ship: readonly ConcreteNode[]): Content[] {
return history;
}
function serializeUserPrompt(prompt: UserPrompt): Content | null {
export function serializeUserPrompt(prompt: UserPrompt): Content | null {
const parts: Part[] = [];
for (const sp of prompt.semanticParts) {
if (sp.type === 'text') {
@@ -82,11 +82,11 @@ function serializeUserPrompt(prompt: UserPrompt): Content | null {
return parts.length > 0 ? { role: 'user', parts } : null;
}
function serializeAgentThought(thought: AgentThought): Part {
export function serializeAgentThought(thought: AgentThought): Part {
return { text: thought.text };
}
function serializeToolExecution(
export function serializeToolExecution(
tool: ToolExecution,
): { call: Part; response: Part } {
return {
@@ -107,7 +107,7 @@ function serializeToolExecution(
};
}
function serializeMaskedTool(
export function serializeMaskedTool(
tool: MaskedTool,
): { call: Part; response: Part } {
return {
@@ -128,6 +128,6 @@ function serializeMaskedTool(
};
}
function serializeAgentYield(yieldNode: AgentYield): Part {
export function serializeAgentYield(yieldNode: AgentYield): Part {
return { text: yieldNode.text };
}
+1 -1
View File
@@ -75,7 +75,7 @@ export class IrProjector {
// Start from newest and count backwards
for (let i = ship.length - 1; i >= 0; i--) {
const node = ship[i];
const nodeTokens = node.metadata.currentTokens;
const nodeTokens = env.tokenCalculator.calculateConcreteListTokens([node]);
rollingTokens += nodeTokens;
if (rollingTokens > sidecar.budget.retainedTokens) {
agedOutNodes.add(node.id);
+4 -28
View File
@@ -8,7 +8,6 @@ import type { Content, Part } from '@google/genai';
import { randomUUID } from 'node:crypto';
import type {
Episode,
IrMetadata,
SemanticPart,
ToolExecution,
AgentThought,
@@ -50,14 +49,7 @@ export function toIr(
let currentEpisode: Partial<Episode> | null = null;
const pendingCallParts: Map<string, Part> = new Map();
const createMetadata = (parts: Part[]): IrMetadata => {
const tokens = tokenCalculator.estimateTokensForParts(parts, 0);
return {
originalTokens: tokens,
currentTokens: tokens,
transformations: [],
};
};
const finalizeEpisode = () => {
if (currentEpisode && isCompleteEpisode(currentEpisode)) {
@@ -81,20 +73,19 @@ export function toIr(
currentEpisode,
pendingCallParts,
tokenCalculator,
createMetadata,
);
}
if (hasUserParts) {
finalizeEpisode();
currentEpisode = parseUserParts(msg, createMetadata);
currentEpisode = parseUserParts(msg);
}
} else if (msg.role === 'model') {
currentEpisode = parseModelParts(
msg,
currentEpisode,
pendingCallParts,
createMetadata,
);
}
}
@@ -112,7 +103,6 @@ function parseToolResponses(
currentEpisode: Partial<Episode> | null,
pendingCallParts: Map<string, Part>,
tokenCalculator: ContextTokenCalculator,
_createMetadata: (parts: Part[]) => IrMetadata,
): Partial<Episode> {
if (!currentEpisode) {
currentEpisode = {
@@ -146,11 +136,6 @@ function parseToolResponses(
intent: intentTokens,
observation: obsTokens,
},
metadata: {
originalTokens: intentTokens + obsTokens,
currentTokens: intentTokens + obsTokens,
transformations: [],
},
};
currentEpisode.concreteNodes = [
...(currentEpisode.concreteNodes || []),
@@ -164,7 +149,6 @@ function parseToolResponses(
function parseUserParts(
msg: Content,
_createMetadata: (parts: Part[]) => IrMetadata,
): Partial<Episode> {
const semanticParts: SemanticPart[] = [];
for (const p of msg.parts!) {
@@ -190,9 +174,7 @@ function parseUserParts(
id: getStableId(msg.parts![0] || msg),
type: 'USER_PROMPT',
semanticParts,
metadata: _createMetadata(msg.parts!.filter((p) => !p.functionResponse)),
};
return {
id: getStableId(msg),
timestamp: Date.now(),
@@ -204,7 +186,6 @@ function parseModelParts(
msg: Content,
currentEpisode: Partial<Episode> | null,
pendingCallParts: Map<string, Part>,
_createMetadata: (parts: Part[]) => IrMetadata,
): Partial<Episode> {
if (!currentEpisode) {
currentEpisode = {
@@ -223,8 +204,8 @@ function parseModelParts(
id: getStableId(part),
type: 'AGENT_THOUGHT',
text: part.text,
metadata: _createMetadata([part]),
};
currentEpisode.concreteNodes = [
...(currentEpisode.concreteNodes || []),
thought,
@@ -240,11 +221,6 @@ function finalizeYield(currentEpisode: Partial<Episode>) {
id: randomUUID(),
type: 'AGENT_YIELD',
text: 'Yield', // Synthesized yield since we don't have the original concrete node
metadata: {
originalTokens: 1,
currentTokens: 1,
transformations: [],
},
};
const existingNodes = currentEpisode.concreteNodes || [];
currentEpisode.concreteNodes = [...existingNodes, yieldNode];
-27
View File
@@ -6,32 +6,6 @@
import type { Part } from '@google/genai';
/**
* Universal Audit Metadata
* Tracks the lifecycle and transformations of a node or part within the IR.
* This guarantees perfect reversibility and enables long-term memory offloading.
*/
export interface IrMetadata {
/** The estimated number of tokens this entity originally consumed. */
readonly originalTokens: number;
/** The current estimated number of tokens this entity consumes in its degraded state. */
readonly currentTokens: number;
/** An audit trail of all transformations applied by ContextProcessors. */
readonly transformations: ReadonlyArray<{
readonly processorName: string;
readonly action:
| 'MASKED'
| 'TRUNCATED'
| 'DEGRADED'
| 'SUMMARIZED'
| 'EVICTED'
| 'SYNTHESIZED';
readonly timestamp: number;
/** Pointer to where the original uncompressed payload was saved (if applicable) */
readonly diskPointer?: string;
}>;
}
export type IrNodeType =
// Organic Concrete Nodes
| 'USER_PROMPT'
@@ -53,7 +27,6 @@ export type IrNodeType =
export interface IrNode {
readonly id: string;
readonly type: IrNodeType;
readonly metadata: IrMetadata;
}
/**
@@ -63,8 +63,6 @@ describe('BlobDegradationProcessor', () => {
expect((degradedPart as any).text).toContain('[Multi-Modal Blob (image/png, 0.00MB) degraded to text');
// The transformation should be logged
expect(modifiedPrompt.metadata.transformations.length).toBe(1);
expect(modifiedPrompt.metadata.transformations[0].action).toBe('DEGRADED');
});
it('should stop degrading once the deficit is cleared', async () => {
@@ -130,17 +130,6 @@ export class BlobDegradationProcessor implements ContextProcessor {
...prompt,
id: this.env.idGenerator.generateId(), // Issue a new ID because it was modified
semanticParts: newParts,
metadata: {
...prompt.metadata,
transformations: [
...prompt.metadata.transformations,
{
processorName: this.name,
action: 'DEGRADED',
timestamp: Date.now(),
}
]
}
};
returnedNodes.push(degradedNode);
} else {
@@ -77,7 +77,7 @@ export class EmergencyTruncationProcessor implements ContextProcessor {
continue;
}
removedTokens += node.metadata.currentTokens;
removedTokens += this.env.tokenCalculator.getTokenCost(node);
}
return keptNodes;
@@ -65,8 +65,6 @@ describe('HistorySquashingProcessor', () => {
expect(squashedPrompt.id).not.toBe(prompt.id);
expect(squashedPrompt.semanticParts[0].type).toBe('text');
expect((squashedPrompt.semanticParts[0] as any).text).toContain('[... OMITTED');
expect(squashedPrompt.metadata.transformations.length).toBe(1);
expect(squashedPrompt.metadata.transformations[0].action).toBe('TRUNCATED');
// 2. Agent Thought
const squashedThought = result[1] as AgentThought;
@@ -121,14 +121,6 @@ export class HistorySquashingProcessor implements ContextProcessor {
...prompt,
id: this.env.idGenerator.generateId(),
semanticParts: newParts,
metadata: {
...prompt.metadata,
currentTokens: newTokens,
transformations: [
...prompt.metadata.transformations,
{ processorName: this.name, action: 'TRUNCATED', timestamp: Date.now() }
]
}
});
} else {
returnedNodes.push(node);
@@ -147,14 +139,6 @@ export class HistorySquashingProcessor implements ContextProcessor {
...thought,
id: this.env.idGenerator.generateId(),
text: squashResult.text,
metadata: {
...thought.metadata,
currentTokens: squashResult.newTokens,
transformations: [
...thought.metadata.transformations,
{ processorName: this.name, action: 'TRUNCATED', timestamp: Date.now() }
]
}
});
} else {
returnedNodes.push(node);
@@ -173,14 +157,6 @@ export class HistorySquashingProcessor implements ContextProcessor {
...agentYield,
id: this.env.idGenerator.generateId(),
text: squashResult.text,
metadata: {
...agentYield.metadata,
currentTokens: squashResult.newTokens,
transformations: [
...agentYield.metadata.transformations,
{ processorName: this.name, action: 'TRUNCATED', timestamp: Date.now() }
]
}
});
} else {
returnedNodes.push(node);
@@ -49,7 +49,6 @@ describe('SemanticCompressionProcessor', () => {
const thought = createDummyNode('ep1', 'AGENT_THOUGHT', 1500, {
text: 'The model is thinking something incredibly long and verbose that exceeds 10 chars',
metadata: { currentTokens: 5000, originalTokens: 5000, transformations: [] }
}, 'thought-id') as AgentThought;
const tool = createDummyToolNode('ep1', 50, 1000, {
@@ -75,8 +74,6 @@ describe('SemanticCompressionProcessor', () => {
expect(compressedPrompt.semanticParts[0].type).toBe('text');
expect((compressedPrompt.semanticParts[0] as any).text).toBe('Mocked Summary!');
expect(compressedPrompt.metadata.transformations.length).toBe(1);
expect(compressedPrompt.metadata.transformations[0].action).toBe('SUMMARIZED');
// 2. Agent Thought
const compressedThought = result[1] as AgentThought;
expect(compressedThought.id).toBe('mock-uuid-2');
@@ -84,14 +81,12 @@ describe('SemanticCompressionProcessor', () => {
expect(compressedThought.text).toBe('Mocked Summary!');
expect(compressedThought.metadata.transformations.length).toBe(1);
// 3. Tool Execution
const compressedTool = result[2] as ToolExecution;
expect(compressedTool.id).toBe('mock-uuid-3');
expect(compressedTool.id).not.toBe(tool.id);
expect(compressedTool.observation).toEqual({ summary: 'Mocked Summary!' });
expect(compressedTool.metadata.transformations.length).toBe(1);
// Verify LLM was called 3 times
expect(mockLlmClient.generateContent).toHaveBeenCalledTimes(3);
});
@@ -130,7 +125,6 @@ describe('SemanticCompressionProcessor', () => {
const thought = createDummyNode('ep1', 'AGENT_THOUGHT', 1500, {
text: 'The model is thinking something incredibly long and verbose that exceeds 10 chars',
metadata: { currentTokens: 5000, originalTokens: 5000, transformations: [] }
}, 'thought-id') as AgentThought;
const targets = [prompt, thought];
@@ -123,26 +123,16 @@ export class SemanticCompressionProcessor implements ContextProcessor {
}
if (modified) {
const newTokens = this.env.tokenCalculator.estimateTokensForParts(
newParts.map(p => {
if (p.type === 'text') return { text: p.text };
if (p.type === 'inline_data') return { inlineData: { mimeType: p.mimeType, data: p.data } };
if (p.type === 'file_data') return { fileData: { mimeType: p.mimeType, fileUri: p.fileUri } };
return (p as Extract<import('../ir/types.js').SemanticPart, { type: 'raw_part' }>).part;
})
);
});
returnedNodes.push({
...prompt,
id: this.env.idGenerator.generateId(),
semanticParts: newParts,
metadata: {
...prompt.metadata,
currentTokens: newTokens,
transformations: [
...prompt.metadata.transformations,
{ processorName: this.name, action: 'SUMMARIZED', timestamp: Date.now() }
]
}
});
} else {
returnedNodes.push(node);
@@ -156,7 +146,7 @@ export class SemanticCompressionProcessor implements ContextProcessor {
if (thought.text.length > thresholdChars) {
const summary = await this.generateSummary(thought.text, 'Agent Thought');
const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: summary }]);
const oldTokens = thought.metadata.currentTokens;
const oldTokens = this.env.tokenCalculator.getTokenCost(thought);
if (newTokens < oldTokens) {
currentDeficit -= (oldTokens - newTokens);
@@ -164,14 +154,6 @@ export class SemanticCompressionProcessor implements ContextProcessor {
...thought,
id: this.env.idGenerator.generateId(),
text: summary,
metadata: {
...thought.metadata,
currentTokens: newTokens,
transformations: [
...thought.metadata.transformations,
{ processorName: this.name, action: 'SUMMARIZED', timestamp: Date.now() }
]
}
});
continue;
}
@@ -210,7 +192,7 @@ export class SemanticCompressionProcessor implements ContextProcessor {
},
]);
const oldObsTokens = tool.tokens?.observation ?? tool.metadata.currentTokens;
const oldObsTokens = tool.tokens?.observation ?? this.env.tokenCalculator.getTokenCost(tool);
const intentTokens = tool.tokens?.intent ?? 0;
if (newObsTokens < oldObsTokens) {
@@ -223,14 +205,6 @@ export class SemanticCompressionProcessor implements ContextProcessor {
intent: intentTokens,
observation: newObsTokens,
},
metadata: {
...tool.metadata,
currentTokens: intentTokens + newObsTokens,
transformations: [
...tool.metadata.transformations,
{ processorName: this.name, action: 'SUMMARIZED', timestamp: Date.now() }
]
}
});
continue;
}
@@ -53,7 +53,6 @@ describe('StateSnapshotProcessor', () => {
// Should remove A and B, insert Snapshot, keep C
expect(result.length).toBe(2);
expect(result[0].type).toBe('SNAPSHOT');
expect((result[0] as Snapshot).text).toBe('<compressed A and B>');
expect(result[1].id).toBe('node-C');
// Should consume the message
@@ -107,6 +106,5 @@ describe('StateSnapshotProcessor', () => {
expect(env.llmClient.generateContent).toHaveBeenCalled();
expect(result.length).toBe(2); // nodeA is skipped as "system prompt", snapshot + nodeA
expect(result[1].type).toBe('SNAPSHOT');
expect((result[1] as Snapshot).text).toBe('Mock LLM summary response');
});
});
@@ -73,7 +73,6 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
if (isValid) {
// If valid, apply it!
const newId = this.env.idGenerator.generateId();
const tokens = this.env.tokenCalculator.estimateTokensForString(newText);
const snapshotNode: Snapshot = {
id: newId,
@@ -81,13 +80,6 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
type: 'SNAPSHOT',
timestamp: Date.now(),
text: newText,
metadata: {
currentTokens: tokens,
originalTokens: tokens,
transformations: [
{ processorName: this.name, action: 'SYNTHESIZED', timestamp: Date.now() }
]
}
};
// Remove the consumed nodes and insert the snapshot at the earliest index
@@ -131,7 +123,7 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
}
nodesToSummarize.push(node);
deficitAccumulator += node.metadata.currentTokens;
deficitAccumulator += this.env.tokenCalculator.getTokenCost(node);
if (deficitAccumulator >= targetTokensToRemove) break;
}
@@ -142,20 +134,12 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
const snapshotText = await this.synthesizeSnapshot(nodesToSummarize);
const newId = this.env.idGenerator.generateId();
const tokens = this.env.tokenCalculator.estimateTokensForString(snapshotText);
const snapshotNode: Snapshot = {
id: newId,
logicalParentId: newId,
type: 'SNAPSHOT',
timestamp: Date.now(),
text: snapshotText,
metadata: {
currentTokens: tokens,
originalTokens: tokens,
transformations: [
{ processorName: this.name, action: 'SYNTHESIZED', timestamp: Date.now() }
]
}
};
const consumedIds = nodesToSummarize.map(n => n.id);
@@ -15,7 +15,6 @@ describe('ToolMaskingProcessor', () => {
it('should write large strings to disk and replace them with a masked pointer', async () => {
const env = createMockEnvironment();
// 1 token = 1 char for simplicity
env.tokenCalculator.tokensToChars = vi.fn().mockReturnValue(10);
// Fake token calculator says new tokens are 5
env.tokenCalculator.estimateTokensForParts = vi.fn().mockReturnValue(5);
@@ -30,11 +29,6 @@ describe('ToolMaskingProcessor', () => {
result: 'this is a really long string that should get masked out because it exceeds 10 chars',
metadata: 'short',
},
metadata: {
currentTokens: 150,
originalTokens: 150,
transformations: []
}
});
const result = await processor.process({
@@ -56,13 +50,10 @@ describe('ToolMaskingProcessor', () => {
expect(obs.metadata).toBe('short'); // Untouched
// Transformation logged
expect(masked.metadata.transformations.length).toBe(1);
expect(masked.metadata.transformations[0].action).toBe('MASKED');
});
it('should skip unmaskable tools', async () => {
const env = createMockEnvironment();
env.tokenCalculator.tokensToChars = vi.fn().mockReturnValue(10);
const processor = ToolMaskingProcessor.create(env, {
stringLengthThresholdTokens: 10,
@@ -251,7 +251,7 @@ export class ToolMaskingProcessor implements ContextProcessor {
const newObsTokens = this.env.tokenCalculator.estimateTokensForParts([obsPart]);
const tokensSaved =
(step.metadata.currentTokens) -
this.env.tokenCalculator.getTokenCost(step) -
(newIntentTokens + newObsTokens);
if (tokensSaved > 0) {
@@ -264,18 +264,6 @@ export class ToolMaskingProcessor implements ContextProcessor {
intent: newIntentTokens,
observation: newObsTokens,
},
metadata: {
...step.metadata,
currentTokens: newIntentTokens + newObsTokens,
transformations: [
...step.metadata.transformations,
{
processorName: this.name,
action: 'MASKED',
timestamp: Date.now(),
}
]
}
};
returnedNodes.push(maskedNode);
@@ -152,7 +152,6 @@ export class SimulationHarness {
variant: {
type: 'MASKED_TOOL',
id: 'mock-id',
metadata: { currentTokens: 0, originalTokens: 0, transformations: [] },
tokens: { intent: 0, observation: 0 },
intent: {}, observation: {}, toolName: 'tool',
},
@@ -7,6 +7,8 @@
import type { Part } from '@google/genai';
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
import type { ConcreteNode } from '../ir/types.js';
import { isUserPrompt, isAgentThought, isToolExecution, isMaskedTool, isAgentYield, isSnapshot, isRollingSummary } from '../ir/graphUtils.js';
import { serializeUserPrompt, serializeAgentThought, serializeToolExecution, serializeMaskedTool, serializeAgentYield } from '../ir/fromIr.js';
/**
* The flat token cost assigned to a single multi-modal asset (like an image tile)
@@ -15,10 +17,13 @@ import type { ConcreteNode } from '../ir/types.js';
export class ContextTokenCalculator {
private readonly tokenCache = new Map<string, number>();
constructor(private readonly charsPerToken: number) {}
/**
* Fast, simple heuristic estimation for a raw string.
* Estimates tokens for a simple string based on character count.
* Fast, but inherently inaccurate compared to real model tokenization.
*/
estimateTokensForString(text: string): number {
return Math.ceil(text.length / this.charsPerToken);
@@ -33,17 +38,52 @@ export class ContextTokenCalculator {
}
/**
 * Pre-calculates and caches the token cost of a newly minted node.
 * Because nodes are immutable, this cost never changes for this node ID.
 */
cacheNodeTokens(node: ConcreteNode): number {
  // Price the node by serializing it exactly as fromIr would, then
  // estimating tokens on the resulting Gemini parts. Unknown node
  // kinds fall through with a cost of zero.
  let cost = 0;
  if (isUserPrompt(node)) {
    const content = serializeUserPrompt(node);
    cost = content?.parts ? this.estimateTokensForParts(content.parts as Part[]) : 0;
  } else if (isAgentThought(node)) {
    cost = this.estimateTokensForParts([serializeAgentThought(node)]);
  } else if (isToolExecution(node)) {
    const { call, response } = serializeToolExecution(node);
    cost = this.estimateTokensForParts([call, response]);
  } else if (isMaskedTool(node)) {
    const { call, response } = serializeMaskedTool(node);
    cost = this.estimateTokensForParts([call, response]);
  } else if (isAgentYield(node)) {
    cost = this.estimateTokensForParts([serializeAgentYield(node)]);
  } else if (isSnapshot(node) || isRollingSummary(node)) {
    cost = this.estimateTokensForParts([{ text: node.text }]);
  }
  // Immutable nodes: safe to memoize by id forever.
  this.tokenCache.set(node.id, cost);
  return cost;
}
/**
* Retrieves the token cost of a single node from the cache.
* If it misses the cache, it computes it and caches it.
*/
getTokenCost(node: ConcreteNode): number {
  // Cache hit is O(1); a miss computes and memoizes via cacheNodeTokens.
  // `??` is safe here: the cache stores numbers (0 is a valid hit) and
  // only an absent entry yields undefined.
  return this.tokenCache.get(node.id) ?? this.cacheNodeTokens(node);
}
/**
* Fast calculation for a flat array of ConcreteNodes (The Ship).
* It relies entirely on the O(1) sidecar token cache.
*/
calculateConcreteListTokens(ship: readonly ConcreteNode[]): number {
  let tokens = 0;
  for (const node of ship) {
    // Sum per-node costs from the sidecar cache; this replaces the old
    // node.metadata.currentTokens accumulation (IrMetadata was removed),
    // so only the cache-based line remains — keeping both would double-count.
    tokens += this.getTokenCost(node);
  }
  return tokens;
}
/**
* Slower, precise estimation for a Gemini Content/Part graph.
* Deeply inspects the nested structure and uses the base tokenization math.