mirror of https://github.com/google-gemini/gemini-cli.git

closer!
@@ -116,7 +116,7 @@ export class ContextManager {
     // Walk backwards finding nodes that fall out of the retained budget
     for (let i = this.currentShip.length - 1; i >= 0; i--) {
       const node = this.currentShip[i];
-      rollingTokens += node.metadata.currentTokens;
+      rollingTokens += this.env.tokenCalculator.calculateConcreteListTokens([node]);
       if (rollingTokens > this.sidecar.budget.retainedTokens) {
         agedOutNodes.add(node.id);
       }
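Both this hunk and the matching IrProjector hunk below replace a stored per-node count (`node.metadata.currentTokens`) with a calculator lookup. A minimal standalone sketch of the pattern, assuming a cost memoized by immutable node ID (the `Node` shape and the 4-chars-per-token divisor are illustrative stand-ins, not the real IR types):

interface Node { id: string; text: string; }

const tokenCache = new Map<string, number>();

// Stand-in for ContextTokenCalculator.getTokenCost: memoized by node ID.
function costOf(node: Node): number {
  const hit = tokenCache.get(node.id);
  if (hit !== undefined) return hit;
  const cost = Math.ceil(node.text.length / 4); // crude chars-per-token heuristic
  tokenCache.set(node.id, cost);
  return cost;
}

// Walk newest-to-oldest; everything past the retained budget ages out.
function ageOut(ship: readonly Node[], retainedTokens: number): Set<string> {
  const agedOut = new Set<string>();
  let rolling = 0;
  for (let i = ship.length - 1; i >= 0; i--) {
    rolling += costOf(ship[i]);
    if (rolling > retainedTokens) agedOut.add(ship[i].id);
  }
  return agedOut;
}

Because nodes are immutable, a cache entry never goes stale: a modified node arrives under a new ID and simply misses the cache once.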
@@ -62,7 +62,7 @@ export function fromIr(ship: readonly ConcreteNode[]): Content[] {
   return history;
 }
 
-function serializeUserPrompt(prompt: UserPrompt): Content | null {
+export function serializeUserPrompt(prompt: UserPrompt): Content | null {
   const parts: Part[] = [];
   for (const sp of prompt.semanticParts) {
     if (sp.type === 'text') {
@@ -82,11 +82,11 @@ function serializeUserPrompt(prompt: UserPrompt): Content | null {
   return parts.length > 0 ? { role: 'user', parts } : null;
 }
 
-function serializeAgentThought(thought: AgentThought): Part {
+export function serializeAgentThought(thought: AgentThought): Part {
   return { text: thought.text };
 }
 
-function serializeToolExecution(
+export function serializeToolExecution(
   tool: ToolExecution,
 ): { call: Part; response: Part } {
   return {
@@ -107,7 +107,7 @@ function serializeToolExecution(
   };
 }
 
-function serializeMaskedTool(
+export function serializeMaskedTool(
   tool: MaskedTool,
 ): { call: Part; response: Part } {
   return {
@@ -128,6 +128,6 @@ function serializeMaskedTool(
   };
 }
 
-function serializeAgentYield(yieldNode: AgentYield): Part {
+export function serializeAgentYield(yieldNode: AgentYield): Part {
   return { text: yieldNode.text };
 }
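Exporting the per-node serializers is what lets the token calculator (final hunk below) price a single node without re-serializing the whole history. A hedged usage sketch; the import path is taken from the calculator's import hunk, and the literal is trimmed to the one field serializeAgentThought actually reads:

import { serializeAgentThought } from '../ir/fromIr.js';

const thought = { text: 'plan the next step' } as Parameters<typeof serializeAgentThought>[0];
const part = serializeAgentThought(thought); // => { text: 'plan the next step' }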
@@ -75,7 +75,7 @@ export class IrProjector {
     // Start from newest and count backwards
     for (let i = ship.length - 1; i >= 0; i--) {
       const node = ship[i];
-      const nodeTokens = node.metadata.currentTokens;
+      const nodeTokens = env.tokenCalculator.calculateConcreteListTokens([node]);
       rollingTokens += nodeTokens;
       if (rollingTokens > sidecar.budget.retainedTokens) {
         agedOutNodes.add(node.id);
@@ -8,7 +8,6 @@ import type { Content, Part } from '@google/genai';
 import { randomUUID } from 'node:crypto';
 import type {
   Episode,
-  IrMetadata,
   SemanticPart,
   ToolExecution,
   AgentThought,
@@ -50,14 +49,7 @@ export function toIr(
   let currentEpisode: Partial<Episode> | null = null;
   const pendingCallParts: Map<string, Part> = new Map();
 
-  const createMetadata = (parts: Part[]): IrMetadata => {
-    const tokens = tokenCalculator.estimateTokensForParts(parts, 0);
-    return {
-      originalTokens: tokens,
-      currentTokens: tokens,
-      transformations: [],
-    };
-  };
-
   const finalizeEpisode = () => {
     if (currentEpisode && isCompleteEpisode(currentEpisode)) {
@@ -81,20 +73,19 @@ export function toIr(
           currentEpisode,
           pendingCallParts,
           tokenCalculator,
-          createMetadata,
         );
       }
 
       if (hasUserParts) {
         finalizeEpisode();
-        currentEpisode = parseUserParts(msg, createMetadata);
+        currentEpisode = parseUserParts(msg);
       }
     } else if (msg.role === 'model') {
       currentEpisode = parseModelParts(
         msg,
         currentEpisode,
         pendingCallParts,
-        createMetadata,
       );
     }
   }
@@ -112,7 +103,6 @@ function parseToolResponses(
   currentEpisode: Partial<Episode> | null,
   pendingCallParts: Map<string, Part>,
   tokenCalculator: ContextTokenCalculator,
-  _createMetadata: (parts: Part[]) => IrMetadata,
 ): Partial<Episode> {
   if (!currentEpisode) {
     currentEpisode = {
@@ -146,11 +136,6 @@ function parseToolResponses(
             intent: intentTokens,
             observation: obsTokens,
           },
-          metadata: {
-            originalTokens: intentTokens + obsTokens,
-            currentTokens: intentTokens + obsTokens,
-            transformations: [],
-          },
         };
         currentEpisode.concreteNodes = [
           ...(currentEpisode.concreteNodes || []),
@@ -164,7 +149,6 @@ function parseToolResponses(
 
 function parseUserParts(
   msg: Content,
-  _createMetadata: (parts: Part[]) => IrMetadata,
 ): Partial<Episode> {
   const semanticParts: SemanticPart[] = [];
   for (const p of msg.parts!) {
@@ -190,9 +174,7 @@ function parseUserParts(
       id: getStableId(msg.parts![0] || msg),
       type: 'USER_PROMPT',
       semanticParts,
-      metadata: _createMetadata(msg.parts!.filter((p) => !p.functionResponse)),
     };
-
     return {
       id: getStableId(msg),
       timestamp: Date.now(),
@@ -204,7 +186,6 @@ function parseModelParts(
   msg: Content,
   currentEpisode: Partial<Episode> | null,
   pendingCallParts: Map<string, Part>,
-  _createMetadata: (parts: Part[]) => IrMetadata,
 ): Partial<Episode> {
   if (!currentEpisode) {
     currentEpisode = {
@@ -223,8 +204,8 @@ function parseModelParts(
           id: getStableId(part),
           type: 'AGENT_THOUGHT',
           text: part.text,
-          metadata: _createMetadata([part]),
         };
 
         currentEpisode.concreteNodes = [
           ...(currentEpisode.concreteNodes || []),
           thought,
@@ -240,11 +221,6 @@ function finalizeYield(currentEpisode: Partial<Episode>) {
     id: randomUUID(),
     type: 'AGENT_YIELD',
     text: 'Yield', // Synthesized yield since we don't have the original concrete node
-    metadata: {
-      originalTokens: 1,
-      currentTokens: 1,
-      transformations: [],
-    },
   };
   const existingNodes = currentEpisode.concreteNodes || [];
   currentEpisode.concreteNodes = [...existingNodes, yieldNode];
@@ -6,32 +6,6 @@
 
 import type { Part } from '@google/genai';
 
-/**
- * Universal Audit Metadata
- * Tracks the lifecycle and transformations of a node or part within the IR.
- * This guarantees perfect reversibility and enables long-term memory offloading.
- */
-export interface IrMetadata {
-  /** The estimated number of tokens this entity originally consumed. */
-  readonly originalTokens: number;
-  /** The current estimated number of tokens this entity consumes in its degraded state. */
-  readonly currentTokens: number;
-  /** An audit trail of all transformations applied by ContextProcessors. */
-  readonly transformations: ReadonlyArray<{
-    readonly processorName: string;
-    readonly action:
-      | 'MASKED'
-      | 'TRUNCATED'
-      | 'DEGRADED'
-      | 'SUMMARIZED'
-      | 'EVICTED'
-      | 'SYNTHESIZED';
-    readonly timestamp: number;
-    /** Pointer to where the original uncompressed payload was saved (if applicable) */
-    readonly diskPointer?: string;
-  }>;
-}
-
 export type IrNodeType =
   // Organic Concrete Nodes
   | 'USER_PROMPT'
@@ -53,7 +27,6 @@ export type IrNodeType =
 export interface IrNode {
   readonly id: string;
   readonly type: IrNodeType;
-  readonly metadata: IrMetadata;
 }
 
 /**
@@ -63,8 +63,6 @@ describe('BlobDegradationProcessor', () => {
     expect((degradedPart as any).text).toContain('[Multi-Modal Blob (image/png, 0.00MB) degraded to text');
 
     // The transformation should be logged
-    expect(modifiedPrompt.metadata.transformations.length).toBe(1);
-    expect(modifiedPrompt.metadata.transformations[0].action).toBe('DEGRADED');
   });
 
   it('should stop degrading once the deficit is cleared', async () => {
@@ -130,17 +130,6 @@ export class BlobDegradationProcessor implements ContextProcessor {
         ...prompt,
         id: this.env.idGenerator.generateId(), // Issue a new ID because it was modified
         semanticParts: newParts,
-        metadata: {
-          ...prompt.metadata,
-          transformations: [
-            ...prompt.metadata.transformations,
-            {
-              processorName: this.name,
-              action: 'DEGRADED',
-              timestamp: Date.now(),
-            }
-          ]
-        }
       };
       returnedNodes.push(degradedNode);
     } else {
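The "Issue a new ID because it was modified" comment is the invariant that makes dropping per-node metadata safe: rewrites mint a sibling value under a fresh ID rather than mutating in place, so bookkeeping keyed by ID (like the token cache in the final hunk) never needs invalidation. A toy sketch, with `NodeLike` as a stand-in shape:

interface NodeLike { readonly id: string; readonly text: string; }

let counter = 0;
const nextId = () => `node-${++counter}`;

// Rewrites never mutate: they return a new value under a fresh ID.
function rewrite(node: NodeLike, newText: string): NodeLike {
  return { ...node, id: nextId(), text: newText };
}

const original: NodeLike = { id: nextId(), text: 'huge multi-modal blob...' };
const degraded = rewrite(original, '[blob degraded to text]');
// `original` is untouched and degraded.id !== original.id, so any cached cost
// for original.id becomes unreachable rather than wrong.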
@@ -77,7 +77,7 @@ export class EmergencyTruncationProcessor implements ContextProcessor {
         continue;
       }
 
-      removedTokens += node.metadata.currentTokens;
+      removedTokens += this.env.tokenCalculator.getTokenCost(node);
     }
 
     return keptNodes;
@@ -65,8 +65,6 @@ describe('HistorySquashingProcessor', () => {
     expect(squashedPrompt.id).not.toBe(prompt.id);
     expect(squashedPrompt.semanticParts[0].type).toBe('text');
     expect((squashedPrompt.semanticParts[0] as any).text).toContain('[... OMITTED');
-    expect(squashedPrompt.metadata.transformations.length).toBe(1);
-    expect(squashedPrompt.metadata.transformations[0].action).toBe('TRUNCATED');
 
     // 2. Agent Thought
     const squashedThought = result[1] as AgentThought;
@@ -121,14 +121,6 @@ export class HistorySquashingProcessor implements ContextProcessor {
         ...prompt,
         id: this.env.idGenerator.generateId(),
         semanticParts: newParts,
-        metadata: {
-          ...prompt.metadata,
-          currentTokens: newTokens,
-          transformations: [
-            ...prompt.metadata.transformations,
-            { processorName: this.name, action: 'TRUNCATED', timestamp: Date.now() }
-          ]
-        }
       });
     } else {
       returnedNodes.push(node);
@@ -147,14 +139,6 @@ export class HistorySquashingProcessor implements ContextProcessor {
         ...thought,
         id: this.env.idGenerator.generateId(),
         text: squashResult.text,
-        metadata: {
-          ...thought.metadata,
-          currentTokens: squashResult.newTokens,
-          transformations: [
-            ...thought.metadata.transformations,
-            { processorName: this.name, action: 'TRUNCATED', timestamp: Date.now() }
-          ]
-        }
       });
     } else {
       returnedNodes.push(node);
@@ -173,14 +157,6 @@ export class HistorySquashingProcessor implements ContextProcessor {
         ...agentYield,
         id: this.env.idGenerator.generateId(),
         text: squashResult.text,
-        metadata: {
-          ...agentYield.metadata,
-          currentTokens: squashResult.newTokens,
-          transformations: [
-            ...agentYield.metadata.transformations,
-            { processorName: this.name, action: 'TRUNCATED', timestamp: Date.now() }
-          ]
-        }
       });
     } else {
       returnedNodes.push(node);
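All three branches above (user prompt, agent thought, agent yield) apply the same squash-and-remint transform; the tests only pin the '[... OMITTED' marker. A minimal sketch of that transform, with the keep-lengths and the exact marker wording as assumptions:

// Keep the head and tail of an oversized string; splice in an omission marker.
function squash(text: string, keepChars: number): { text: string; changed: boolean } {
  if (text.length <= keepChars) return { text, changed: false };
  const half = Math.floor(keepChars / 2);
  const omitted = text.length - 2 * half;
  const head = text.slice(0, half);
  const tail = text.slice(text.length - half);
  return { text: `${head}[... OMITTED ${omitted} chars ...]${tail}`, changed: true };
}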
@@ -49,7 +49,6 @@ describe('SemanticCompressionProcessor', () => {
 
     const thought = createDummyNode('ep1', 'AGENT_THOUGHT', 1500, {
       text: 'The model is thinking something incredibly long and verbose that exceeds 10 chars',
-      metadata: { currentTokens: 5000, originalTokens: 5000, transformations: [] }
     }, 'thought-id') as AgentThought;
 
     const tool = createDummyToolNode('ep1', 50, 1000, {
@@ -75,8 +74,6 @@ describe('SemanticCompressionProcessor', () => {
     expect(compressedPrompt.semanticParts[0].type).toBe('text');
     expect((compressedPrompt.semanticParts[0] as any).text).toBe('Mocked Summary!');
-    expect(compressedPrompt.metadata.transformations.length).toBe(1);
-    expect(compressedPrompt.metadata.transformations[0].action).toBe('SUMMARIZED');
 
     // 2. Agent Thought
     const compressedThought = result[1] as AgentThought;
     expect(compressedThought.id).toBe('mock-uuid-2');
@@ -84,14 +81,12 @@ describe('SemanticCompressionProcessor', () => {
     expect(compressedThought.text).toBe('Mocked Summary!');
-    expect(compressedThought.metadata.transformations.length).toBe(1);
 
     // 3. Tool Execution
     const compressedTool = result[2] as ToolExecution;
     expect(compressedTool.id).toBe('mock-uuid-3');
     expect(compressedTool.id).not.toBe(tool.id);
     expect(compressedTool.observation).toEqual({ summary: 'Mocked Summary!' });
-    expect(compressedTool.metadata.transformations.length).toBe(1);
 
     // Verify LLM was called 3 times
     expect(mockLlmClient.generateContent).toHaveBeenCalledTimes(3);
   });
@@ -130,7 +125,6 @@ describe('SemanticCompressionProcessor', () => {
 
     const thought = createDummyNode('ep1', 'AGENT_THOUGHT', 1500, {
       text: 'The model is thinking something incredibly long and verbose that exceeds 10 chars',
-      metadata: { currentTokens: 5000, originalTokens: 5000, transformations: [] }
     }, 'thought-id') as AgentThought;
 
     const targets = [prompt, thought];
@@ -123,26 +123,16 @@ export class SemanticCompressionProcessor implements ContextProcessor {
     }
 
     if (modified) {
-      const newTokens = this.env.tokenCalculator.estimateTokensForParts(
-        newParts.map(p => {
-          if (p.type === 'text') return { text: p.text };
-          if (p.type === 'inline_data') return { inlineData: { mimeType: p.mimeType, data: p.data } };
-          if (p.type === 'file_data') return { fileData: { mimeType: p.mimeType, fileUri: p.fileUri } };
-          return (p as Extract<import('../ir/types.js').SemanticPart, { type: 'raw_part' }>).part;
-        })
-      );
       returnedNodes.push({
         ...prompt,
         id: this.env.idGenerator.generateId(),
         semanticParts: newParts,
-        metadata: {
-          ...prompt.metadata,
-          currentTokens: newTokens,
-          transformations: [
-            ...prompt.metadata.transformations,
-            { processorName: this.name, action: 'SUMMARIZED', timestamp: Date.now() }
-          ]
-        }
       });
     } else {
       returnedNodes.push(node);
@@ -156,7 +146,7 @@ export class SemanticCompressionProcessor implements ContextProcessor {
       if (thought.text.length > thresholdChars) {
         const summary = await this.generateSummary(thought.text, 'Agent Thought');
         const newTokens = this.env.tokenCalculator.estimateTokensForParts([{ text: summary }]);
-        const oldTokens = thought.metadata.currentTokens;
+        const oldTokens = this.env.tokenCalculator.getTokenCost(thought);
 
         if (newTokens < oldTokens) {
           currentDeficit -= (oldTokens - newTokens);
@@ -164,14 +154,6 @@ export class SemanticCompressionProcessor implements ContextProcessor {
           ...thought,
           id: this.env.idGenerator.generateId(),
           text: summary,
-          metadata: {
-            ...thought.metadata,
-            currentTokens: newTokens,
-            transformations: [
-              ...thought.metadata.transformations,
-              { processorName: this.name, action: 'SUMMARIZED', timestamp: Date.now() }
-            ]
-          }
         });
         continue;
       }
@@ -210,7 +192,7 @@ export class SemanticCompressionProcessor implements ContextProcessor {
         },
       ]);
 
-      const oldObsTokens = tool.tokens?.observation ?? tool.metadata.currentTokens;
+      const oldObsTokens = tool.tokens?.observation ?? this.env.tokenCalculator.getTokenCost(tool);
       const intentTokens = tool.tokens?.intent ?? 0;
 
       if (newObsTokens < oldObsTokens) {
@@ -223,14 +205,6 @@ export class SemanticCompressionProcessor implements ContextProcessor {
           intent: intentTokens,
           observation: newObsTokens,
         },
-        metadata: {
-          ...tool.metadata,
-          currentTokens: intentTokens + newObsTokens,
-          transformations: [
-            ...tool.metadata.transformations,
-            { processorName: this.name, action: 'SUMMARIZED', timestamp: Date.now() }
-          ]
-        }
       });
       continue;
     }
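The processor's control flow is deficit-driven: it keeps summarizing candidates until `currentDeficit` is paid down, then passes the remainder through untouched. A self-contained sketch of that loop (the node shape, the injected summarizer, and the 4-chars-per-token estimate are all assumptions):

interface Candidate { id: string; tokens: number; text: string; }

async function relieveDeficit(
  nodes: readonly Candidate[],
  deficit: number,
  summarize: (text: string) => Promise<string>,
): Promise<Candidate[]> {
  const out: Candidate[] = [];
  for (const node of nodes) {
    if (deficit <= 0) { out.push(node); continue; } // budget satisfied: pass through
    const summary = await summarize(node.text);
    const newTokens = Math.ceil(summary.length / 4); // heuristic estimate
    if (newTokens < node.tokens) {
      deficit -= node.tokens - newTokens; // reclaim the difference
      out.push({ id: `${node.id}-v2`, tokens: newTokens, text: summary }); // remint under a new ID
    } else {
      out.push(node); // summary didn't help; keep the original
    }
  }
  return out;
}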
@@ -53,7 +53,6 @@ describe('StateSnapshotProcessor', () => {
     // Should remove A and B, insert Snapshot, keep C
     expect(result.length).toBe(2);
     expect(result[0].type).toBe('SNAPSHOT');
     expect((result[0] as Snapshot).text).toBe('<compressed A and B>');
     expect(result[1].id).toBe('node-C');
 
     // Should consume the message
@@ -107,6 +106,5 @@ describe('StateSnapshotProcessor', () => {
     expect(env.llmClient.generateContent).toHaveBeenCalled();
     expect(result.length).toBe(2); // nodeA is skipped as "system prompt", snapshot + nodeA
     expect(result[1].type).toBe('SNAPSHOT');
     expect((result[1] as Snapshot).text).toBe('Mock LLM summary response');
   });
 });
@@ -73,7 +73,6 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
     if (isValid) {
       // If valid, apply it!
       const newId = this.env.idGenerator.generateId();
-      const tokens = this.env.tokenCalculator.estimateTokensForString(newText);
 
       const snapshotNode: Snapshot = {
         id: newId,
@@ -81,13 +80,6 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
         type: 'SNAPSHOT',
         timestamp: Date.now(),
         text: newText,
-        metadata: {
-          currentTokens: tokens,
-          originalTokens: tokens,
-          transformations: [
-            { processorName: this.name, action: 'SYNTHESIZED', timestamp: Date.now() }
-          ]
-        }
       };
 
       // Remove the consumed nodes and insert the snapshot at the earliest index
@@ -131,7 +123,7 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
       }
 
       nodesToSummarize.push(node);
-      deficitAccumulator += node.metadata.currentTokens;
+      deficitAccumulator += this.env.tokenCalculator.getTokenCost(node);
 
       if (deficitAccumulator >= targetTokensToRemove) break;
     }
@@ -142,20 +134,12 @@ export class StateSnapshotProcessor implements ContextProcessor, ContextWorker {
     const snapshotText = await this.synthesizeSnapshot(nodesToSummarize);
     const newId = this.env.idGenerator.generateId();
-    const tokens = this.env.tokenCalculator.estimateTokensForString(snapshotText);
 
     const snapshotNode: Snapshot = {
       id: newId,
       logicalParentId: newId,
       type: 'SNAPSHOT',
       timestamp: Date.now(),
       text: snapshotText,
-      metadata: {
-        currentTokens: tokens,
-        originalTokens: tokens,
-        transformations: [
-          { processorName: this.name, action: 'SYNTHESIZED', timestamp: Date.now() }
-        ]
-      }
     };
 
     const consumedIds = nodesToSummarize.map(n => n.id);
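The worker path walks the oldest nodes first, accumulating their cost into `deficitAccumulator` until the reclaim target is met, then swaps the consumed prefix for a single synthesized SNAPSHOT node. A simplified sketch (it omits the system-prompt skip the tests mention, and every shape here is a stand-in):

interface ShipNode { id: string; tokens: number; text: string; }

async function snapshotPrefix(
  ship: readonly ShipNode[],
  targetTokensToRemove: number,
  synthesize: (nodes: readonly ShipNode[]) => Promise<string>,
): Promise<ShipNode[]> {
  const consumed: ShipNode[] = [];
  let reclaimed = 0;
  for (const node of ship) {
    consumed.push(node);
    reclaimed += node.tokens;
    if (reclaimed >= targetTokensToRemove) break;
  }
  if (consumed.length === 0) return [...ship];
  const text = await synthesize(consumed);
  const snapshot: ShipNode = { id: 'snapshot-1', tokens: Math.ceil(text.length / 4), text };
  return [snapshot, ...ship.slice(consumed.length)]; // prefix replaced, suffix kept
}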
@@ -15,7 +15,6 @@ describe('ToolMaskingProcessor', () => {
   it('should write large strings to disk and replace them with a masked pointer', async () => {
     const env = createMockEnvironment();
-    // 1 token = 1 char for simplicity
     env.tokenCalculator.tokensToChars = vi.fn().mockReturnValue(10);
     // Fake token calculator says new tokens are 5
     env.tokenCalculator.estimateTokensForParts = vi.fn().mockReturnValue(5);
@@ -30,11 +29,6 @@ describe('ToolMaskingProcessor', () => {
         result: 'this is a really long string that should get masked out because it exceeds 10 chars',
         metadata: 'short',
       },
-      metadata: {
-        currentTokens: 150,
-        originalTokens: 150,
-        transformations: []
-      }
     });
 
     const result = await processor.process({
@@ -56,13 +50,10 @@ describe('ToolMaskingProcessor', () => {
     expect(obs.metadata).toBe('short'); // Untouched
 
-    // Transformation logged
-    expect(masked.metadata.transformations.length).toBe(1);
-    expect(masked.metadata.transformations[0].action).toBe('MASKED');
   });
 
   it('should skip unmaskable tools', async () => {
     const env = createMockEnvironment();
     env.tokenCalculator.tokensToChars = vi.fn().mockReturnValue(10);
 
     const processor = ToolMaskingProcessor.create(env, {
       stringLengthThresholdTokens: 10,
@@ -251,7 +251,7 @@ export class ToolMaskingProcessor implements ContextProcessor {
       const newObsTokens = this.env.tokenCalculator.estimateTokensForParts([obsPart]);
 
       const tokensSaved =
-        (step.metadata.currentTokens) -
+        this.env.tokenCalculator.getTokenCost(step) -
         (newIntentTokens + newObsTokens);
 
       if (tokensSaved > 0) {
@@ -264,18 +264,6 @@ export class ToolMaskingProcessor implements ContextProcessor {
             intent: newIntentTokens,
             observation: newObsTokens,
           },
-          metadata: {
-            ...step.metadata,
-            currentTokens: newIntentTokens + newObsTokens,
-            transformations: [
-              ...step.metadata.transformations,
-              {
-                processorName: this.name,
-                action: 'MASKED',
-                timestamp: Date.now(),
-              }
-            ]
-          }
         };
 
         returnedNodes.push(maskedNode);
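The test title names the core move: oversized string fields inside a tool observation are written to disk and replaced in-context by a pointer. A sketch of that substitution, with the spill location and marker format as assumptions:

import { writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { randomUUID } from 'node:crypto';

function maskLargeStrings(obs: Record<string, unknown>, thresholdChars: number): Record<string, unknown> {
  const out: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(obs)) {
    if (typeof value === 'string' && value.length > thresholdChars) {
      const pointer = join(tmpdir(), `masked-${randomUUID()}.txt`); // hypothetical spill path
      writeFileSync(pointer, value); // original payload stays recoverable on disk
      out[key] = `[MASKED: ${value.length} chars written to ${pointer}]`;
    } else {
      out[key] = value; // small fields pass through untouched
    }
  }
  return out;
}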
@@ -152,7 +152,6 @@ export class SimulationHarness {
       variant: {
         type: 'MASKED_TOOL',
         id: 'mock-id',
-        metadata: { currentTokens: 0, originalTokens: 0, transformations: [] },
         tokens: { intent: 0, observation: 0 },
         intent: {}, observation: {}, toolName: 'tool',
       },
@@ -7,6 +7,8 @@
 import type { Part } from '@google/genai';
 import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
 import type { ConcreteNode } from '../ir/types.js';
+import { isUserPrompt, isAgentThought, isToolExecution, isMaskedTool, isAgentYield, isSnapshot, isRollingSummary } from '../ir/graphUtils.js';
+import { serializeUserPrompt, serializeAgentThought, serializeToolExecution, serializeMaskedTool, serializeAgentYield } from '../ir/fromIr.js';
 
 /**
  * The flat token cost assigned to a single multi-modal asset (like an image tile)
@@ -15,10 +17,13 @@ import type { ConcreteNode } from '../ir/types.js';
 
 export class ContextTokenCalculator {
+  private readonly tokenCache = new Map<string, number>();
+
   constructor(private readonly charsPerToken: number) {}
 
   /**
-   * Fast, simple heuristic estimation for a raw string.
+   * Estimates tokens for a simple string based on character count.
+   * Fast, but inherently inaccurate compared to real model tokenization.
    */
   estimateTokensForString(text: string): number {
     return Math.ceil(text.length / this.charsPerToken);
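The heuristic prices text purely by length. For example, with the calculator constructed at 4 characters per token:

const calc = new ContextTokenCalculator(4);
calc.estimateTokensForString('hello world'); // 11 chars -> ceil(11 / 4) = 3 tokens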
@@ -33,17 +38,52 @@ export class ContextTokenCalculator {
   }
 
   /**
-   * Calculates the total token count for a flat array of ConcreteNodes (The Ship).
-   * This is fast because it relies on pre-computed metadata where available.
+   * Pre-calculates and caches the token cost of a newly minted node.
+   * Because nodes are immutable, this cost never changes for this node ID.
    */
+  cacheNodeTokens(node: ConcreteNode): number {
+    let tokens = 0;
+    if (isUserPrompt(node)) {
+      const content = serializeUserPrompt(node);
+      if (content && content.parts) tokens = this.estimateTokensForParts(content.parts as Part[]);
+    } else if (isAgentThought(node)) {
+      tokens = this.estimateTokensForParts([serializeAgentThought(node)]);
+    } else if (isToolExecution(node)) {
+      const parts = serializeToolExecution(node);
+      tokens = this.estimateTokensForParts([parts.call, parts.response]);
+    } else if (isMaskedTool(node)) {
+      const parts = serializeMaskedTool(node);
+      tokens = this.estimateTokensForParts([parts.call, parts.response]);
+    } else if (isAgentYield(node)) {
+      tokens = this.estimateTokensForParts([serializeAgentYield(node)]);
+    } else if (isSnapshot(node) || isRollingSummary(node)) {
+      tokens = this.estimateTokensForParts([{ text: node.text }]);
+    }
+    this.tokenCache.set(node.id, tokens);
+    return tokens;
+  }
+
+  /**
+   * Retrieves the token cost of a single node from the cache.
+   * If it misses the cache, it computes it and caches it.
+   */
+  getTokenCost(node: ConcreteNode): number {
+    const cached = this.tokenCache.get(node.id);
+    if (cached !== undefined) return cached;
+    return this.cacheNodeTokens(node);
+  }
+
+  /**
+   * Fast calculation for a flat array of ConcreteNodes (The Ship).
+   * It relies entirely on the O(1) sidecar token cache.
+   */
   calculateConcreteListTokens(ship: readonly ConcreteNode[]): number {
     let tokens = 0;
     for (const node of ship) {
-      tokens += node.metadata.currentTokens;
+      tokens += this.getTokenCost(node);
     }
     return tokens;
   }
 
   /**
    * Slower, precise estimation for a Gemini Content/Part graph.
    * Deeply inspects the nested structure and uses the base tokenization math.
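Taken together, the additions make ship-wide pricing cheap on the hot path: the first pass serializes each node once to fill the cache, and every later pass over the same immutable nodes is pure Map lookups. A hedged usage sketch, written as if in the same module (`ship` is assumed):

declare const ship: readonly ConcreteNode[];

const calc = new ContextTokenCalculator(4); // ~4 characters per token
const total = calc.calculateConcreteListTokens(ship);  // cache misses: serialize + memoize
const repeat = calc.calculateConcreteListTokens(ship); // cache hits only; total === repeat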