feat(core): introduce decoupled ContextManager and Sidecar architecture (#24752)
@@ -0,0 +1,107 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import type { Part } from '@google/genai';
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
import type { ConcreteNode } from '../graph/types.js';
import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';

/**
 * The flat token cost assigned to a single multi-modal asset (like an image tile)
 * by the Gemini API. We use this as a baseline heuristic for inlineData/fileData.
 */
const MULTIMODAL_ASSET_TOKEN_COST = 258;

export class ContextTokenCalculator {
  private readonly tokenCache = new Map<string, number>();

  constructor(
    private readonly charsPerToken: number,
    private readonly registry: NodeBehaviorRegistry,
  ) {}

  /**
   * Estimates tokens for a simple string based on character count.
   * Fast, but inherently inaccurate compared to real model tokenization.
   */
  estimateTokensForString(text: string): number {
    return Math.ceil(text.length / this.charsPerToken);
  }
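
  // Example: with charsPerToken = 4 (an illustrative calibration),
  // estimateTokensForString('hello world') returns Math.ceil(11 / 4) = 3 tokens.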

  /**
   * Fast, simple heuristic conversion from tokens to expected character length.
   * Useful for calculating truncation thresholds.
   */
  tokensToChars(tokens: number): number {
    return tokens * this.charsPerToken;
  }
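
  // Example: with charsPerToken = 4, a 1,000-token truncation budget maps to
  // tokensToChars(1000) = 4000 characters of text.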

  /**
   * Removes cached token counts for any nodes that are no longer in the given live set.
   * This prevents unbounded memory growth during long sessions.
   */
  garbageCollectCache(liveNodeIds: ReadonlySet<string>): void {
    for (const [id] of this.tokenCache) {
      if (!liveNodeIds.has(id)) {
        this.tokenCache.delete(id);
      }
    }
  }
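
  // Illustrative call site (the `graph` object here is hypothetical): after
  // pruning, pass the ids of the surviving nodes so stale entries are evicted:
  //   calculator.garbageCollectCache(new Set(graph.nodes.map((n) => n.id)));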

  /**
   * Pre-calculates and caches the token cost of a newly minted node.
   * Because nodes are immutable, this cost never changes for this node ID.
   */
  cacheNodeTokens(node: ConcreteNode): number {
    const behavior = this.registry.get(node.type);
    const parts = behavior.getEstimatableParts(node);
    const tokens = this.estimateTokensForParts(parts);
    this.tokenCache.set(node.id, tokens);
    return tokens;
  }

  /**
   * Retrieves the token cost of a single node from the cache.
   * On a cache miss, it computes and caches the cost.
   */
  getTokenCost(node: ConcreteNode): number {
    const cached = this.tokenCache.get(node.id);
    if (cached !== undefined) return cached;
    return this.cacheNodeTokens(node);
  }

  /**
   * Fast calculation for a flat array of ConcreteNodes.
   * It relies entirely on the O(1) sidecar token cache.
   */
  calculateConcreteListTokens(nodes: readonly ConcreteNode[]): number {
    let tokens = 0;
    for (const node of nodes) {
      tokens += this.getTokenCost(node);
    }
    return tokens;
  }

  /**
   * Slower, precise estimation for a Gemini Content/Part graph.
   * Deeply inspects the nested structure and uses the base tokenization math.
   */
  estimateTokensForParts(parts: Part[], depth: number = 0): number {
    let totalTokens = 0;
    for (const part of parts) {
      if (typeof part.text === 'string') {
        totalTokens += Math.ceil(part.text.length / this.charsPerToken);
      } else if (part.inlineData !== undefined || part.fileData !== undefined) {
        totalTokens += MULTIMODAL_ASSET_TOKEN_COST;
      } else {
        totalTokens += Math.ceil(
          JSON.stringify(part).length / this.charsPerToken,
        );
      }
    }
    // Also include structural overhead from the shared base estimator.
    return totalTokens + baseEstimate(parts, depth);
  }
}
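
A minimal usage sketch (not part of the commit; `registry` and `nodes` stand in for a configured NodeBehaviorRegistry and the live graph, and charsPerToken = 4 is only an illustrative calibration):

const calculator = new ContextTokenCalculator(4, registry);
const budgetChars = calculator.tokensToChars(1024); // 4096 chars
const total = calculator.calculateConcreteListTokens(nodes); // O(1) per node after first pass
calculator.garbageCollectCache(new Set(nodes.map((n) => n.id)));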
@@ -0,0 +1,54 @@

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import type { ConcreteNode } from '../graph/types.js';
import type { ContextEnvironment } from '../pipeline/environment.js';
import { LlmRole } from '../../telemetry/llmRole.js';

export class SnapshotGenerator {
  constructor(private readonly env: ContextEnvironment) {}

  async synthesizeSnapshot(
    nodes: readonly ConcreteNode[],
    systemInstruction?: string,
  ): Promise<string> {
    const systemPrompt =
      systemInstruction ??
      `You are an expert Context Memory Manager. You will be provided with a raw transcript of older conversation turns between a user and an AI assistant.
Your task is to synthesize these turns into a single, dense, factual snapshot that preserves all critical context, preferences, active tasks, and factual knowledge, but discards conversational filler, pleasantries, and redundant back-and-forth iterations.

Output ONLY the raw factual snapshot, formatted compactly. Do not include markdown wrappers, prefixes like "Here is the snapshot", or conversational elements.`;

    let userPromptText = 'TRANSCRIPT TO SNAPSHOT:\n\n';
    for (const node of nodes) {
      let nodeContent = '';
      if ('text' in node && typeof node.text === 'string') {
        nodeContent = node.text;
      } else if ('semanticParts' in node) {
        nodeContent = JSON.stringify(node.semanticParts);
      } else if ('observation' in node) {
        nodeContent =
          typeof node.observation === 'string'
            ? node.observation
            : JSON.stringify(node.observation);
      }

      userPromptText += `[${node.type}]: ${nodeContent}\n`;
    }
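
    // The assembled transcript therefore looks like (node type names illustrative):
    //   TRANSCRIPT TO SNAPSHOT:
    //
    //   [userMessage]: Please refactor the parser.
    //   [toolObservation]: {"exitCode":0,...}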

    const response = await this.env.llmClient.generateContent({
      role: LlmRole.UTILITY_STATE_SNAPSHOT_PROCESSOR,
      modelConfigKey: { model: 'gemini-3-flash-base' },
      contents: [{ role: 'user', parts: [{ text: userPromptText }] }],
      systemInstruction: { role: 'system', parts: [{ text: systemPrompt }] },
      promptId: this.env.promptId,
      // A fresh controller's signal is never aborted, so this request is
      // effectively non-cancellable.
      abortSignal: new AbortController().signal,
    });

    const candidate = response.candidates?.[0];
    const textPart = candidate?.content?.parts?.[0];
    return textPart?.text || '';
  }
}