feat(core): introduce decoupled ContextManager and Sidecar architecture (#24752)

joshualitt
2026-04-13 15:02:22 -07:00
committed by GitHub
parent 706d4d4707
commit daf5006237
54 changed files with 6454 additions and 0 deletions
@@ -0,0 +1,107 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import type { Part } from '@google/genai';
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
import type { ConcreteNode } from '../graph/types.js';
import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';

/**
 * The flat token cost assigned to a single multi-modal asset (like an image tile)
 * by the Gemini API. We use this as a baseline heuristic for inlineData/fileData.
 */
const MULTIMODAL_PART_TOKEN_ESTIMATE = 258;

/**
 * Sidecar helper that estimates and caches per-node token costs for the
 * context graph.
 */
export class ContextTokenCalculator {
  private readonly tokenCache = new Map<string, number>();

  constructor(
    private readonly charsPerToken: number,
    private readonly registry: NodeBehaviorRegistry,
  ) {}

  /**
   * Estimates tokens for a simple string based on character count.
   * Fast, but inherently inaccurate compared to real model tokenization.
   */
  estimateTokensForString(text: string): number {
    return Math.ceil(text.length / this.charsPerToken);
  }

  /**
   * Fast, simple heuristic conversion from tokens to expected character length.
   * Useful for calculating truncation thresholds.
   */
  tokensToChars(tokens: number): number {
    return tokens * this.charsPerToken;
  }

  /**
   * Removes cached token counts for any nodes that are no longer in the given live set.
   * This prevents unbounded memory growth during long sessions.
   */
  garbageCollectCache(liveNodeIds: ReadonlySet<string>): void {
    for (const [id] of this.tokenCache) {
      if (!liveNodeIds.has(id)) {
        this.tokenCache.delete(id);
      }
    }
  }

  /**
   * Pre-calculates and caches the token cost of a newly minted node.
   * Because nodes are immutable, this cost never changes for this node ID.
   */
  cacheNodeTokens(node: ConcreteNode): number {
    const behavior = this.registry.get(node.type);
    const parts = behavior.getEstimatableParts(node);
    const tokens = this.estimateTokensForParts(parts);
    this.tokenCache.set(node.id, tokens);
    return tokens;
  }

  /**
   * Retrieves the token cost of a single node from the cache.
   * If it misses the cache, it computes it and caches it.
   */
  getTokenCost(node: ConcreteNode): number {
    const cached = this.tokenCache.get(node.id);
    if (cached !== undefined) return cached;
    return this.cacheNodeTokens(node);
  }

  /**
   * Fast calculation for a flat array of ConcreteNodes.
   * Relies on the O(1) sidecar token cache, computing and caching any
   * entries it has not seen before.
   */
  calculateConcreteListTokens(nodes: readonly ConcreteNode[]): number {
    let tokens = 0;
    for (const node of nodes) {
      tokens += this.getTokenCost(node);
    }
    return tokens;
  }

  /**
   * Slower, more detailed estimation for a Gemini Content/Part structure.
   * Inspects each part and combines the result with the base tokenization math.
   */
  estimateTokensForParts(parts: Part[], depth: number = 0): number {
    let totalTokens = 0;
    for (const part of parts) {
      if (typeof part.text === 'string') {
        totalTokens += Math.ceil(part.text.length / this.charsPerToken);
      } else if (part.inlineData !== undefined || part.fileData !== undefined) {
        totalTokens += MULTIMODAL_PART_TOKEN_ESTIMATE;
      } else {
        totalTokens += Math.ceil(
          JSON.stringify(part).length / this.charsPerToken,
        );
      }
    }
    // Also include structural overhead from the base estimate.
    return totalTokens + baseEstimate(parts, depth);
  }
}
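
A minimal usage sketch of the calculator (not part of the diff; registry, userNode, and liveIds are hypothetical stand-ins for objects produced elsewhere in the pipeline):

// Hypothetical wiring: 4 chars/token is an assumed heuristic, not a fixed value.
const calculator = new ContextTokenCalculator(4, registry);
const cost = calculator.getTokenCost(userNode); // computes and caches
const total = calculator.calculateConcreteListTokens([userNode]); // cache hit
// After pruning the graph, drop cache entries for nodes no longer in the live set.
calculator.garbageCollectCache(liveIds);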
@@ -0,0 +1,54 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import type { ConcreteNode } from '../graph/types.js';
import type { ContextEnvironment } from '../pipeline/environment.js';
import { LlmRole } from '../../telemetry/llmRole.js';

export class SnapshotGenerator {
  constructor(private readonly env: ContextEnvironment) {}

  async synthesizeSnapshot(
    nodes: readonly ConcreteNode[],
    systemInstruction?: string,
  ): Promise<string> {
    const systemPrompt =
      systemInstruction ??
      `You are an expert Context Memory Manager. You will be provided with a raw transcript of older conversation turns between a user and an AI assistant.
Your task is to synthesize these turns into a single, dense, factual snapshot that preserves all critical context, preferences, active tasks, and factual knowledge, but discards conversational filler, pleasantries, and redundant back-and-forth iterations.
Output ONLY the raw factual snapshot, formatted compactly. Do not include markdown wrappers, prefixes like "Here is the snapshot", or conversational elements.`;

    let userPromptText = 'TRANSCRIPT TO SNAPSHOT:\n\n';
    for (const node of nodes) {
      let nodeContent = '';
      if ('text' in node && typeof node.text === 'string') {
        nodeContent = node.text;
      } else if ('semanticParts' in node) {
        nodeContent = JSON.stringify(node.semanticParts);
      } else if ('observation' in node) {
        nodeContent =
          typeof node.observation === 'string'
            ? node.observation
            : JSON.stringify(node.observation);
      }
      userPromptText += `[${node.type}]: ${nodeContent}\n`;
    }

    const response = await this.env.llmClient.generateContent({
      role: LlmRole.UTILITY_STATE_SNAPSHOT_PROCESSOR,
      modelConfigKey: { model: 'gemini-3-flash-base' },
      contents: [{ role: 'user', parts: [{ text: userPromptText }] }],
      systemInstruction: { role: 'system', parts: [{ text: systemPrompt }] },
      promptId: this.env.promptId,
      abortSignal: new AbortController().signal,
    });

    const candidate = response.candidates?.[0];
    const textPart = candidate?.content?.parts?.[0];
    return textPart?.text || '';
  }
}
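
A minimal usage sketch of the generator (hypothetical wiring; env stands in for a configured ContextEnvironment and olderNodes for the turns being compacted, called from inside an async function):

// Compacts older turns into a dense snapshot before trimming them from context.
const generator = new SnapshotGenerator(env);
const snapshot = await generator.synthesizeSnapshot(olderNodes);
// The snapshot text can then replace the summarized nodes in the live context.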