mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
445 lines
15 KiB
TypeScript
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import type { Content } from '@google/genai';
|
|
import type { AgentChatHistory } from '../core/agentChatHistory.js';
|
|
import { isToolExecution, type ConcreteNode } from './graph/types.js';
|
|
import type { ContextEventBus } from './eventBus.js';
|
|
import type { ContextTracer } from './tracer.js';
|
|
import type { ContextEnvironment } from './pipeline/environment.js';
|
|
import type { ContextProfile } from './config/profiles.js';
|
|
import type { PipelineOrchestrator } from './pipeline/orchestrator.js';
|
|
import { HistoryObserver } from './historyObserver.js';
|
|
import { render } from './graph/render.js';
|
|
import { ContextWorkingBufferImpl } from './pipeline/contextWorkingBuffer.js';
|
|
import { debugLogger } from '../utils/debugLogger.js';
|
|
import { SnapshotStateHelper } from './utils/snapshotGenerator.js';
|
|
import type { ContextEngineState } from '../services/chatRecordingTypes.js';
|
|
import { hardenHistory } from '../utils/historyHardening.js';
|
|
import { checkContextInvariants } from './utils/invariantChecker.js';
|
|
import type { AdvancedTokenCalculator } from './utils/contextTokenCalculator.js';
|
|
|
|
export class ContextManager {
  // The master state containing the pristine graph and current active graph.
  // Reassigned (not mutated) on every sync/processor event — see constructor handlers.
  private buffer: ContextWorkingBufferImpl =
    ContextWorkingBufferImpl.initialize([]);

  private readonly eventBus: ContextEventBus;

  // Internal sub-components
  private readonly orchestrator: PipelineOrchestrator;
  private readonly historyObserver: HistoryObserver;

  // Hysteresis tracking to prevent utility call churn
  private lastTriggeredDeficit = 0;

  // Cache for Anomaly 3 (Redundant Renders)
  private lastRenderCache?: {
    nodesHash: string;
    result: {
      history: Content[];
      didApplyManagement: boolean;
      baseUnits: number;
    };
  };

  // One-shot latch: hot-start token calibration runs at most once per instance.
  private hasPerformedHotStart = false;

  /**
   * Wires the manager into the context environment: registers a node provider
   * on the orchestrator, subscribes to pristine-history and processor-result
   * events, then starts observing the chat history.
   *
   * @param headerProvider Optional async supplier of a header Content that is
   *   prepended to the rendered history.
   */
  constructor(
    private readonly sidecar: ContextProfile,
    private readonly env: ContextEnvironment,
    private readonly tracer: ContextTracer,
    orchestrator: PipelineOrchestrator,
    chatHistory: AgentChatHistory,
    private readonly advancedTokenCalculator: AdvancedTokenCalculator,
    private readonly headerProvider?: () => Promise<Content | undefined>,
  ) {
    this.eventBus = env.eventBus;
    this.orchestrator = orchestrator;

    // Provide the orchestrator with a way to fetch the latest nodes from the live buffer
    this.orchestrator.setNodeProvider(() => this.buffer.nodes);

    this.historyObserver = new HistoryObserver(
      chatHistory,
      this.env.eventBus,
      this.tracer,
      this.env.graphMapper,
    );

    this.eventBus.onPristineHistoryUpdated((event) => {
      // Sync the entire pristine history chronologically
      this.buffer = this.buffer.syncPristineHistory(event.nodes);

      this.evaluateTriggers(event.newNodes);
    });
    this.eventBus.onProcessorResult((event) => {
      this.buffer = this.buffer.applyProcessorResult(
        event.processorId,
        event.targets,
        event.returnedNodes,
      );
      // We explicitly DO NOT call evaluateTriggers here.
      // The Context Manager is a one-way assembly line. It only evaluates triggers
      // when fundamentally new organic context is added via PristineHistoryUpdated.
      // Re-evaluating after a processor finishes creates infinite feedback loops if
      // the processor fails to reduce the token count below the threshold.
    });

    // Start observing only after both event subscriptions are registered.
    this.historyObserver.start();
  }
|
|
|
|
/**
|
|
* Returns a promise that resolves when all currently executing async pipelines have finished.
|
|
*/
|
|
async waitForPipelines(): Promise<void> {
|
|
return this.orchestrator.waitForPipelines();
|
|
}
|
|
|
|
/**
 * Tears down background processing: shuts down the orchestrator's async
 * pipelines, then detaches the history observer's listeners.
 */
shutdown() {
  const { orchestrator, historyObserver } = this;
  orchestrator.shutdown();
  historyObserver.stop();
}
|
|
|
|
/**
 * Evaluates if the current working buffer exceeds configured budget thresholds,
 * firing consolidation events if necessary.
 *
 * Invoked only when new organic history arrives (never after a processor
 * result), so consolidation cannot feed back into itself.
 *
 * @param newNodes Ids of the nodes added by the triggering history update.
 */
private evaluateTriggers(newNodes: Set<string>) {
  // No budget configured => nothing to enforce.
  if (!this.sidecar.config.budget) return;

  // Announce freshly arrived nodes to chunk-level listeners.
  if (newNodes.size > 0) {
    this.eventBus.emitChunkReceived({
      nodes: this.buffer.nodes,
      targetNodeIds: newNodes,
    });
  }

  const currentTokens = this.env.tokenCalculator.calculateConcreteListTokens(
    this.buffer.nodes,
  );

  if (currentTokens > this.sidecar.config.budget.retainedTokens) {
    const agedOutNodes = new Set<string>();
    let rollingTokens = 0;

    // Identify active tool calls that must NEVER be truncated
    // (the map also pins the whole last turn — see getProtectedNodeIds).
    const protectedIds = this.getProtectedNodeIds(this.buffer.nodes);
    if (protectedIds.size > 0) {
      debugLogger.log(
        `[ContextManager] Pinning ${protectedIds.size} active tool call nodes to prevent truncation.`,
      );
    }

    // Walk backwards finding nodes that fall out of the retained budget
    for (let i = this.buffer.nodes.length - 1; i >= 0; i--) {
      const node = this.buffer.nodes[i];
      // Tokens accumulated from nodes strictly NEWER than this one.
      const priorTokens = rollingTokens;
      rollingTokens += this.env.tokenCalculator.calculateConcreteListTokens([
        node,
      ]);

      // Loose Boundary Policy: If this node is the one that pushes us over the retained limit,
      // we KEEP it to prevent aggressive undershooting. We only age out nodes that are
      // strictly *older* than the boundary node.
      if (priorTokens > this.sidecar.config.budget.retainedTokens) {
        // Only age out if not protected
        if (!protectedIds.has(node.id)) {
          agedOutNodes.add(node.id);
        }
      }
    }

    if (agedOutNodes.size > 0) {
      const targetDeficit =
        currentTokens - this.sidecar.config.budget.retainedTokens;

      // If the deficit has shrunk (e.g. after a consolidation), update the baseline
      // so we can track growth from this new, smaller deficit.
      if (targetDeficit < this.lastTriggeredDeficit) {
        this.lastTriggeredDeficit = targetDeficit;
      }

      // Respect coalescing threshold for background work
      const threshold =
        this.sidecar.config.budget.coalescingThresholdTokens || 0;

      // Only trigger if deficit has grown significantly since last time
      const growthSinceLast = targetDeficit - this.lastTriggeredDeficit;

      if (
        targetDeficit >= threshold &&
        (growthSinceLast >= threshold || this.lastTriggeredDeficit === 0)
      ) {
        this.lastTriggeredDeficit = targetDeficit;
        // Drop token-cache entries for nodes no longer present in the buffer.
        this.env.tokenCalculator.garbageCollectCache(
          new Set(this.buffer.nodes.map((n) => n.id)),
        );
        this.eventBus.emitConsolidationNeeded({
          nodes: this.buffer.nodes,
          targetDeficit,
          targetNodeIds: agedOutNodes,
        });
      }
    } else {
      // Nothing eligible to age out (everything over budget is protected),
      // so reset the hysteresis baseline.
      this.lastTriggeredDeficit = 0;
    }
  }
}
|
|
|
|
/**
|
|
* Identifies 'pinned' nodes that should not be truncated.
|
|
* This includes:
|
|
* 1. The entire last turn (Recent context).
|
|
* 2. Active tool calls (calls without responses in the graph).
|
|
*/
|
|
private getProtectedNodeIds(
|
|
nodes: readonly ConcreteNode[],
|
|
extraProtectedIds: Set<string> = new Set(),
|
|
): Map<string, string> {
|
|
const protectionMap = new Map<string, string>();
|
|
if (nodes.length === 0) return protectionMap;
|
|
|
|
// 1. Identify all nodes belonging to the last turn (Recent context)
|
|
const lastNode = nodes[nodes.length - 1];
|
|
const lastTurnId = lastNode.turnId;
|
|
|
|
for (const node of nodes) {
|
|
if (node.turnId === lastTurnId) {
|
|
protectionMap.set(node.id, 'recent_turn');
|
|
}
|
|
}
|
|
|
|
// 2. Identify active tool calls that must NEVER be truncated
|
|
const calls = nodes.filter((n) => isToolExecution(n) && n.role === 'model');
|
|
const responses = new Set(
|
|
nodes
|
|
.filter((n) => isToolExecution(n) && n.role === 'user')
|
|
.map((n) => n.payload.functionResponse?.id)
|
|
.filter((id): id is string => !!id),
|
|
);
|
|
|
|
for (const call of calls) {
|
|
const id = call.payload.functionCall?.id;
|
|
// If we have a call but no response in the current graph, it's 'in flight'
|
|
if (id && !responses.has(id)) {
|
|
protectionMap.set(call.id, 'in_flight_tool_call');
|
|
}
|
|
}
|
|
|
|
// 3. Any externally requested protections
|
|
for (const id of extraProtectedIds) {
|
|
protectionMap.set(id, 'external_active_task');
|
|
}
|
|
|
|
return protectionMap;
|
|
}
|
|
|
|
/**
 * Retrieves the raw, uncompressed Episodic Context Graph.
 * Useful for internal tool rendering (like the trace viewer).
 * Note: walks every buffered node and its pristine roots, so this can be
 * expensive on large graphs.
 */
getPristineGraph(): readonly ConcreteNode[] {
  // Deduplicate pristine roots by id across all active nodes.
  const byId = new Map<string, ConcreteNode>();
  for (const node of this.buffer.nodes) {
    for (const pristine of this.buffer.getPristineNodes(node.id)) {
      byId.set(pristine.id, pristine);
    }
  }
  // Return them in chronological order.
  const ordered = [...byId.values()];
  ordered.sort((a, b) => a.timestamp - b.timestamp);
  return ordered;
}
|
|
|
|
/**
 * Returns a defensive snapshot copy of the current active node list — the
 * working buffer after any processor substitutions have been applied.
 */
getNodes(): readonly ConcreteNode[] {
  return this.buffer.nodes.slice();
}
|
|
|
|
/** Exposes the shared context environment (event bus, graph mapper, token calculator, …). */
getEnvironment(): ContextEnvironment {
  return this.env;
}
|
|
|
|
/**
|
|
* Executes the final 'gc_backstop' pipeline if necessary, enforcing the token budget,
|
|
* and maps the Episodic Context Graph back into a raw Gemini Content[] array for transmission.
|
|
* This is the primary method called by the agent framework before sending a request.
|
|
*/
|
|
async renderHistory(
|
|
pendingRequest?: Content,
|
|
activeTaskIds: Set<string> = new Set(),
|
|
abortSignal?: AbortSignal,
|
|
): Promise<{
|
|
history: Content[];
|
|
didApplyManagement: boolean;
|
|
baseUnits: number;
|
|
}> {
|
|
this.tracer.logEvent('ContextManager', 'Starting rendering of LLM context');
|
|
|
|
let previewNodes: ConcreteNode[] = [];
|
|
if (pendingRequest) {
|
|
previewNodes = this.env.graphMapper.applyEvent({
|
|
type: 'PUSH',
|
|
payload: [pendingRequest],
|
|
});
|
|
}
|
|
|
|
// --- Hot Start Calibration ---
|
|
// If we are resuming a session with history, we don't want the adaptive token calculator
|
|
// to fly blind on its first GC pass. We do a one-time API calibration.
|
|
const hotStartPromise = (async () => {
|
|
if (!this.hasPerformedHotStart) {
|
|
this.hasPerformedHotStart = true;
|
|
|
|
if (this.buffer.nodes.length > 0) {
|
|
const nodesForHotStart = [...this.buffer.nodes, ...previewNodes];
|
|
await this.performHotStartCalibration(nodesForHotStart, abortSignal);
|
|
}
|
|
}
|
|
})();
|
|
|
|
// 1. Synchronous Pressure Barrier: Wait for background management pipelines to finish.
|
|
// We run hot start calibration in parallel to hide the network latency.
|
|
await Promise.all([this.orchestrator.waitForPipelines(), hotStartPromise]);
|
|
|
|
let nodes = this.buffer.nodes;
|
|
const previewNodeIds = new Set<string>();
|
|
|
|
// Apply the preview nodes to the final graph
|
|
if (previewNodes.length > 0) {
|
|
for (const n of previewNodes) {
|
|
previewNodeIds.add(n.id);
|
|
}
|
|
nodes = [...nodes, ...previewNodes];
|
|
}
|
|
|
|
// 2. Fetch Header and calculate tokens
|
|
const header = this.headerProvider
|
|
? await this.headerProvider()
|
|
: undefined;
|
|
|
|
// 3. Cache Check (Anomaly 3): If nodes haven't changed, return previous result.
|
|
// We combine the graph hash with a hash of the header to ensure total freshness.
|
|
const graphHash = nodes.map((n) => n.id).join('|');
|
|
const headerHash = header ? JSON.stringify(header.parts) : 'no-header';
|
|
const totalHash = `${graphHash}::${headerHash}`;
|
|
|
|
if (this.lastRenderCache?.nodesHash === totalHash) {
|
|
debugLogger.log(
|
|
'[ContextManager] Render cache hit. Skipping redundant render.',
|
|
);
|
|
return this.lastRenderCache.result;
|
|
}
|
|
|
|
const protectionReasons = this.getProtectedNodeIds(nodes, activeTaskIds);
|
|
|
|
// Apply final GC Backstop pressure barrier synchronously before mapping
|
|
const {
|
|
history: renderedHistory,
|
|
didApplyManagement,
|
|
baseUnits,
|
|
} = await render(
|
|
nodes,
|
|
this.orchestrator,
|
|
this.sidecar,
|
|
this.tracer,
|
|
this.env,
|
|
this.advancedTokenCalculator,
|
|
protectionReasons,
|
|
header,
|
|
previewNodeIds,
|
|
);
|
|
|
|
// Structural validation in debug mode
|
|
checkContextInvariants(this.buffer.nodes, 'RenderHistory');
|
|
|
|
this.tracer.logEvent('ContextManager', 'Finished rendering');
|
|
|
|
const combinedHistory = header
|
|
? [header, ...renderedHistory]
|
|
: renderedHistory;
|
|
|
|
const result = {
|
|
history: hardenHistory(combinedHistory, {
|
|
sentinels: this.sidecar.sentinels,
|
|
}),
|
|
didApplyManagement,
|
|
baseUnits,
|
|
};
|
|
|
|
// Update cache
|
|
this.lastRenderCache = { nodesHash: totalHash, result };
|
|
return result;
|
|
}
|
|
|
|
/**
 * One-time "hot start" calibration of the adaptive token calculator.
 *
 * Maps the given nodes (plus the optional header) into raw contents, asks the
 * LLM endpoint for an authoritative token count, and publishes the pairing of
 * actual tokens vs. locally computed base units on the event bus. Any failure
 * is swallowed — calibration is purely an optimization.
 */
private async performHotStartCalibration(
  nodes: readonly ConcreteNode[],
  abortSignal?: AbortSignal,
) {
  try {
    this.tracer.logEvent(
      'ContextManager',
      'Performing Hot Start Token Calibration',
    );

    const contents = this.env.graphMapper.fromGraph(nodes);
    const header = this.headerProvider
      ? await this.headerProvider()
      : undefined;
    const combinedHistory = header ? [header, ...contents] : contents;

    // Local estimate to pair with the authoritative count below.
    const baseUnits =
      this.advancedTokenCalculator.getRawBaseUnits(nodes) +
      (header
        ? this.advancedTokenCalculator.getRawBaseUnitsForContent(header)
        : 0);

    // We only make the network call if we have actual contents to send,
    // avoiding 400 Bad Request errors from the API.
    if (combinedHistory.length > 0) {
      const result = await this.env.llmClient.countTokens({
        contents: combinedHistory,
        abortSignal,
      });
      // Ignore degenerate zero counts; only a positive total is useful ground truth.
      if (result.totalTokens > 0) {
        this.env.eventBus.emitTokenGroundTruth({
          actualTokens: result.totalTokens,
          promptBaseUnits: baseUnits,
        });
      }
    }
  } catch (error) {
    // Hot start calibration is purely an optimization. If the network fails or auth is weird,
    // we silently swallow and fallback to the un-calibrated 1.0 ratio heuristic.
    this.tracer.logEvent(
      'ContextManager',
      'Hot Start Token Calibration Failed (Ignored)',
      { error },
    );
  }
}
|
|
|
|
/** Serializes the current working-buffer nodes into a persistable snapshot. */
exportState(): ContextEngineState {
  const { nodes } = this.buffer;
  return SnapshotStateHelper.exportState(nodes);
}
|
|
|
|
/**
 * Rehydrates a previously exported snapshot into the environment inbox.
 * A missing/falsy state is a no-op.
 */
async restoreState(state: ContextEngineState): Promise<void> {
  if (state) {
    SnapshotStateHelper.restoreState(state, this.env.inbox);
  }
}
|
|
}
|