gemini-cli/packages/core/src/context/contextManager.ts

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import type { Content } from '@google/genai';
import type { AgentChatHistory } from '../core/agentChatHistory.js';
import { isToolExecution, type ConcreteNode } from './graph/types.js';
import type { ContextEventBus } from './eventBus.js';
import type { ContextTracer } from './tracer.js';
import type { ContextEnvironment } from './pipeline/environment.js';
import type { ContextProfile } from './config/profiles.js';
import type { PipelineOrchestrator } from './pipeline/orchestrator.js';
import { HistoryObserver } from './historyObserver.js';
import { render } from './graph/render.js';
import { ContextWorkingBufferImpl } from './pipeline/contextWorkingBuffer.js';
import { debugLogger } from '../utils/debugLogger.js';
import { SnapshotStateHelper } from './utils/snapshotGenerator.js';
import type { ContextEngineState } from '../services/chatRecordingTypes.js';
import { hardenHistory } from '../utils/historyHardening.js';
import { checkContextInvariants } from './utils/invariantChecker.js';
import type { AdvancedTokenCalculator } from './utils/contextTokenCalculator.js';

export class ContextManager {
  // The master state containing the pristine graph and current active graph.
  private buffer: ContextWorkingBufferImpl =
    ContextWorkingBufferImpl.initialize([]);

  private readonly eventBus: ContextEventBus;

  // Internal sub-components
  private readonly orchestrator: PipelineOrchestrator;
  private readonly historyObserver: HistoryObserver;

  // Hysteresis tracking to prevent utility call churn
  private lastTriggeredDeficit = 0;

  // Cache for Anomaly 3 (Redundant Renders)
  private lastRenderCache?: {
    nodesHash: string;
    result: {
      history: Content[];
      didApplyManagement: boolean;
      baseUnits: number;
    };
  };

  private hasPerformedHotStart = false;

  constructor(
    private readonly sidecar: ContextProfile,
    private readonly env: ContextEnvironment,
    private readonly tracer: ContextTracer,
    orchestrator: PipelineOrchestrator,
    chatHistory: AgentChatHistory,
    private readonly advancedTokenCalculator: AdvancedTokenCalculator,
    private readonly headerProvider?: () => Promise<Content | undefined>,
  ) {
    this.eventBus = env.eventBus;
    this.orchestrator = orchestrator;

    // Provide the orchestrator with a way to fetch the latest nodes from the live buffer
    this.orchestrator.setNodeProvider(() => this.buffer.nodes);

    this.historyObserver = new HistoryObserver(
      chatHistory,
      this.env.eventBus,
      this.tracer,
      this.env.graphMapper,
    );

    this.eventBus.onPristineHistoryUpdated((event) => {
      // Sync the entire pristine history chronologically
      this.buffer = this.buffer.syncPristineHistory(event.nodes);

      this.evaluateTriggers(event.newNodes);
    });
    this.eventBus.onProcessorResult((event) => {
      this.buffer = this.buffer.applyProcessorResult(
        event.processorId,
        event.targets,
        event.returnedNodes,
      );
      // We explicitly DO NOT call evaluateTriggers here.
      // The Context Manager is a one-way assembly line. It only evaluates triggers
      // when fundamentally new organic context is added via PristineHistoryUpdated.
      // Re-evaluating after a processor finishes creates infinite feedback loops if
      // the processor fails to reduce the token count below the threshold.
    });

    this.historyObserver.start();
  }

  /**
   * Returns a promise that resolves when all currently executing async pipelines have finished.
   */
  async waitForPipelines(): Promise<void> {
    return this.orchestrator.waitForPipelines();
  }

  /**
   * Safely stops background async pipelines and clears event listeners.
   */
  shutdown() {
    this.orchestrator.shutdown();
    this.historyObserver.stop();
  }

  /**
   * Evaluates if the current working buffer exceeds configured budget thresholds,
   * firing consolidation events if necessary.
   */
  private evaluateTriggers(newNodes: Set<string>) {
    if (!this.sidecar.config.budget) return;

    if (newNodes.size > 0) {
      this.eventBus.emitChunkReceived({
        nodes: this.buffer.nodes,
        targetNodeIds: newNodes,
      });
    }

    const currentTokens = this.env.tokenCalculator.calculateConcreteListTokens(
      this.buffer.nodes,
    );

    if (currentTokens > this.sidecar.config.budget.retainedTokens) {
      const agedOutNodes = new Set<string>();
      let rollingTokens = 0;

      // Identify active tool calls that must NEVER be truncated
      const protectedIds = this.getProtectedNodeIds(this.buffer.nodes);
      if (protectedIds.size > 0) {
        debugLogger.log(
          `[ContextManager] Pinning ${protectedIds.size} active tool call nodes to prevent truncation.`,
        );
      }

      // Walk backwards finding nodes that fall out of the retained budget
      for (let i = this.buffer.nodes.length - 1; i >= 0; i--) {
        const node = this.buffer.nodes[i];
        const priorTokens = rollingTokens;
        rollingTokens += this.env.tokenCalculator.calculateConcreteListTokens([
          node,
        ]);

        // Loose Boundary Policy: If this node is the one that pushes us over the retained limit,
        // we KEEP it to prevent aggressive undershooting. We only age out nodes that are
        // strictly *older* than the boundary node.
        if (priorTokens > this.sidecar.config.budget.retainedTokens) {
          // Only age out if not protected
          if (!protectedIds.has(node.id)) {
            agedOutNodes.add(node.id);
          }
        }
      }

      if (agedOutNodes.size > 0) {
        const targetDeficit =
          currentTokens - this.sidecar.config.budget.retainedTokens;

        // If the deficit has shrunk (e.g. after a consolidation), update the baseline
        // so we can track growth from this new, smaller deficit.
        if (targetDeficit < this.lastTriggeredDeficit) {
          this.lastTriggeredDeficit = targetDeficit;
        }

        // Respect coalescing threshold for background work
        const threshold =
          this.sidecar.config.budget.coalescingThresholdTokens || 0;

        // Only trigger if deficit has grown significantly since last time
        const growthSinceLast = targetDeficit - this.lastTriggeredDeficit;

        if (
          targetDeficit >= threshold &&
          (growthSinceLast >= threshold || this.lastTriggeredDeficit === 0)
        ) {
          this.lastTriggeredDeficit = targetDeficit;
          this.env.tokenCalculator.garbageCollectCache(
            new Set(this.buffer.nodes.map((n) => n.id)),
          );
          this.eventBus.emitConsolidationNeeded({
            nodes: this.buffer.nodes,
            targetDeficit,
            targetNodeIds: agedOutNodes,
          });
        }
      } else {
        // Budget is healthy, reset hysteresis
        this.lastTriggeredDeficit = 0;
      }
    }
  }

  /**
   * Identifies 'pinned' nodes that should not be truncated.
   * This includes:
   * 1. The entire last turn (Recent context).
   * 2. Active tool calls (calls without responses in the graph).
   */
  private getProtectedNodeIds(
    nodes: readonly ConcreteNode[],
    extraProtectedIds: Set<string> = new Set(),
  ): Map<string, string> {
    const protectionMap = new Map<string, string>();
    if (nodes.length === 0) return protectionMap;

    // 1. Identify all nodes belonging to the last turn (Recent context)
    const lastNode = nodes[nodes.length - 1];
    const lastTurnId = lastNode.turnId;

    for (const node of nodes) {
      if (node.turnId === lastTurnId) {
        protectionMap.set(node.id, 'recent_turn');
      }
    }

    // 2. Identify active tool calls that must NEVER be truncated
    const calls = nodes.filter((n) => isToolExecution(n) && n.role === 'model');
    const responses = new Set(
      nodes
        .filter((n) => isToolExecution(n) && n.role === 'user')
        .map((n) => n.payload.functionResponse?.id)
        .filter((id): id is string => !!id),
    );

    for (const call of calls) {
      const id = call.payload.functionCall?.id;
      // If we have a call but no response in the current graph, it's 'in flight'
      if (id && !responses.has(id)) {
        protectionMap.set(call.id, 'in_flight_tool_call');
      }
    }

    // 3. Any externally requested protections
    for (const id of extraProtectedIds) {
      protectionMap.set(id, 'external_active_task');
    }

    return protectionMap;
  }

  /**
   * Retrieves the raw, uncompressed Episodic Context Graph graph.
   * Useful for internal tool rendering (like the trace viewer).
   * Note: This is an expensive, deep clone operation.
   */
  getPristineGraph(): readonly ConcreteNode[] {
    const pristineSet = new Map<string, ConcreteNode>();
    for (const node of this.buffer.nodes) {
      const roots = this.buffer.getPristineNodes(node.id);
      for (const root of roots) {
        pristineSet.set(root.id, root);
      }
    }
    // We sort them by timestamp to ensure they are returned in chronological order
    return Array.from(pristineSet.values()).sort(
      (a, b) => a.timestamp - b.timestamp,
    );
  }

  /**
   * Generates a virtual view of the pristine graph, substituting in variants
   * up to the configured token budget.
   * This is the view that will eventually be projected back to the LLM.
   */
  getNodes(): readonly ConcreteNode[] {
    return [...this.buffer.nodes];
  }

  getEnvironment(): ContextEnvironment {
    return this.env;
  }

  /**
   * Executes the final 'gc_backstop' pipeline if necessary, enforcing the token budget,
   * and maps the Episodic Context Graph back into a raw Gemini Content[] array for transmission.
   * This is the primary method called by the agent framework before sending a request.
   */
  async renderHistory(
    pendingRequest?: Content,
    activeTaskIds: Set<string> = new Set(),
    abortSignal?: AbortSignal,
  ): Promise<{
    history: Content[];
    didApplyManagement: boolean;
    baseUnits: number;
  }> {
    this.tracer.logEvent('ContextManager', 'Starting rendering of LLM context');

    let previewNodes: ConcreteNode[] = [];
    if (pendingRequest) {
      previewNodes = this.env.graphMapper.applyEvent({
        type: 'PUSH',
        payload: [pendingRequest],
      });
    }

    // --- Hot Start Calibration ---
    // If we are resuming a session with history, we don't want the adaptive token calculator
    // to fly blind on its first GC pass. We do a one-time API calibration.
    const hotStartPromise = (async () => {
      if (!this.hasPerformedHotStart) {
        this.hasPerformedHotStart = true;

        if (this.buffer.nodes.length > 0) {
          const nodesForHotStart = [...this.buffer.nodes, ...previewNodes];
          await this.performHotStartCalibration(nodesForHotStart, abortSignal);
        }
      }
    })();

    // 1. Synchronous Pressure Barrier: Wait for background management pipelines to finish.
    // We run hot start calibration in parallel to hide the network latency.
    await Promise.all([this.orchestrator.waitForPipelines(), hotStartPromise]);

    let nodes = this.buffer.nodes;
    const previewNodeIds = new Set<string>();

    // Apply the preview nodes to the final graph
    if (previewNodes.length > 0) {
      for (const n of previewNodes) {
        previewNodeIds.add(n.id);
      }
      nodes = [...nodes, ...previewNodes];
    }

    // 2. Fetch Header and calculate tokens
    const header = this.headerProvider
      ? await this.headerProvider()
      : undefined;

    // 3. Cache Check (Anomaly 3): If nodes haven't changed, return previous result.
    // We combine the graph hash with a hash of the header to ensure total freshness.
    const graphHash = nodes.map((n) => n.id).join('|');
    const headerHash = header ? JSON.stringify(header.parts) : 'no-header';
    const totalHash = `${graphHash}::${headerHash}`;

    if (this.lastRenderCache?.nodesHash === totalHash) {
      debugLogger.log(
        '[ContextManager] Render cache hit. Skipping redundant render.',
      );
      return this.lastRenderCache.result;
    }

    const protectionReasons = this.getProtectedNodeIds(nodes, activeTaskIds);

    // Apply final GC Backstop pressure barrier synchronously before mapping
    const {
      history: renderedHistory,
      didApplyManagement,
      baseUnits,
    } = await render(
      nodes,
      this.orchestrator,
      this.sidecar,
      this.tracer,
      this.env,
      this.advancedTokenCalculator,
      protectionReasons,
      header,
      previewNodeIds,
    );

    // Structural validation in debug mode
    checkContextInvariants(this.buffer.nodes, 'RenderHistory');

    this.tracer.logEvent('ContextManager', 'Finished rendering');

    const combinedHistory = header
      ? [header, ...renderedHistory]
      : renderedHistory;

    const result = {
      history: hardenHistory(combinedHistory, {
        sentinels: this.sidecar.sentinels,
      }),
      didApplyManagement,
      baseUnits,
    };

    // Update cache
    this.lastRenderCache = { nodesHash: totalHash, result };
    return result;
  }

  private async performHotStartCalibration(
    nodes: readonly ConcreteNode[],
    abortSignal?: AbortSignal,
  ) {
    try {
      this.tracer.logEvent(
        'ContextManager',
        'Performing Hot Start Token Calibration',
      );

      const contents = this.env.graphMapper.fromGraph(nodes);
      const header = this.headerProvider
        ? await this.headerProvider()
        : undefined;
      const combinedHistory = header ? [header, ...contents] : contents;

      const baseUnits =
        this.advancedTokenCalculator.getRawBaseUnits(nodes) +
        (header
          ? this.advancedTokenCalculator.getRawBaseUnitsForContent(header)
          : 0);

      // We only make the network call if we have actual contents to send,
      // avoiding 400 Bad Request errors from the API.
      if (combinedHistory.length > 0) {
        const result = await this.env.llmClient.countTokens({
          contents: combinedHistory,
          abortSignal,
        });
        if (result.totalTokens > 0) {
          this.env.eventBus.emitTokenGroundTruth({
            actualTokens: result.totalTokens,
            promptBaseUnits: baseUnits,
          });
        }
      }
    } catch (error) {
      // Hot start calibration is purely an optimization. If the network fails or auth is weird,
      // we silently swallow and fallback to the un-calibrated 1.0 ratio heuristic.
      this.tracer.logEvent(
        'ContextManager',
        'Hot Start Token Calibration Failed (Ignored)',
        { error },
      );
    }
  }

  exportState(): ContextEngineState {
    return SnapshotStateHelper.exportState(this.buffer.nodes);
  }

  async restoreState(state: ContextEngineState): Promise<void> {
    if (!state) return;
    SnapshotStateHelper.restoreState(state, this.env.inbox);
  }
}