feat(core): wire up the new ContextManager and AgentChatHistory (#25409)

2026-06-12 12:26:57 -07:00 · 2026-04-27 11:50:00 -07:00
parent 98aca28985
commit 71f313b51a
35 changed files with 707 additions and 377 deletions
@@ -1846,6 +1846,12 @@ their corresponding top-level category object in your `settings.json` file.
  - **Default:** `true`
  - **Requires restart:** Yes

+- **`experimental.stressTestProfile`** (boolean):
+  - **Description:** Significantly lowers token limits to force early garbage
+    collection and distillation for testing purposes.
+  - **Default:** `false`
+  - **Requires restart:** Yes
+
 - **`experimental.autoMemory`** (boolean):
  - **Description:** Automatically extract reusable skills from past sessions in
    the background. Review results with /memory inbox.
@@ -21,8 +21,6 @@ import {
  type MCPServerConfig,
  type GeminiCLIExtension,
  Storage,
-  generalistProfile,
-  type ContextManagementConfig,
 } from '@google/gemini-cli-core';
 import { loadCliConfig, parseArguments, type CliArgs } from './config.js';
 import {
@@ -2217,51 +2215,6 @@ describe('loadCliConfig context management', () => {
      },
    });
    const config = await loadCliConfig(settings, 'test-session', argv);
-    expect(config.getContextManagementConfig()).toStrictEqual(
-      generalistProfile,
-    );
-    expect(config.isContextManagementEnabled()).toBe(true);
-  });
-
-  it('should be true when contextManagement is set to true in settings', async () => {
-    process.argv = ['node', 'script.js'];
-    const argv = await parseArguments(createTestMergedSettings());
-    const contextManagementConfig: Partial<ContextManagementConfig> = {
-      historyWindow: {
-        maxTokens: 100_000,
-        retainedTokens: 50_000,
-      },
-      messageLimits: {
-        normalMaxTokens: 1000,
-        retainedMaxTokens: 10_000,
-        normalizationHeadRatio: 0.25,
-      },
-      tools: {
-        distillation: {
-          maxOutputTokens: 10_000,
-          summarizationThresholdTokens: 15_000,
-        },
-        outputMasking: {
-          protectionThresholdTokens: 30_000,
-          minPrunableThresholdTokens: 10_000,
-          protectLatestTurn: false,
-        },
-      },
-    };
-    const settings = createTestMergedSettings({
-      experimental: {
-        contextManagement: true,
-      },
-      // The type of numbers is being inferred strangely, and so we have to cast
-      // to `any` here.
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      contextManagement: contextManagementConfig as any,
-    });
-    const config = await loadCliConfig(settings, 'test-session', argv);
-    expect(config.getContextManagementConfig()).toStrictEqual({
-      enabled: true,
-      ...contextManagementConfig,
-    });
    expect(config.isContextManagementEnabled()).toBe(true);
  });
 });
@@ -48,7 +48,6 @@ import {
  type HookEventName,
  type OutputFormat,
  detectIdeFromEnv,
-  generalistProfile,
 } from '@google/gemini-cli-core';
 import {
  type Settings,
@@ -904,14 +903,19 @@ export async function loadCliConfig(
    }
  }

-  const useGeneralistProfile =
-    settings.experimental?.generalistProfile ?? false;
-  const useContextManagement =
-    settings.experimental?.contextManagement ?? false;
+  // TODO(joshualitt): Clean this up alongside removal of the legacy config.
+  let profileSelector: string | undefined = undefined;
+  if (settings.experimental?.stressTestProfile) {
+    profileSelector = 'stressTestProfile';
+  } else if (
+    settings.experimental?.generalistProfile ||
+    settings.experimental?.contextManagement
+  ) {
+    profileSelector = 'generalistProfile';
+  }
+
  const contextManagement = {
-    ...(useGeneralistProfile ? generalistProfile : {}),
-    ...(useContextManagement ? settings?.contextManagement : {}),
-    enabled: useContextManagement || useGeneralistProfile,
+    enabled: !!profileSelector,
  };

  return new Config({
@@ -935,6 +939,7 @@ export async function loadCliConfig(
    worktreeSettings,

    coreTools: settings.tools?.core || undefined,
+    experimentalContextManagementConfig: profileSelector,
    allowedTools: allowedTools.length > 0 ? allowedTools : undefined,
    policyEngineConfig,
    policyUpdateConfirmationRequest,
@@ -2388,6 +2388,17 @@ const SETTINGS_SCHEMA = {
          'Disable the built-in save_memory tool and let the main agent persist project context by editing markdown files directly with edit/write_file. Route facts across four tiers: team-shared conventions go to project GEMINI.md files, project-specific personal notes go to the per-project private memory folder (MEMORY.md as index + sibling .md files for detail), and cross-project personal preferences go to the global ~/.gemini/GEMINI.md (the only file under ~/.gemini/ that the agent can edit — settings, credentials, etc. remain off-limits). Set to false to fall back to the legacy save_memory tool.',
        showInDialog: true,
      },
+      stressTestProfile: {
+        type: 'boolean',
+        label:
+          'Use the stress test profile to aggressively trigger context management.',
+        category: 'Experimental',
+        requiresRestart: true,
+        default: false,
+        description:
+          'Significantly lowers token limits to force early garbage collection and distillation for testing purposes.',
+        showInDialog: false,
+      },
      autoMemory: {
        type: 'boolean',
        label: 'Auto Memory',
@@ -6,19 +6,17 @@

 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import { loadContextManagementConfig } from './configLoader.js';
-import { defaultContextProfile } from './profiles.js';
+import { generalistProfile } from './profiles.js';
 import { ContextProcessorRegistry } from './registry.js';
 import * as fs from 'node:fs/promises';
 import * as path from 'node:path';
 import * as os from 'node:os';
-import type { Config } from '../../config/config.js';
 import type { JSONSchemaType } from 'ajv';

 describe('SidecarLoader (Real FS)', () => {
  let tmpDir: string;
  let registry: ContextProcessorRegistry;
  let sidecarPath: string;
-  let mockConfig: Config;

  beforeEach(async () => {
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-sidecar-test-'));
@@ -32,10 +30,6 @@ describe('SidecarLoader (Real FS)', () => {
        required: ['maxTokens'],
      } as unknown as JSONSchemaType<{ maxTokens: number }>,
    });
-
-    mockConfig = {
-      getExperimentalContextManagementConfig: () => sidecarPath,
-    } as unknown as Config;
  });

  afterEach(async () => {
@@ -43,14 +37,14 @@ describe('SidecarLoader (Real FS)', () => {
  });

  it('returns default profile if file does not exist', async () => {
-    const result = await loadContextManagementConfig(mockConfig, registry);
-    expect(result).toBe(defaultContextProfile);
+    const result = await loadContextManagementConfig(sidecarPath, registry);
+    expect(result).toBe(generalistProfile);
  });

  it('returns default profile if file exists but is 0 bytes', async () => {
    await fs.writeFile(sidecarPath, '');
-    const result = await loadContextManagementConfig(mockConfig, registry);
-    expect(result).toBe(defaultContextProfile);
+    const result = await loadContextManagementConfig(sidecarPath, registry);
+    expect(result).toBe(generalistProfile);
  });

  it('returns parsed config if file is valid', async () => {
@@ -64,7 +58,7 @@ describe('SidecarLoader (Real FS)', () => {
      },
    };
    await fs.writeFile(sidecarPath, JSON.stringify(validConfig));
-    const result = await loadContextManagementConfig(mockConfig, registry);
+    const result = await loadContextManagementConfig(sidecarPath, registry);
    expect(result.config.budget?.maxTokens).toBe(2000);
    expect(result.config.processorOptions?.['myTruncation']).toBeDefined();
  });
@@ -81,14 +75,14 @@ describe('SidecarLoader (Real FS)', () => {
    };
    await fs.writeFile(sidecarPath, JSON.stringify(invalidConfig));
    await expect(
-      loadContextManagementConfig(mockConfig, registry),
+      loadContextManagementConfig(sidecarPath, registry),
    ).rejects.toThrow('Validation error');
  });

  it('throws validation error if file is empty whitespace', async () => {
    await fs.writeFile(sidecarPath, '   \n  ');
    await expect(
-      loadContextManagementConfig(mockConfig, registry),
+      loadContextManagementConfig(sidecarPath, registry),
    ).rejects.toThrow('Unexpected end of JSON input');
  });
 });
@@ -4,11 +4,14 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import type { Config } from '../../config/config.js';
 import * as fsSync from 'node:fs';
 import * as fs from 'node:fs/promises';
 import type { ContextManagementConfig } from './types.js';
-import { defaultContextProfile, type ContextProfile } from './profiles.js';
+import {
+  generalistProfile,
+  stressTestProfile,
+  type ContextProfile,
+} from './profiles.js';
 import { SchemaValidator } from '../../utils/schemaValidator.js';
 import { getContextManagementConfigSchema } from './schema.js';
 import type { ContextProcessorRegistry } from './registry.js';
@@ -54,9 +57,9 @@ async function loadConfigFromFile(
  // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
  const validConfig = parsed as ContextManagementConfig;
  return {
-    ...defaultContextProfile,
+    ...generalistProfile,
    config: {
-      ...defaultContextProfile.config,
+      ...generalistProfile.config,
      ...(validConfig.budget ? { budget: validConfig.budget } : {}),
      ...(validConfig.processorOptions
        ? { processorOptions: validConfig.processorOptions }
@@ -70,21 +73,27 @@ async function loadConfigFromFile(
 * If a config file is present but invalid, this will THROW to prevent silent misconfiguration.
 */
 export async function loadContextManagementConfig(
-  config: Config,
+  sidecarPath: string | undefined,
  registry: ContextProcessorRegistry,
 ): Promise<ContextProfile> {
-  const sidecarPath = config.getExperimentalContextManagementConfig();
+  if (sidecarPath === 'stressTestProfile') {
+    return stressTestProfile;
+  }
+
+  if (sidecarPath === 'generalistProfile') {
+    return generalistProfile;
+  }

  if (sidecarPath && fsSync.existsSync(sidecarPath)) {
    const size = fsSync.statSync(sidecarPath).size;
    // If the file exists but is completely empty (0 bytes), it's safe to fallback.
    if (size === 0) {
-      return defaultContextProfile;
+      return generalistProfile;
    }

    // If the file has content, enforce strict validation and throw on failure.
    return loadConfigFromFile(sidecarPath, registry);
  }

-  return defaultContextProfile;
+  return generalistProfile;
 }
@@ -62,7 +62,7 @@ export interface ContextProfile {
 * The standard default context management profile.
 * Optimized for safety, precision, and reliable summarization.
 */
-export const defaultContextProfile: ContextProfile = {
+export const generalistProfile: ContextProfile = {
  config: {
    budget: {
      retainedTokens: 65000,
@@ -88,24 +88,32 @@ export const defaultContextProfile: ContextProfile = {
            }),
          ),
          createBlobDegradationProcessor('BlobDegradation', env), // No options
+          // Automatically distill extremely large blocks (e.g. huge source files pasted by the user)
+          createNodeDistillationProcessor(
+            'ImmediateNodeDistillation',
+            env,
+            resolveProcessorOptions(config, 'ImmediateNodeDistillation', {
+              nodeThresholdTokens: 15000,
+            }),
+          ),
        ],
      },
      {
        name: 'Normalization',
        triggers: ['retained_exceeded'],
        processors: [
-          createNodeTruncationProcessor(
-            'NodeTruncation',
-            env,
-            resolveProcessorOptions(config, 'NodeTruncation', {
-              maxTokensPerNode: 3000,
-            }),
-          ),
          createNodeDistillationProcessor(
            'NodeDistillation',
            env,
            resolveProcessorOptions(config, 'NodeDistillation', {
-              nodeThresholdTokens: 5000,
+              nodeThresholdTokens: 3000,
+            }),
+          ),
+          createNodeTruncationProcessor(
+            'NodeTruncation',
+            env,
+            resolveProcessorOptions(config, 'NodeTruncation', {
+              maxTokensPerNode: 2000,
            }),
          ),
        ],
@@ -143,3 +151,41 @@ export const defaultContextProfile: ContextProfile = {
    },
  ],
 };
+
+/**
+ * A highly aggressive profile designed exclusively for testing Context Management.
+ * Lowers token limits dramatically to force garbage collection and distillation loops
+ * within a few conversational turns.
+ */
+export const stressTestProfile: ContextProfile = {
+  config: {
+    budget: {
+      retainedTokens: 4000,
+      maxTokens: 10000,
+    },
+    processorOptions: {
+      ToolMasking: {
+        type: 'ToolMaskingProcessor',
+        options: {
+          stringLengthThresholdTokens: 500,
+        },
+      },
+      NodeTruncation: {
+        type: 'NodeTruncationProcessor',
+        options: {
+          maxTokensPerNode: 1000,
+        },
+      },
+      NodeDistillation: {
+        type: 'NodeDistillationProcessor',
+        options: {
+          nodeThresholdTokens: 1500,
+        },
+      },
+    },
+  },
+  // Re-use the generalist pipeline architecture exactly, but the `config` above
+  // will be passed into `resolveProcessorOptions` to aggressively override the thresholds.
+  buildPipelines: generalistProfile.buildPipelines,
+  buildAsyncPipelines: generalistProfile.buildAsyncPipelines,
+};
@@ -44,12 +44,10 @@ export class ContextManager {
      this.env.tokenCalculator,
      this.env.graphMapper,
    );
-    this.historyObserver.start();

    this.eventBus.onPristineHistoryUpdated((event) => {
-      const existingIds = new Set(this.buffer.nodes.map((n) => n.id));
      const newIds = new Set(event.nodes.map((n) => n.id));
-      const addedNodes = event.nodes.filter((n) => !existingIds.has(n.id));
+      const addedNodes = event.nodes.filter((n) => event.newNodes.has(n.id));

      // Prune any pristine nodes that were dropped from the upstream history
      this.buffer = this.buffer.prunePristineNodes(newIds);
@@ -60,6 +58,15 @@ export class ContextManager {

      this.evaluateTriggers(event.newNodes);
    });
+    this.eventBus.onProcessorResult((event) => {
+      this.buffer = this.buffer.applyProcessorResult(
+        event.processorId,
+        event.targets,
+        event.returnedNodes,
+      );
+    });
+
+    this.historyObserver.start();
  }

  /**
@@ -153,6 +160,7 @@ export class ContextManager {
    activeTaskIds: Set<string> = new Set(),
  ): Promise<Content[]> {
    this.tracer.logEvent('ContextManager', 'Starting rendering of LLM context');
+
    // Apply final GC Backstop pressure barrier synchronously before mapping
    const finalHistory = await render(
      this.buffer.nodes,
@@ -7,6 +7,12 @@
 import { EventEmitter } from 'node:events';
 import type { ConcreteNode } from './graph/types.js';

+export interface ProcessorResultEvent {
+  processorId: string;
+  targets: readonly ConcreteNode[];
+  returnedNodes: readonly ConcreteNode[];
+}
+
 export interface PristineHistoryUpdatedEvent {
  nodes: readonly ConcreteNode[];
  newNodes: Set<string>;
@@ -49,4 +55,12 @@ export class ContextEventBus extends EventEmitter {
  onConsolidationNeeded(listener: (event: ContextConsolidationEvent) => void) {
    this.on('BUDGET_RETAINED_CROSSED', listener);
  }
+
+  emitProcessorResult(event: ProcessorResultEvent) {
+    this.emit('PROCESSOR_RESULT', event);
+  }
+
+  onProcessorResult(listener: (event: ProcessorResultEvent) => void) {
+    this.on('PROCESSOR_RESULT', listener);
+  }
 }
@@ -122,9 +122,9 @@ export const AgentYieldBehavior: NodeBehavior<AgentYield> = {
  getEstimatableParts(yieldNode) {
    return [{ text: yieldNode.text }];
  },
-  serialize(yieldNode, writer) {
-    writer.appendModelPart({ text: yieldNode.text });
-    writer.flushModelParts();
+  serialize() {
+    // AGENT_YIELD is a synthetic marker node used for internal graph tracking.
+    // We intentionally do NOT serialize it to the LLM to prevent prompt corruption.
  },
 };

@@ -3,9 +3,10 @@
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
+import type { ConcreteNode } from './types.js';
+import { ContextGraphBuilder } from './toGraph.js';
 import type { Content } from '@google/genai';
-import type { Episode, ConcreteNode } from './types.js';
-import { toGraph } from './toGraph.js';
+import type { HistoryEvent } from '../../core/agentChatHistory.js';
 import { fromGraph } from './fromGraph.js';
 import type { ContextTokenCalculator } from '../utils/contextTokenCalculator.js';
 import type { NodeBehaviorRegistry } from './behaviorRegistry.js';
@@ -15,11 +16,30 @@ export class ContextGraphMapper {

  constructor(private readonly registry: NodeBehaviorRegistry) {}

-  toGraph(
-    history: readonly Content[],
+  private builder?: ContextGraphBuilder;
+
+  applyEvent(
+    event: HistoryEvent,
    tokenCalculator: ContextTokenCalculator,
-  ): Episode[] {
-    return toGraph(history, tokenCalculator, this.nodeIdentityMap);
+  ): ConcreteNode[] {
+    if (!this.builder) {
+      this.builder = new ContextGraphBuilder(
+        tokenCalculator,
+        this.nodeIdentityMap,
+      );
+    }
+
+    if (event.type === 'CLEAR') {
+      this.builder.clear();
+      return [];
+    }
+
+    if (event.type === 'SYNC_FULL') {
+      this.builder.clear();
+    }
+
+    this.builder.processHistory(event.payload);
+    return this.builder.getNodes();
  }

  fromGraph(nodes: readonly ConcreteNode[]): Content[] {
@@ -6,6 +6,7 @@

 import type { Content, Part } from '@google/genai';
 import type {
+  ConcreteNode,
  Episode,
  SemanticPart,
  ToolExecution,
@@ -38,67 +39,98 @@ function isCompleteEpisode(ep: Partial<Episode>): ep is Episode {
  );
 }

-export function toGraph(
-  history: readonly Content[],
-  tokenCalculator: ContextTokenCalculator,
-  nodeIdentityMap: WeakMap<object, string>,
-): Episode[] {
-  const episodes: Episode[] = [];
-  let currentEpisode: Partial<Episode> | null = null;
-  const pendingCallParts: Map<string, Part> = new Map();
+export class ContextGraphBuilder {
+  private episodes: Episode[] = [];
+  private currentEpisode: Partial<Episode> | null = null;
+  private pendingCallParts: Map<string, Part> = new Map();
+  private pendingCallPartsWithoutId: Part[] = [];

-  const finalizeEpisode = () => {
-    if (currentEpisode && isCompleteEpisode(currentEpisode)) {
-      episodes.push(currentEpisode);
-    }
-    currentEpisode = null;
-  };
+  constructor(
+    private readonly tokenCalculator: ContextTokenCalculator,
+    private readonly nodeIdentityMap: WeakMap<object, string> = new WeakMap(),
+  ) {}

-  for (const msg of history) {
-    if (!msg.parts) continue;
+  clear() {
+    this.episodes = [];
+    this.currentEpisode = null;
+    this.pendingCallParts.clear();
+    this.pendingCallPartsWithoutId = [];
+  }

-    if (msg.role === 'user') {
-      const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
-      const hasUserParts = msg.parts.some(
-        (p) => !!p.text || !!p.inlineData || !!p.fileData,
-      );
+  processHistory(history: readonly Content[]) {
+    const finalizeEpisode = () => {
+      if (this.currentEpisode && isCompleteEpisode(this.currentEpisode)) {
+        this.episodes.push(this.currentEpisode);
+      }
+      this.currentEpisode = null;
+    };

-      if (hasToolResponses) {
-        currentEpisode = parseToolResponses(
+    for (const msg of history) {
+      if (!msg.parts) continue;
+
+      if (msg.role === 'user') {
+        const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
+        const hasUserParts = msg.parts.some(
+          (p) => !!p.text || !!p.inlineData || !!p.fileData,
+        );
+
+        if (hasToolResponses) {
+          this.currentEpisode = parseToolResponses(
+            msg,
+            this.currentEpisode,
+            this.pendingCallParts,
+            this.pendingCallPartsWithoutId,
+            this.tokenCalculator,
+            this.nodeIdentityMap,
+          );
+        }
+
+        if (hasUserParts) {
+          finalizeEpisode();
+          this.currentEpisode = parseUserParts(msg, this.nodeIdentityMap);
+        }
+      } else if (msg.role === 'model') {
+        this.currentEpisode = parseModelParts(
          msg,
-          currentEpisode,
-          pendingCallParts,
-          tokenCalculator,
-          nodeIdentityMap,
+          this.currentEpisode,
+          this.pendingCallParts,
+          this.pendingCallPartsWithoutId,
+          this.nodeIdentityMap,
        );
      }
-
-      if (hasUserParts) {
-        finalizeEpisode();
-        currentEpisode = parseUserParts(msg, nodeIdentityMap);
-      }
-    } else if (msg.role === 'model') {
-      currentEpisode = parseModelParts(
-        msg,
-        currentEpisode,
-        pendingCallParts,
-        nodeIdentityMap,
-      );
    }
  }

-  if (currentEpisode) {
-    finalizeYield(currentEpisode);
-    finalizeEpisode();
-  }
+  getNodes(): ConcreteNode[] {
+    const copy = [...this.episodes];
+    if (this.currentEpisode) {
+      const activeEp = {
+        ...this.currentEpisode,
+        concreteNodes: [...(this.currentEpisode.concreteNodes || [])],
+      };
+      finalizeYield(activeEp);
+      if (isCompleteEpisode(activeEp)) {
+        copy.push(activeEp);
+      }
+    }

-  return episodes;
+    const nodes: ConcreteNode[] = [];
+    for (const ep of copy) {
+      if (ep.concreteNodes) {
+        for (const child of ep.concreteNodes) {
+          nodes.push(child);
+        }
+      }
+    }
+    return nodes;
+  }
 }

 function parseToolResponses(
  msg: Content,
  currentEpisode: Partial<Episode> | null,
  pendingCallParts: Map<string, Part>,
+  pendingCallPartsWithoutId: Part[],
  tokenCalculator: ContextTokenCalculator,
  nodeIdentityMap: WeakMap<object, string>,
 ): Partial<Episode> {
@@ -114,7 +146,19 @@ function parseToolResponses(
  for (const part of parts) {
    if (part.functionResponse) {
      const callId = part.functionResponse.id || '';
-      const matchingCall = pendingCallParts.get(callId);
+      let matchingCall = pendingCallParts.get(callId);
+
+      if (!matchingCall && pendingCallPartsWithoutId.length > 0) {
+        const idx = pendingCallPartsWithoutId.findIndex(
+          (p) => p.functionCall?.name === part.functionResponse!.name,
+        );
+        if (idx !== -1) {
+          matchingCall = pendingCallPartsWithoutId[idx];
+          pendingCallPartsWithoutId.splice(idx, 1);
+        } else {
+          matchingCall = pendingCallPartsWithoutId.shift();
+        }
+      }

      const intentTokens = matchingCall
        ? tokenCalculator.estimateTokensForParts([matchingCall])
@@ -137,6 +181,7 @@ function parseToolResponses(
          observation: obsTokens,
        },
      };
+
      currentEpisode.concreteNodes = [
        ...(currentEpisode.concreteNodes || []),
        step,
@@ -190,6 +235,7 @@ function parseModelParts(
  msg: Content,
  currentEpisode: Partial<Episode> | null,
  pendingCallParts: Map<string, Part>,
+  pendingCallPartsWithoutId: Part[],
  nodeIdentityMap: WeakMap<object, string>,
 ): Partial<Episode> {
  if (!currentEpisode) {
@@ -204,7 +250,23 @@ function parseModelParts(
  for (const part of parts) {
    if (part.functionCall) {
      const callId = part.functionCall.id || '';
-      if (callId) pendingCallParts.set(callId, part);
+      if (callId) {
+        pendingCallParts.set(callId, part);
+      } else {
+        const lastIdx = pendingCallPartsWithoutId.length - 1;
+        const lastPart = pendingCallPartsWithoutId[lastIdx];
+
+        if (
+          lastPart &&
+          lastPart.functionCall &&
+          lastPart.functionCall.name === part.functionCall.name
+        ) {
+          // Replace the previous chunk with the more complete one
+          pendingCallPartsWithoutId[lastIdx] = part;
+        } else {
+          pendingCallPartsWithoutId.push(part);
+        }
+      }
    } else if (part.text) {
      const thought: AgentThought = {
        id: getStableId(part, nodeIdentityMap),
@@ -33,50 +33,47 @@ export class HistoryObserver {
    private readonly graphMapper: ContextGraphMapper,
  ) {}

+  private processEvent = (event: HistoryEvent) => {
+    let nodes: ConcreteNode[] = [];
+
+    if (event.type === 'CLEAR') {
+      this.seenNodeIds.clear();
+    }
+
+    nodes = this.graphMapper.applyEvent(event, this.tokenCalculator);
+
+    const newNodes = new Set<string>();
+    for (const node of nodes) {
+      if (!this.seenNodeIds.has(node.id)) {
+        newNodes.add(node.id);
+        this.seenNodeIds.add(node.id);
+      }
+    }
+
+    this.tracer.logEvent(
+      'HistoryObserver',
+      `Rebuilt pristine graph from ${event.type} event`,
+      { nodesSize: nodes.length, newNodesCount: newNodes.size },
+    );
+
+    this.eventBus.emitPristineHistoryUpdated({
+      nodes,
+      newNodes,
+    });
+  };
+
  start() {
    if (this.unsubscribeHistory) {
      this.unsubscribeHistory();
    }

-    this.unsubscribeHistory = this.chatHistory.subscribe(
-      (_event: HistoryEvent) => {
-        // Rebuild the pristine Context Graph graph from the full source history on every change.
-        // Wait, toGraph still returns an Episode[].
-        // We actually need to map the Episode[] to a flat ConcreteNode[] here to form the 'nodes'.
-        const pristineEpisodes = this.graphMapper.toGraph(
-          this.chatHistory.get(),
-          this.tokenCalculator,
-        );
+    this.unsubscribeHistory = this.chatHistory.subscribe(this.processEvent);

-        const nodes: ConcreteNode[] = [];
-        for (const ep of pristineEpisodes) {
-          if (ep.concreteNodes) {
-            for (const child of ep.concreteNodes) {
-              nodes.push(child);
-            }
-          }
-        }
-
-        const newNodes = new Set<string>();
-        for (const node of nodes) {
-          if (!this.seenNodeIds.has(node.id)) {
-            newNodes.add(node.id);
-            this.seenNodeIds.add(node.id);
-          }
-        }
-
-        this.tracer.logEvent(
-          'HistoryObserver',
-          'Rebuilt pristine graph from chat history update',
-          { nodesSize: nodes.length, newNodesCount: newNodes.size },
-        );
-
-        this.eventBus.emitPristineHistoryUpdated({
-          nodes,
-          newNodes,
-        });
-      },
-    );
+    // Process any existing history immediately upon start
+    const existing = this.chatHistory.get();
+    if (existing && existing.length > 0) {
+      this.processEvent({ type: 'SYNC_FULL', payload: existing });
+    }
  }

  stop() {
@@ -0,0 +1,117 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Config } from '../config/config.js';
+import type { GeminiChat } from '../core/geminiChat.js';
+import { ContextProcessorRegistry } from './config/registry.js';
+import { loadContextManagementConfig } from './config/configLoader.js';
+import { ContextTracer } from './tracer.js';
+import { ContextEventBus } from './eventBus.js';
+import { ContextEnvironmentImpl } from './pipeline/environmentImpl.js';
+import { PipelineOrchestrator } from './pipeline/orchestrator.js';
+import { ContextManager } from './contextManager.js';
+import { debugLogger } from '../utils/debugLogger.js';
+import { NodeTruncationProcessorOptionsSchema } from './processors/nodeTruncationProcessor.js';
+import { ToolMaskingProcessorOptionsSchema } from './processors/toolMaskingProcessor.js';
+import { HistoryTruncationProcessorOptionsSchema } from './processors/historyTruncationProcessor.js';
+import { BlobDegradationProcessorOptionsSchema } from './processors/blobDegradationProcessor.js';
+import { NodeDistillationProcessorOptionsSchema } from './processors/nodeDistillationProcessor.js';
+import { StateSnapshotProcessorOptionsSchema } from './processors/stateSnapshotProcessor.js';
+import { StateSnapshotAsyncProcessorOptionsSchema } from './processors/stateSnapshotAsyncProcessor.js';
+import { RollingSummaryProcessorOptionsSchema } from './processors/rollingSummaryProcessor.js';
+
+export async function initializeContextManager(
+  config: Config,
+  chat: GeminiChat,
+  lastPromptId: string,
+): Promise<ContextManager | undefined> {
+  const isV1Enabled = config.getContextManagementConfig().enabled;
+  debugLogger.log(
+    `[initializer] called with enabled=${isV1Enabled}, GEMINI_CONTEXT_TRACE_DIR=${process.env['GEMINI_CONTEXT_TRACE_DIR']}`,
+  );
+
+  if (!isV1Enabled) {
+    return undefined;
+  }
+
+  const registry = new ContextProcessorRegistry();
+  registry.registerProcessor({
+    id: 'NodeTruncationProcessor',
+    schema: NodeTruncationProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'ToolMaskingProcessor',
+    schema: ToolMaskingProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'HistoryTruncationProcessor',
+    schema: HistoryTruncationProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'BlobDegradationProcessor',
+    schema: BlobDegradationProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'NodeDistillationProcessor',
+    schema: NodeDistillationProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'StateSnapshotProcessor',
+    schema: StateSnapshotProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'StateSnapshotAsyncProcessor',
+    schema: StateSnapshotAsyncProcessorOptionsSchema,
+  });
+  registry.registerProcessor({
+    id: 'RollingSummaryProcessor',
+    schema: RollingSummaryProcessorOptionsSchema,
+  });
+
+  const sidecarProfile = await loadContextManagementConfig(
+    config.getExperimentalContextManagementConfig(),
+    registry,
+  );
+
+  const storage = config.storage;
+  const logDir = storage.getProjectTempLogsDir();
+  const projectTempDir = storage.getProjectTempDir();
+
+  const tracer = new ContextTracer({
+    enabled: !!process.env['GEMINI_CONTEXT_TRACE_DIR'],
+    targetDir: projectTempDir,
+    sessionId: lastPromptId,
+  });
+
+  const eventBus = new ContextEventBus();
+
+  const env = new ContextEnvironmentImpl(
+    () => config.getBaseLlmClient(),
+    config.getSessionId(),
+    lastPromptId,
+    logDir,
+    projectTempDir,
+    tracer,
+    4,
+    eventBus,
+  );
+
+  const orchestrator = new PipelineOrchestrator(
+    sidecarProfile.buildPipelines(env),
+    sidecarProfile.buildAsyncPipelines(env),
+    env,
+    eventBus,
+    tracer,
+  );
+
+  return new ContextManager(
+    sidecarProfile,
+    env,
+    tracer,
+    orchestrator,
+    chat.agentHistory,
+  );
+}
@@ -95,7 +95,7 @@ describe('ContextWorkingBufferImpl', () => {
    buffer = buffer.applyProcessorResult('Summarizer', [p1, p2], [summaryNode]);

    // p1 and p2 are removed, p3 remains, s1 is added
-    expect(buffer.nodes.map((n) => n.id)).toEqual(['p3', 's1']);
+    expect(buffer.nodes.map((n) => n.id)).toEqual(['s1', 'p3']);

    // Provenance lookup: The summary node should resolve to both p1 and p2!
    const roots = buffer.getPristineNodes('s1');
@@ -107,13 +107,19 @@ export class ContextWorkingBufferImpl implements ContextWorkingBuffer {

    // Calculate new node array
    const removedSet = new Set(removedIds);
-    const retainedNodes = this.nodes.filter((n) => !removedSet.has(n.id));
-    const newGraph = [...retainedNodes];

-    // We append the output nodes in the same general position if possible,
-    // but in a complex graph we just ensure they exist. V2 graph uses timestamps for order.
-    // For simplicity, we just push added nodes to the end of the retained array
-    newGraph.push(...addedNodes);
+    const newGraph = this.nodes.filter((n) => !removedSet.has(n.id));
+    const insertionIndex = this.nodes.findIndex((n) => removedSet.has(n.id));
+
+    // IMPORTANT: We do NOT use structuredClone here.
+    // The ContextTokenCalculator relies on a WeakMap tied to exact object references
+    // for O(1) performance. Deep cloning would cause catastrophic cache misses.
+    // The pipeline enforces immutability, making reference passing safe.
+    if (insertionIndex !== -1) {
+      newGraph.splice(insertionIndex, 0, ...addedNodes);
+    } else {
+      newGraph.push(...addedNodes);
+    }

    // Calculate new provenance map
    const newProvenanceMap = new Map(this.provenanceMap);
@@ -16,7 +16,7 @@ describe('ContextEnvironmentImpl', () => {
    const mockLlmClient = createMockLlmClient();

    const env = new ContextEnvironmentImpl(
-      mockLlmClient,
+      () => mockLlmClient,
      'mock-session',
      'mock-prompt',
      '/tmp/trace',
@@ -21,7 +21,7 @@ export class ContextEnvironmentImpl implements ContextEnvironment {
  readonly graphMapper: ContextGraphMapper;

  constructor(
-    readonly llmClient: BaseLlmClient,
+    private readonly llmClientProvider: () => BaseLlmClient,
    readonly sessionId: string,
    readonly promptId: string,
    readonly traceDir: string,
@@ -39,4 +39,8 @@ export class ContextEnvironmentImpl implements ContextEnvironment {
    this.inbox = new LiveInbox();
    this.graphMapper = new ContextGraphMapper(this.behaviorRegistry);
  }
+
+  get llmClient(): BaseLlmClient {
+    return this.llmClientProvider();
+  }
 }
@@ -204,6 +204,11 @@ export class PipelineOrchestrator {
          allowedTargets,
          returnedNodes,
        );
+        this.eventBus.emitProcessorResult({
+          processorId: processor.id,
+          targets: allowedTargets,
+          returnedNodes,
+        });
      } catch (error) {
        debugLogger.error(
          `Pipeline ${pipeline.name} failed async at ${processor.id}:`,
@@ -65,4 +65,34 @@ describe('ToolMaskingProcessor', () => {
    // Returned the exact same object reference
    expect(result[0]).toBe(toolStep);
  });
+  it('should strictly preserve the original intent args when only the observation is masked', async () => {
+    const env = createMockEnvironment();
+
+    const processor = createToolMaskingProcessor('ToolMaskingProcessor', env, {
+      stringLengthThresholdTokens: 10,
+    });
+
+    const originalIntent = { command: 'ls -R', dir: '/tmp' };
+    const longString = 'A'.repeat(500);
+
+    const toolStep = createDummyToolNode('ep1', 50, 500, {
+      intent: originalIntent,
+      observation: {
+        result: longString,
+      },
+    });
+
+    const result = await processor.process(createMockProcessArgs([toolStep]));
+
+    expect(result.length).toBe(1);
+    const masked = result[0] as ToolExecution;
+
+    expect(masked.id).not.toBe(toolStep.id);
+
+    const obs = masked.observation as { result: string };
+    expect(obs.result).toContain('<tool_output_masked>');
+
+    // The intent MUST be perfectly preserved and not fall back to {} or undefined incorrectly
+    expect(masked.intent).toEqual(originalIntent);
+  });
 });
@@ -129,7 +129,10 @@ export function createToolMaskingProcessor(
          1024
        ).toFixed(2);
        const totalLines = content.split('\n').length;
-        return `<tool_output_masked>\n[Tool ${nodeType} string (${fileSizeMB}MB, ${totalLines} lines) masked to preserve context window. Full string saved to: ${filePath}]\n</tool_output_masked>`;
+
+        // Ensure consistent path separators for LLM tokenization and deterministic tests across OSes
+        const normalizedPath = filePath.split(path.sep).join('/');
+        return `<tool_output_masked>\n[Tool ${nodeType} string (${fileSizeMB}MB, ${totalLines} lines) masked to preserve context window. Full string saved to: ${normalizedPath}]\n</tool_output_masked>`;
      };

      const returnedNodes: ConcreteNode[] = [];
@@ -199,6 +202,13 @@ export function createToolMaskingProcessor(
              const maskedIntent = isMaskableRecord(intentRes.masked)
                ? (intentRes.masked as Record<string, unknown>)
                : undefined;
+              // Ensure we strictly preserve the original intent if it was unchanged and is a record
+              const finalIntent = intentRes.changed
+                ? maskedIntent
+                : isMaskableRecord(rawIntent)
+                  ? (rawIntent as Record<string, unknown>)
+                  : undefined;
+
              // Handle observation explicitly as string vs object
              const maskedObs =
                typeof obsRes.masked === 'string'
@@ -206,13 +216,21 @@ export function createToolMaskingProcessor(
                  : isMaskableRecord(obsRes.masked)
                    ? (obsRes.masked as Record<string, unknown>)
                    : undefined;
+              // Ensure we strictly preserve the original observation if it was unchanged
+              const finalObs = obsRes.changed
+                ? maskedObs
+                : typeof rawObs === 'string'
+                  ? ({ message: rawObs } as Record<string, unknown>)
+                  : isMaskableRecord(rawObs)
+                    ? (rawObs as Record<string, unknown>)
+                    : undefined;

              const newIntentTokens =
                env.tokenCalculator.estimateTokensForParts([
                  {
                    functionCall: {
                      name: toolName || 'unknown',
-                      args: maskedIntent,
+                      args: finalIntent,
                      id: callId,
                    },
                  },
@@ -223,7 +241,7 @@ export function createToolMaskingProcessor(
                obsPart = {
                  functionResponse: {
                    name: toolName || 'unknown',
-                    response: maskedObs,
+                    response: finalObs,
                    id: callId,
                  },
                };
@@ -241,8 +259,8 @@ export function createToolMaskingProcessor(
                const maskedNode: ToolExecution = {
                  ...node,
                  id: randomUUID(), // Modified, so generate new ID
-                  intent: maskedIntent ?? node.intent,
-                  observation: maskedObs ?? node.observation,
+                  intent: finalIntent ?? node.intent,
+                  observation: finalObs ?? node.observation,
                  tokens: {
                    intent: newIntentTokens,
                    observation: newObsTokens,
@@ -5,6 +5,7 @@
 */

 import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
+import fs from 'node:fs';
 import { SimulationHarness } from './simulationHarness.js';
 import { createMockLlmClient } from '../testing/contextTestUtils.js';
 import type { ContextProfile } from '../config/profiles.js';
@@ -28,6 +29,11 @@ expect.addSnapshotSerializer({
 });

 describe('System Lifecycle Golden Tests', () => {
+  afterAll(async () => {
+    fs.rmSync('/tmp/sim', { recursive: true, force: true });
+    fs.rmSync('mock', { recursive: true, force: true });
+  });
+
  beforeAll(() => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);
  });
@@ -59,7 +59,7 @@ export class SimulationHarness {
      sessionId: 'sim-session',
    });
    this.env = new ContextEnvironmentImpl(
-      mockLlmClient,
+      () => mockLlmClient,
      'sim-prompt',
      'sim-session',
      mockTempDir,
@@ -145,8 +145,8 @@ export function createMockEnvironment(
  });
  const eventBus = new ContextEventBus();

-  const env = new ContextEnvironmentImpl(
-    llmClient,
+  let env = new ContextEnvironmentImpl(
+    () => llmClient as BaseLlmClient,
    'mock-session',
    'mock-prompt-id',
    '/tmp/.gemini/trace',
@@ -157,7 +157,20 @@ export function createMockEnvironment(
  );

  if (overrides) {
-    Object.assign(env, overrides);
+    if (overrides.llmClient) {
+      env = new ContextEnvironmentImpl(
+        () => overrides.llmClient!,
+        env.sessionId,
+        env.promptId,
+        env.traceDir,
+        env.projectTempDir,
+        env.tracer,
+        env.charsPerToken,
+        env.eventBus,
+      );
+    }
+    const { llmClient: _llmClient, ...restOverrides } = overrides;
+    Object.assign(env, restOverrides);
  }
  return env;
 }
@@ -247,7 +260,7 @@ export function setupContextComponentTest(
  });
  const eventBus = new ContextEventBus();
  const env = new ContextEnvironmentImpl(
-    config.getBaseLlmClient(),
+    () => config.getBaseLlmClient(),
    'test prompt-id',
    'test-session',
    '/tmp',
@@ -22,6 +22,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
  let tmpDir: string;

  beforeEach(async () => {
+    vi.stubEnv('GEMINI_CONTEXT_TRACE_DIR', '');
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-tracer-test-'));

    vi.useFakeTimers();
@@ -29,6 +30,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
  });

  afterEach(async () => {
+    vi.unstubAllEnvs();
    vi.useRealTimers();
    await fs.rm(tmpDir, { recursive: true, force: true });
  });
@@ -45,7 +47,9 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
    // Verify Initialization
    const traceLogPath = path.join(
      tmpDir,
-      '.gemini/context_trace/test-session/trace.log',
+      'context_trace',
+      'test-session',
+      'trace.log',
    );
    const initTraceLog = readFileSync(traceLogPath, 'utf-8');
    expect(initTraceLog).toContain('[SYSTEM] Context Tracer Initialized');
@@ -65,7 +69,10 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {

    const expectedAssetPath = path.join(
      tmpDir,
-      '.gemini/context_trace/test-session/assets/1767268800020-mock-uuid-1-largeKey.json',
+      'context_trace',
+      'test-session',
+      'assets',
+      '1767268800020-mock-uuid-1-largeKey.json',
    );
    expect(existsSync(expectedAssetPath)).toBe(true);

@@ -25,12 +25,9 @@ export class ContextTracer {
  constructor(options: ContextTracerOptions) {
    this.enabled = options.enabled ?? false;

-    this.traceDir = path.join(
-      options.targetDir,
-      '.gemini',
-      'context_trace',
-      options.sessionId,
-    );
+    this.traceDir =
+      process.env['GEMINI_CONTEXT_TRACE_DIR'] ||
+      path.join(options.targetDir, 'context_trace', options.sessionId);
    this.assetsDir = path.join(this.traceDir, 'assets');

    if (this.enabled) {
@@ -5,7 +5,7 @@
 */

 import type { Part } from '@google/genai';
-import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
+import { estimateTokenCountSync } from '../../utils/tokenCalculation.js';
 import type { ConcreteNode } from '../graph/types.js';
 import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';

@@ -84,24 +84,27 @@ export class ContextTokenCalculator {
    }
    return tokens;
  }
+
+  private readonly partTokenCache = new WeakMap<object, number>();
+
  /**
   * Slower, precise estimation for a Gemini Content/Part graph.
   * Deeply inspects the nested structure and uses the base tokenization math.
   */
-  estimateTokensForParts(parts: Part[], depth: number = 0): number {
-    let totalTokens = 0;
+  estimateTokensForParts(parts: Part[]): number {
+    let total = 0;
    for (const part of parts) {
-      if (typeof part.text === 'string') {
-        totalTokens += Math.ceil(part.text.length / this.charsPerToken);
-      } else if (part.inlineData !== undefined || part.fileData !== undefined) {
-        totalTokens += 258;
+      if (part !== null && typeof part === 'object') {
+        let cost = this.partTokenCache.get(part);
+        if (cost === undefined) {
+          cost = estimateTokenCountSync([part], 0, this.charsPerToken);
+          this.partTokenCache.set(part, cost);
+        }
+        total += cost;
      } else {
-        totalTokens += Math.ceil(
-          JSON.stringify(part).length / this.charsPerToken,
-        );
+        total += estimateTokenCountSync([part], 0, this.charsPerToken);
      }
    }
-    // Also include structural overhead
-    return totalTokens + baseEstimate(parts, depth);
+    return total;
  }
 }
@@ -45,6 +45,7 @@ import type { ContentGenerator } from './contentGenerator.js';
 import { LoopDetectionService } from '../services/loopDetectionService.js';
 import { ChatCompressionService } from '../context/chatCompressionService.js';
 import { AgentHistoryProvider } from '../context/agentHistoryProvider.js';
+import type { ContextManager } from '../context/contextManager.js';
 import { ideContextStore } from '../ide/ideContext.js';
 import {
  logContentRetryFailure,
@@ -74,6 +75,7 @@ import {
 import { getDisplayString, resolveModel } from '../config/models.js';
 import { partToString } from '../utils/partUtils.js';
 import { coreEvents, CoreEvent } from '../utils/events.js';
+import { initializeContextManager } from '../context/initializer.js';

 const MAX_TURNS = 100;

@@ -97,6 +99,7 @@ export class GeminiClient {
  private readonly compressionService: ChatCompressionService;
  private readonly agentHistoryProvider: AgentHistoryProvider;
  private readonly toolOutputMaskingService: ToolOutputMaskingService;
+  private contextManager?: ContextManager;
  private lastPromptId: string;
  private currentSequenceModel: string | null = null;
  private lastSentIdeContext: IdeContext | undefined;
@@ -393,6 +396,11 @@ export class GeminiClient {
        },
      );
      await chat.initialize(resumedSessionData, 'main');
+      this.contextManager = await initializeContextManager(
+        this.config,
+        chat,
+        this.lastPromptId,
+      );
      return chat;
    } catch (error) {
      await reportError(
@@ -618,10 +626,12 @@ export class GeminiClient {
    const modelForLimitCheck = this._getActiveModelForCurrentTurn();

    if (this.config.getContextManagementConfig().enabled) {
-      const newHistory = await this.agentHistoryProvider.manageHistory(
-        this.getHistory(),
-        signal,
-      );
+      const newHistory = this.contextManager
+        ? await this.contextManager.renderHistory()
+        : await this.agentHistoryProvider.manageHistory(
+            this.getHistory(),
+            signal,
+          );
      if (newHistory.length !== this.getHistory().length) {
        this.getChat().setHistory(newHistory);
      }
@@ -183,6 +183,7 @@ describe('GeminiChat', () => {
      getRetryFetchErrors: vi.fn().mockReturnValue(false),
      getMaxAttempts: vi.fn().mockReturnValue(10),
      getUserTier: vi.fn().mockReturnValue(undefined),
+      isContextManagementEnabled: vi.fn().mockReturnValue(false),
      modelConfigService: {
        getResolvedConfig: vi.fn().mockImplementation((modelConfigKey) => {
          const model = modelConfigKey.model ?? mockConfig.getModel();
@@ -17,7 +17,9 @@ import {
  type PartListUnion,
  type GenerateContentConfig,
  type GenerateContentParameters,
+  type FunctionCall,
 } from '@google/genai';
+import { AgentChatHistory } from './agentChatHistory.js';
 import { toParts } from '../code_assist/converter.js';
 import {
  retryWithBackoff,
@@ -248,19 +250,21 @@ export class GeminiChat {
  private sendPromise: Promise<void> = Promise.resolve();
  private readonly chatRecordingService: ChatRecordingService;
  private lastPromptTokenCount: number;
+  agentHistory: AgentChatHistory;

  constructor(
    private readonly context: AgentLoopContext,
    private systemInstruction: string = '',
    private tools: Tool[] = [],
-    private history: Content[] = [],
+    history: Content[] = [],
    resumedSessionData?: ResumedSessionData,
    private readonly onModelChanged?: (modelId: string) => Promise<Tool[]>,
  ) {
    validateHistory(history);
+    this.agentHistory = new AgentChatHistory(history);
    this.chatRecordingService = new ChatRecordingService(context);
    this.lastPromptTokenCount = estimateTokenCountSync(
-      this.history.flatMap((c) => c.parts || []),
+      this.agentHistory.flatMap((c) => c.parts || []),
    );
  }

@@ -347,7 +351,7 @@ export class GeminiChat {
    }

    // Add user content to history ONCE before any attempts.
-    this.history.push(userContent);
+    this.agentHistory.push(userContent);
    const requestContents = this.getHistory(true);

    const streamWithRetries = async function* (
@@ -747,8 +751,8 @@ export class GeminiChat {
   */
  getHistory(curated: boolean = false): readonly Content[] {
    const history = curated
-      ? extractCuratedHistory(this.history)
-      : this.history;
+      ? extractCuratedHistory([...this.agentHistory.get()])
+      : this.agentHistory.get();
    return [...history];
  }

@@ -756,26 +760,26 @@ export class GeminiChat {
   * Clears the chat history.
   */
  clearHistory(): void {
-    this.history = [];
+    this.agentHistory.clear();
  }

  /**
   * Adds a new entry to the chat history.
   */
  addHistory(content: Content): void {
-    this.history.push(content);
+    this.agentHistory.push(content);
  }

  setHistory(history: readonly Content[]): void {
-    this.history = [...history];
+    this.agentHistory.set(history);
    this.lastPromptTokenCount = estimateTokenCountSync(
-      this.history.flatMap((c) => c.parts || []),
+      this.agentHistory.flatMap((c) => c.parts || []),
    );
    this.chatRecordingService.updateMessagesFromHistory(history);
  }

  stripThoughtsFromHistory(): void {
-    this.history = this.history.map((content) => {
+    this.agentHistory.map((content) => {
      const newContent = { ...content };
      if (newContent.parts) {
        newContent.parts = newContent.parts.map((part) => {
@@ -885,6 +889,9 @@ export class GeminiChat {
    let hasThoughts = false;
    let finishReason: FinishReason | undefined;

+    // The SDK provides fully assembled FunctionCall objects in chunk.functionCalls
+    const finalFunctionCalls: FunctionCall[] = [];
+
    for await (const chunk of streamResponse) {
      const candidateWithReason = chunk?.candidates?.find(
        (candidate) => candidate.finishReason,
@@ -894,6 +901,10 @@ export class GeminiChat {
        finishReason = candidateWithReason.finishReason as FinishReason;
      }

+      if (chunk.functionCalls && chunk.functionCalls.length > 0) {
+        finalFunctionCalls.push(...chunk.functionCalls);
+      }
+
      if (isValidResponse(chunk)) {
        const content = chunk.candidates?.[0]?.content;
        if (content?.parts) {
@@ -948,16 +959,66 @@ export class GeminiChat {

    // String thoughts and consolidate text parts.
    const consolidatedParts: Part[] = [];
-    for (const part of modelResponseParts) {
-      const lastPart = consolidatedParts[consolidatedParts.length - 1];
-      if (
-        lastPart?.text &&
-        isValidNonThoughtTextPart(lastPart) &&
-        isValidNonThoughtTextPart(part)
-      ) {
-        lastPart.text += part.text;
-      } else {
-        consolidatedParts.push(part);
+
+    if (this.context.config.isContextManagementEnabled()) {
+      for (const part of modelResponseParts) {
+        if (part.functionCall) {
+          // Skip partial functionCall stream chunks! We will replace them
+          // entirely with the pristine, fully assembled objects from the SDK
+          // (finalFunctionCalls) immediately below. We only push the very first
+          // partial chunk of a sequence as a placeholder so we know *where*
+          // in the sequence of parts the tool call happened.
+          const lastPart = consolidatedParts[consolidatedParts.length - 1];
+          const currentId = part.functionCall.id;
+          const lastId = lastPart?.functionCall?.id;
+
+          const isNewCall =
+            !lastPart?.functionCall ||
+            (currentId !== undefined &&
+              lastId !== undefined &&
+              currentId !== lastId) ||
+            lastPart.functionCall.name !== part.functionCall.name;
+
+          if (isNewCall) {
+            consolidatedParts.push({ ...part }); // Push placeholder
+          }
+        } else {
+          const lastPart = consolidatedParts[consolidatedParts.length - 1];
+          if (
+            lastPart?.text &&
+            isValidNonThoughtTextPart(lastPart) &&
+            isValidNonThoughtTextPart(part)
+          ) {
+            lastPart.text += part.text;
+          } else {
+            consolidatedParts.push(part);
+          }
+        }
+      }
+
+      // Swap each placeholder, in order, for the SDK's assembled FunctionCall
+      if (finalFunctionCalls.length > 0) {
+        let callIndex = 0;
+        for (const part of consolidatedParts) {
+          if (part.functionCall && callIndex < finalFunctionCalls.length) {
+            part.functionCall = finalFunctionCalls[callIndex];
+            callIndex++;
+          }
+        }
+      }
+    } else {
+      // Fallback to legacy consolidation for non-context-manager users
+      for (const part of modelResponseParts) {
+        const lastPart = consolidatedParts[consolidatedParts.length - 1];
+        if (
+          lastPart?.text &&
+          isValidNonThoughtTextPart(lastPart) &&
+          isValidNonThoughtTextPart(part)
+        ) {
+          lastPart.text += part.text;
+        } else {
+          consolidatedParts.push(part);
+        }
      }
    }

@@ -1013,7 +1074,7 @@ export class GeminiChat {
      }
    }

-    this.history.push({ role: 'model', parts: consolidatedParts });
+    this.agentHistory.push({ role: 'model', parts: consolidatedParts });
  }

  getLastPromptTokenCount(): number {
@@ -121,6 +121,7 @@ describe('GeminiChat Network Retries', () => {
          generateContentConfig: { temperature: 0 },
        })),
      },
+      isContextManagementEnabled: vi.fn().mockReturnValue(false),
      getEnableHooks: vi.fn().mockReturnValue(false),
      getModelAvailabilityService: vi
        .fn()
@@ -293,7 +293,12 @@ export type { Content, Part, FunctionCall } from '@google/genai';

 // Export context types and profiles
 export * from './context/types.js';
-export * from './context/profiles.js';
+
+export { generalistProfile as legacyGeneralistProfile } from './context/profiles.js';
+export {
+  generalistProfile,
+  stressTestProfile,
+} from './context/config/profiles.js';

 // Export trust utility
 export * from './utils/trust.js';
@@ -29,12 +29,14 @@ const MAX_CHARS_FOR_FULL_HEURISTIC = 100_000;
 // standard multimodal responses are typically depth 1.
 const MAX_RECURSION_DEPTH = 3;

+const DEFAULT_CHARS_PER_TOKEN = 4;
+
 /**
 * Heuristic estimation of tokens for a text string.
 */
-function estimateTextTokens(text: string): number {
+function estimateTextTokens(text: string, charsPerToken: number): number {
  if (text.length > MAX_CHARS_FOR_FULL_HEURISTIC) {
-    return text.length / 4;
+    return text.length / charsPerToken;
  }

  let tokens = 0;
@@ -73,25 +75,33 @@ function estimateMediaTokens(part: Part): number | undefined {
 * Heuristic estimation for tool responses, avoiding massive string copies
 * and accounting for nested Gemini 3 multimodal parts.
 */
-function estimateFunctionResponseTokens(part: Part, depth: number): number {
+function estimateFunctionResponseTokens(
+  part: Part,
+  depth: number,
+  charsPerToken: number,
+): number {
  const fr = part.functionResponse;
  if (!fr) return 0;

-  let totalTokens = (fr.name?.length ?? 0) / 4;
+  let totalTokens = (fr.name?.length ?? 0) / charsPerToken;
  const response = fr.response as unknown;

  if (typeof response === 'string') {
-    totalTokens += response.length / 4;
+    totalTokens += response.length / charsPerToken;
  } else if (response !== undefined && response !== null) {
    // For objects, stringify only the payload, not the whole Part object.
-    totalTokens += JSON.stringify(response).length / 4;
+    totalTokens += JSON.stringify(response).length / charsPerToken;
  }

  // Gemini 3: Handle nested multimodal parts recursively.
  // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
  const nestedParts = (fr as unknown as { parts?: Part[] }).parts;
  if (nestedParts && nestedParts.length > 0) {
-    totalTokens += estimateTokenCountSync(nestedParts, depth + 1);
+    totalTokens += estimateTokenCountSync(
+      nestedParts,
+      depth + 1,
+      charsPerToken,
+    );
  }

  return totalTokens;
@@ -100,11 +110,12 @@ function estimateFunctionResponseTokens(part: Part, depth: number): number {
 /**
 * Estimates token count for parts synchronously using a heuristic.
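- * - Text: character-based heuristic (ASCII vs CJK) for small strings, length/4 for massive ones.
- * - Non-text (Tools, etc): JSON string length / 4.
+ * - Text: character-based heuristic (ASCII vs CJK) for small strings, length / charsPerToken for massive ones.
+ * - Non-text (Tools, etc): JSON string length / charsPerToken.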
 */
 export function estimateTokenCountSync(
  parts: Part[],
  depth: number = 0,
+  charsPerToken: number = DEFAULT_CHARS_PER_TOKEN,
 ): number {
  if (depth > MAX_RECURSION_DEPTH) {
    return 0;
@@ -113,9 +124,9 @@ export function estimateTokenCountSync(
  let totalTokens = 0;
  for (const part of parts) {
    if (typeof part.text === 'string') {
-      totalTokens += estimateTextTokens(part.text);
+      totalTokens += estimateTextTokens(part.text, charsPerToken);
    } else if (part.functionResponse) {
-      totalTokens += estimateFunctionResponseTokens(part, depth);
+      totalTokens += estimateFunctionResponseTokens(part, depth, charsPerToken);
    } else {
      const mediaEstimate = estimateMediaTokens(part);
      if (mediaEstimate !== undefined) {
@@ -123,7 +134,7 @@ export function estimateTokenCountSync(
      } else {
        // Fallback for other non-text parts (e.g., functionCall).
        // Note: JSON.stringify(part) here is safe as these parts are typically small.
-        totalTokens += JSON.stringify(part).length / 4;
+        totalTokens += JSON.stringify(part).length / charsPerToken;
      }
    }
  }
@@ -162,9 +173,9 @@ export async function calculateRequestTokenCount(
    } catch (error) {
      // Fallback to local estimation if the API call fails
      debugLogger.debug('countTokens API failed:', error);
-      return estimateTokenCountSync(parts);
+      return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
    }
  }

-  return estimateTokenCountSync(parts);
+  return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
 }
@@ -3147,6 +3147,13 @@
          "default": true,
          "type": "boolean"
        },
+        "stressTestProfile": {
+          "title": "Use the stress test profile to aggressively trigger context management.",
+          "description": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.",
+          "markdownDescription": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
+          "default": false,
+          "type": "boolean"
+        },
        "autoMemory": {
          "title": "Auto Memory",
          "description": "Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox.",