feat(core): wire up the new ContextManager and AgentChatHistory (#25409)

This commit is contained in:
joshualitt
2026-04-27 11:50:00 -07:00
committed by GitHub
parent 98aca28985
commit 71f313b51a
35 changed files with 707 additions and 377 deletions
+6
View File
@@ -1846,6 +1846,12 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `true`
- **Requires restart:** Yes
- **`experimental.stressTestProfile`** (boolean):
- **Description:** Significantly lowers token limits to force early garbage
collection and distillation for testing purposes.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.autoMemory`** (boolean):
- **Description:** Automatically extract reusable skills from past sessions in
the background. Review results with /memory inbox.
-47
View File
@@ -21,8 +21,6 @@ import {
type MCPServerConfig,
type GeminiCLIExtension,
Storage,
generalistProfile,
type ContextManagementConfig,
} from '@google/gemini-cli-core';
import { loadCliConfig, parseArguments, type CliArgs } from './config.js';
import {
@@ -2217,51 +2215,6 @@ describe('loadCliConfig context management', () => {
},
});
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getContextManagementConfig()).toStrictEqual(
generalistProfile,
);
expect(config.isContextManagementEnabled()).toBe(true);
});
it('should be true when contextManagement is set to true in settings', async () => {
process.argv = ['node', 'script.js'];
const argv = await parseArguments(createTestMergedSettings());
const contextManagementConfig: Partial<ContextManagementConfig> = {
historyWindow: {
maxTokens: 100_000,
retainedTokens: 50_000,
},
messageLimits: {
normalMaxTokens: 1000,
retainedMaxTokens: 10_000,
normalizationHeadRatio: 0.25,
},
tools: {
distillation: {
maxOutputTokens: 10_000,
summarizationThresholdTokens: 15_000,
},
outputMasking: {
protectionThresholdTokens: 30_000,
minPrunableThresholdTokens: 10_000,
protectLatestTurn: false,
},
},
};
const settings = createTestMergedSettings({
experimental: {
contextManagement: true,
},
// The type of numbers is being inferred strangely, and so we have to cast
// to `any` here.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
contextManagement: contextManagementConfig as any,
});
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getContextManagementConfig()).toStrictEqual({
enabled: true,
...contextManagementConfig,
});
expect(config.isContextManagementEnabled()).toBe(true);
});
});
+13 -8
View File
@@ -48,7 +48,6 @@ import {
type HookEventName,
type OutputFormat,
detectIdeFromEnv,
generalistProfile,
} from '@google/gemini-cli-core';
import {
type Settings,
@@ -904,14 +903,19 @@ export async function loadCliConfig(
}
}
const useGeneralistProfile =
settings.experimental?.generalistProfile ?? false;
const useContextManagement =
settings.experimental?.contextManagement ?? false;
// TODO(joshualitt): Clean this up alongside removal of the legacy config.
let profileSelector: string | undefined = undefined;
if (settings.experimental?.stressTestProfile) {
profileSelector = 'stressTestProfile';
} else if (
settings.experimental?.generalistProfile ||
settings.experimental?.contextManagement
) {
profileSelector = 'generalistProfile';
}
const contextManagement = {
...(useGeneralistProfile ? generalistProfile : {}),
...(useContextManagement ? settings?.contextManagement : {}),
enabled: useContextManagement || useGeneralistProfile,
enabled: !!profileSelector,
};
return new Config({
@@ -935,6 +939,7 @@ export async function loadCliConfig(
worktreeSettings,
coreTools: settings.tools?.core || undefined,
experimentalContextManagementConfig: profileSelector,
allowedTools: allowedTools.length > 0 ? allowedTools : undefined,
policyEngineConfig,
policyUpdateConfirmationRequest,
+11
View File
@@ -2388,6 +2388,17 @@ const SETTINGS_SCHEMA = {
'Disable the built-in save_memory tool and let the main agent persist project context by editing markdown files directly with edit/write_file. Route facts across four tiers: team-shared conventions go to project GEMINI.md files, project-specific personal notes go to the per-project private memory folder (MEMORY.md as index + sibling .md files for detail), and cross-project personal preferences go to the global ~/.gemini/GEMINI.md (the only file under ~/.gemini/ that the agent can edit — settings, credentials, etc. remain off-limits). Set to false to fall back to the legacy save_memory tool.',
showInDialog: true,
},
stressTestProfile: {
type: 'boolean',
label:
'Use the stress test profile to aggressively trigger context management.',
category: 'Experimental',
requiresRestart: true,
default: false,
description:
'Significantly lowers token limits to force early garbage collection and distillation for testing purposes.',
showInDialog: false,
},
autoMemory: {
type: 'boolean',
label: 'Auto Memory',
@@ -6,19 +6,17 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { loadContextManagementConfig } from './configLoader.js';
import { defaultContextProfile } from './profiles.js';
import { generalistProfile } from './profiles.js';
import { ContextProcessorRegistry } from './registry.js';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as os from 'node:os';
import type { Config } from '../../config/config.js';
import type { JSONSchemaType } from 'ajv';
describe('SidecarLoader (Real FS)', () => {
let tmpDir: string;
let registry: ContextProcessorRegistry;
let sidecarPath: string;
let mockConfig: Config;
beforeEach(async () => {
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-sidecar-test-'));
@@ -32,10 +30,6 @@ describe('SidecarLoader (Real FS)', () => {
required: ['maxTokens'],
} as unknown as JSONSchemaType<{ maxTokens: number }>,
});
mockConfig = {
getExperimentalContextManagementConfig: () => sidecarPath,
} as unknown as Config;
});
afterEach(async () => {
@@ -43,14 +37,14 @@ describe('SidecarLoader (Real FS)', () => {
});
it('returns default profile if file does not exist', async () => {
const result = await loadContextManagementConfig(mockConfig, registry);
expect(result).toBe(defaultContextProfile);
const result = await loadContextManagementConfig(sidecarPath, registry);
expect(result).toBe(generalistProfile);
});
it('returns default profile if file exists but is 0 bytes', async () => {
await fs.writeFile(sidecarPath, '');
const result = await loadContextManagementConfig(mockConfig, registry);
expect(result).toBe(defaultContextProfile);
const result = await loadContextManagementConfig(sidecarPath, registry);
expect(result).toBe(generalistProfile);
});
it('returns parsed config if file is valid', async () => {
@@ -64,7 +58,7 @@ describe('SidecarLoader (Real FS)', () => {
},
};
await fs.writeFile(sidecarPath, JSON.stringify(validConfig));
const result = await loadContextManagementConfig(mockConfig, registry);
const result = await loadContextManagementConfig(sidecarPath, registry);
expect(result.config.budget?.maxTokens).toBe(2000);
expect(result.config.processorOptions?.['myTruncation']).toBeDefined();
});
@@ -81,14 +75,14 @@ describe('SidecarLoader (Real FS)', () => {
};
await fs.writeFile(sidecarPath, JSON.stringify(invalidConfig));
await expect(
loadContextManagementConfig(mockConfig, registry),
loadContextManagementConfig(sidecarPath, registry),
).rejects.toThrow('Validation error');
});
it('throws validation error if file is empty whitespace', async () => {
await fs.writeFile(sidecarPath, ' \n ');
await expect(
loadContextManagementConfig(mockConfig, registry),
loadContextManagementConfig(sidecarPath, registry),
).rejects.toThrow('Unexpected end of JSON input');
});
});
@@ -4,11 +4,14 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../../config/config.js';
import * as fsSync from 'node:fs';
import * as fs from 'node:fs/promises';
import type { ContextManagementConfig } from './types.js';
import { defaultContextProfile, type ContextProfile } from './profiles.js';
import {
generalistProfile,
stressTestProfile,
type ContextProfile,
} from './profiles.js';
import { SchemaValidator } from '../../utils/schemaValidator.js';
import { getContextManagementConfigSchema } from './schema.js';
import type { ContextProcessorRegistry } from './registry.js';
@@ -54,9 +57,9 @@ async function loadConfigFromFile(
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const validConfig = parsed as ContextManagementConfig;
return {
...defaultContextProfile,
...generalistProfile,
config: {
...defaultContextProfile.config,
...generalistProfile.config,
...(validConfig.budget ? { budget: validConfig.budget } : {}),
...(validConfig.processorOptions
? { processorOptions: validConfig.processorOptions }
@@ -70,21 +73,27 @@ async function loadConfigFromFile(
* If a config file is present but invalid, this will THROW to prevent silent misconfiguration.
*/
export async function loadContextManagementConfig(
config: Config,
sidecarPath: string | undefined,
registry: ContextProcessorRegistry,
): Promise<ContextProfile> {
const sidecarPath = config.getExperimentalContextManagementConfig();
if (sidecarPath === 'stressTestProfile') {
return stressTestProfile;
}
if (sidecarPath === 'generalistProfile') {
return generalistProfile;
}
if (sidecarPath && fsSync.existsSync(sidecarPath)) {
const size = fsSync.statSync(sidecarPath).size;
// If the file exists but is completely empty (0 bytes), it's safe to fallback.
if (size === 0) {
return defaultContextProfile;
return generalistProfile;
}
// If the file has content, enforce strict validation and throw on failure.
return loadConfigFromFile(sidecarPath, registry);
}
return defaultContextProfile;
return generalistProfile;
}
+55 -9
View File
@@ -62,7 +62,7 @@ export interface ContextProfile {
* The standard default context management profile.
* Optimized for safety, precision, and reliable summarization.
*/
export const defaultContextProfile: ContextProfile = {
export const generalistProfile: ContextProfile = {
config: {
budget: {
retainedTokens: 65000,
@@ -88,24 +88,32 @@ export const defaultContextProfile: ContextProfile = {
}),
),
createBlobDegradationProcessor('BlobDegradation', env), // No options
// Automatically distill extremely large blocks (e.g. huge source files pasted by the user)
createNodeDistillationProcessor(
'ImmediateNodeDistillation',
env,
resolveProcessorOptions(config, 'ImmediateNodeDistillation', {
nodeThresholdTokens: 15000,
}),
),
],
},
{
name: 'Normalization',
triggers: ['retained_exceeded'],
processors: [
createNodeTruncationProcessor(
'NodeTruncation',
env,
resolveProcessorOptions(config, 'NodeTruncation', {
maxTokensPerNode: 3000,
}),
),
createNodeDistillationProcessor(
'NodeDistillation',
env,
resolveProcessorOptions(config, 'NodeDistillation', {
nodeThresholdTokens: 5000,
nodeThresholdTokens: 3000,
}),
),
createNodeTruncationProcessor(
'NodeTruncation',
env,
resolveProcessorOptions(config, 'NodeTruncation', {
maxTokensPerNode: 2000,
}),
),
],
@@ -143,3 +151,41 @@ export const defaultContextProfile: ContextProfile = {
},
],
};
/**
 * A highly aggressive profile designed exclusively for testing Context Management.
 * Lowers token limits dramatically to force garbage collection and distillation loops
 * within a few conversational turns.
 *
 * Only `config` differs from `generalistProfile`: both pipeline builders are
 * taken from it by reference, so the two profiles run the same processors.
 */
export const stressTestProfile: ContextProfile = {
config: {
// Token budget for this profile. For comparison, generalistProfile sets
// retainedTokens to 65000.
budget: {
retainedTokens: 4000,
maxTokens: 10000,
},
// Per-processor option overrides; each entry names a processor type and the
// option values to use for it.
// NOTE(review): presumably each key must equal the name the generalist
// pipeline passes to `resolveProcessorOptions`. 'NodeTruncation' and
// 'NodeDistillation' are visible there; 'ToolMasking' is not in view — confirm.
// NOTE(review): there is no override for 'ImmediateNodeDistillation', whose
// generalist default nodeThresholdTokens is 15000 — higher than this profile's
// maxTokens of 10000. Confirm that is intentional.
processorOptions: {
ToolMasking: {
type: 'ToolMaskingProcessor',
options: {
stringLengthThresholdTokens: 500,
},
},
NodeTruncation: {
type: 'NodeTruncationProcessor',
options: {
maxTokensPerNode: 1000,
},
},
NodeDistillation: {
type: 'NodeDistillationProcessor',
options: {
nodeThresholdTokens: 1500,
},
},
},
},
// Re-use the generalist pipeline architecture exactly, but the `config` above
// will be passed into `resolveProcessorOptions` to aggressively override the thresholds.
// NOTE(review): how `config` reaches the shared builders is not visible here
// (callers invoke them with `env` only) — verify the overrides are actually applied.
buildPipelines: generalistProfile.buildPipelines,
buildAsyncPipelines: generalistProfile.buildAsyncPipelines,
};
+11 -3
View File
@@ -44,12 +44,10 @@ export class ContextManager {
this.env.tokenCalculator,
this.env.graphMapper,
);
this.historyObserver.start();
this.eventBus.onPristineHistoryUpdated((event) => {
const existingIds = new Set(this.buffer.nodes.map((n) => n.id));
const newIds = new Set(event.nodes.map((n) => n.id));
const addedNodes = event.nodes.filter((n) => !existingIds.has(n.id));
const addedNodes = event.nodes.filter((n) => event.newNodes.has(n.id));
// Prune any pristine nodes that were dropped from the upstream history
this.buffer = this.buffer.prunePristineNodes(newIds);
@@ -60,6 +58,15 @@ export class ContextManager {
this.evaluateTriggers(event.newNodes);
});
this.eventBus.onProcessorResult((event) => {
this.buffer = this.buffer.applyProcessorResult(
event.processorId,
event.targets,
event.returnedNodes,
);
});
this.historyObserver.start();
}
/**
@@ -153,6 +160,7 @@ export class ContextManager {
activeTaskIds: Set<string> = new Set(),
): Promise<Content[]> {
this.tracer.logEvent('ContextManager', 'Starting rendering of LLM context');
// Apply final GC Backstop pressure barrier synchronously before mapping
const finalHistory = await render(
this.buffer.nodes,
+14
View File
@@ -7,6 +7,12 @@
import { EventEmitter } from 'node:events';
import type { ConcreteNode } from './graph/types.js';
/**
 * Payload carried by the `PROCESSOR_RESULT` event, describing the outcome of a
 * single processor run.
 */
export interface ProcessorResultEvent {
/** Identifier of the processor that produced the result. */
processorId: string;
/** The nodes the processor was run against. */
targets: readonly ConcreteNode[];
/** The nodes the processor returned for those targets. */
returnedNodes: readonly ConcreteNode[];
}
export interface PristineHistoryUpdatedEvent {
nodes: readonly ConcreteNode[];
newNodes: Set<string>;
@@ -49,4 +55,12 @@ export class ContextEventBus extends EventEmitter {
onConsolidationNeeded(listener: (event: ContextConsolidationEvent) => void) {
this.on('BUDGET_RETAINED_CROSSED', listener);
}
/**
 * Publishes a processor result to every listener registered for the
 * `PROCESSOR_RESULT` event.
 *
 * @param event The processor id together with its target and returned nodes.
 */
emitProcessorResult(event: ProcessorResultEvent) {
this.emit('PROCESSOR_RESULT', event);
}
/**
 * Registers a listener for the `PROCESSOR_RESULT` event.
 *
 * @param listener Callback invoked with each emitted processor result.
 */
onProcessorResult(listener: (event: ProcessorResultEvent) => void) {
this.on('PROCESSOR_RESULT', listener);
}
}
@@ -122,9 +122,9 @@ export const AgentYieldBehavior: NodeBehavior<AgentYield> = {
getEstimatableParts(yieldNode) {
return [{ text: yieldNode.text }];
},
serialize(yieldNode, writer) {
writer.appendModelPart({ text: yieldNode.text });
writer.flushModelParts();
serialize() {
// AGENT_YIELD is a synthetic marker node used for internal graph tracking.
// We intentionally do NOT serialize it to the LLM to prevent prompt corruption.
},
};
+26 -6
View File
@@ -3,9 +3,10 @@
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { ConcreteNode } from './types.js';
import { ContextGraphBuilder } from './toGraph.js';
import type { Content } from '@google/genai';
import type { Episode, ConcreteNode } from './types.js';
import { toGraph } from './toGraph.js';
import type { HistoryEvent } from '../../core/agentChatHistory.js';
import { fromGraph } from './fromGraph.js';
import type { ContextTokenCalculator } from '../utils/contextTokenCalculator.js';
import type { NodeBehaviorRegistry } from './behaviorRegistry.js';
@@ -15,11 +16,30 @@ export class ContextGraphMapper {
constructor(private readonly registry: NodeBehaviorRegistry) {}
toGraph(
history: readonly Content[],
private builder?: ContextGraphBuilder;
applyEvent(
event: HistoryEvent,
tokenCalculator: ContextTokenCalculator,
): Episode[] {
return toGraph(history, tokenCalculator, this.nodeIdentityMap);
): ConcreteNode[] {
if (!this.builder) {
this.builder = new ContextGraphBuilder(
tokenCalculator,
this.nodeIdentityMap,
);
}
if (event.type === 'CLEAR') {
this.builder.clear();
return [];
}
if (event.type === 'SYNC_FULL') {
this.builder.clear();
}
this.builder.processHistory(event.payload);
return this.builder.getNodes();
}
fromGraph(nodes: readonly ConcreteNode[]): Content[] {
+108 -46
View File
@@ -6,6 +6,7 @@
import type { Content, Part } from '@google/genai';
import type {
ConcreteNode,
Episode,
SemanticPart,
ToolExecution,
@@ -38,67 +39,98 @@ function isCompleteEpisode(ep: Partial<Episode>): ep is Episode {
);
}
export function toGraph(
history: readonly Content[],
tokenCalculator: ContextTokenCalculator,
nodeIdentityMap: WeakMap<object, string>,
): Episode[] {
const episodes: Episode[] = [];
let currentEpisode: Partial<Episode> | null = null;
const pendingCallParts: Map<string, Part> = new Map();
export class ContextGraphBuilder {
private episodes: Episode[] = [];
private currentEpisode: Partial<Episode> | null = null;
private pendingCallParts: Map<string, Part> = new Map();
private pendingCallPartsWithoutId: Part[] = [];
const finalizeEpisode = () => {
if (currentEpisode && isCompleteEpisode(currentEpisode)) {
episodes.push(currentEpisode);
}
currentEpisode = null;
};
constructor(
private readonly tokenCalculator: ContextTokenCalculator,
private readonly nodeIdentityMap: WeakMap<object, string> = new WeakMap(),
) {}
for (const msg of history) {
if (!msg.parts) continue;
clear() {
this.episodes = [];
this.currentEpisode = null;
this.pendingCallParts.clear();
this.pendingCallPartsWithoutId = [];
}
if (msg.role === 'user') {
const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
const hasUserParts = msg.parts.some(
(p) => !!p.text || !!p.inlineData || !!p.fileData,
);
processHistory(history: readonly Content[]) {
const finalizeEpisode = () => {
if (this.currentEpisode && isCompleteEpisode(this.currentEpisode)) {
this.episodes.push(this.currentEpisode);
}
this.currentEpisode = null;
};
if (hasToolResponses) {
currentEpisode = parseToolResponses(
for (const msg of history) {
if (!msg.parts) continue;
if (msg.role === 'user') {
const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
const hasUserParts = msg.parts.some(
(p) => !!p.text || !!p.inlineData || !!p.fileData,
);
if (hasToolResponses) {
this.currentEpisode = parseToolResponses(
msg,
this.currentEpisode,
this.pendingCallParts,
this.pendingCallPartsWithoutId,
this.tokenCalculator,
this.nodeIdentityMap,
);
}
if (hasUserParts) {
finalizeEpisode();
this.currentEpisode = parseUserParts(msg, this.nodeIdentityMap);
}
} else if (msg.role === 'model') {
this.currentEpisode = parseModelParts(
msg,
currentEpisode,
pendingCallParts,
tokenCalculator,
nodeIdentityMap,
this.currentEpisode,
this.pendingCallParts,
this.pendingCallPartsWithoutId,
this.nodeIdentityMap,
);
}
if (hasUserParts) {
finalizeEpisode();
currentEpisode = parseUserParts(msg, nodeIdentityMap);
}
} else if (msg.role === 'model') {
currentEpisode = parseModelParts(
msg,
currentEpisode,
pendingCallParts,
nodeIdentityMap,
);
}
}
if (currentEpisode) {
finalizeYield(currentEpisode);
finalizeEpisode();
}
getNodes(): ConcreteNode[] {
const copy = [...this.episodes];
if (this.currentEpisode) {
const activeEp = {
...this.currentEpisode,
concreteNodes: [...(this.currentEpisode.concreteNodes || [])],
};
finalizeYield(activeEp);
if (isCompleteEpisode(activeEp)) {
copy.push(activeEp);
}
}
return episodes;
const nodes: ConcreteNode[] = [];
for (const ep of copy) {
if (ep.concreteNodes) {
for (const child of ep.concreteNodes) {
nodes.push(child);
}
}
}
return nodes;
}
}
function parseToolResponses(
msg: Content,
currentEpisode: Partial<Episode> | null,
pendingCallParts: Map<string, Part>,
pendingCallPartsWithoutId: Part[],
tokenCalculator: ContextTokenCalculator,
nodeIdentityMap: WeakMap<object, string>,
): Partial<Episode> {
@@ -114,7 +146,19 @@ function parseToolResponses(
for (const part of parts) {
if (part.functionResponse) {
const callId = part.functionResponse.id || '';
const matchingCall = pendingCallParts.get(callId);
let matchingCall = pendingCallParts.get(callId);
if (!matchingCall && pendingCallPartsWithoutId.length > 0) {
const idx = pendingCallPartsWithoutId.findIndex(
(p) => p.functionCall?.name === part.functionResponse!.name,
);
if (idx !== -1) {
matchingCall = pendingCallPartsWithoutId[idx];
pendingCallPartsWithoutId.splice(idx, 1);
} else {
matchingCall = pendingCallPartsWithoutId.shift();
}
}
const intentTokens = matchingCall
? tokenCalculator.estimateTokensForParts([matchingCall])
@@ -137,6 +181,7 @@ function parseToolResponses(
observation: obsTokens,
},
};
currentEpisode.concreteNodes = [
...(currentEpisode.concreteNodes || []),
step,
@@ -190,6 +235,7 @@ function parseModelParts(
msg: Content,
currentEpisode: Partial<Episode> | null,
pendingCallParts: Map<string, Part>,
pendingCallPartsWithoutId: Part[],
nodeIdentityMap: WeakMap<object, string>,
): Partial<Episode> {
if (!currentEpisode) {
@@ -204,7 +250,23 @@ function parseModelParts(
for (const part of parts) {
if (part.functionCall) {
const callId = part.functionCall.id || '';
if (callId) pendingCallParts.set(callId, part);
if (callId) {
pendingCallParts.set(callId, part);
} else {
const lastIdx = pendingCallPartsWithoutId.length - 1;
const lastPart = pendingCallPartsWithoutId[lastIdx];
if (
lastPart &&
lastPart.functionCall &&
lastPart.functionCall.name === part.functionCall.name
) {
// Replace the previous chunk with the more complete one
pendingCallPartsWithoutId[lastIdx] = part;
} else {
pendingCallPartsWithoutId.push(part);
}
}
} else if (part.text) {
const thought: AgentThought = {
id: getStableId(part, nodeIdentityMap),
+35 -38
View File
@@ -33,50 +33,47 @@ export class HistoryObserver {
private readonly graphMapper: ContextGraphMapper,
) {}
/**
 * Handles one chat-history event: asks the graph mapper for the resulting
 * node list, works out which node ids have not been reported before, traces
 * the rebuild, and publishes the updated pristine history on the event bus.
 */
private processEvent = (event: HistoryEvent) => {
  // A CLEAR resets the set of ids that have already been reported.
  if (event.type === 'CLEAR') {
    this.seenNodeIds.clear();
  }

  const nodes: ConcreteNode[] = this.graphMapper.applyEvent(
    event,
    this.tokenCalculator,
  );

  // Collect ids seen for the first time, remembering them as we go.
  const newNodes = new Set<string>();
  for (const { id } of nodes) {
    if (this.seenNodeIds.has(id)) {
      continue;
    }
    newNodes.add(id);
    this.seenNodeIds.add(id);
  }

  const traceDetails = {
    nodesSize: nodes.length,
    newNodesCount: newNodes.size,
  };
  this.tracer.logEvent(
    'HistoryObserver',
    `Rebuilt pristine graph from ${event.type} event`,
    traceDetails,
  );

  this.eventBus.emitPristineHistoryUpdated({ nodes, newNodes });
};
start() {
if (this.unsubscribeHistory) {
this.unsubscribeHistory();
}
this.unsubscribeHistory = this.chatHistory.subscribe(
(_event: HistoryEvent) => {
// Rebuild the pristine Context Graph graph from the full source history on every change.
// Wait, toGraph still returns an Episode[].
// We actually need to map the Episode[] to a flat ConcreteNode[] here to form the 'nodes'.
const pristineEpisodes = this.graphMapper.toGraph(
this.chatHistory.get(),
this.tokenCalculator,
);
this.unsubscribeHistory = this.chatHistory.subscribe(this.processEvent);
const nodes: ConcreteNode[] = [];
for (const ep of pristineEpisodes) {
if (ep.concreteNodes) {
for (const child of ep.concreteNodes) {
nodes.push(child);
}
}
}
const newNodes = new Set<string>();
for (const node of nodes) {
if (!this.seenNodeIds.has(node.id)) {
newNodes.add(node.id);
this.seenNodeIds.add(node.id);
}
}
this.tracer.logEvent(
'HistoryObserver',
'Rebuilt pristine graph from chat history update',
{ nodesSize: nodes.length, newNodesCount: newNodes.size },
);
this.eventBus.emitPristineHistoryUpdated({
nodes,
newNodes,
});
},
);
// Process any existing history immediately upon start
const existing = this.chatHistory.get();
if (existing && existing.length > 0) {
this.processEvent({ type: 'SYNC_FULL', payload: existing });
}
}
stop() {
+117
View File
@@ -0,0 +1,117 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../config/config.js';
import type { GeminiChat } from '../core/geminiChat.js';
import { ContextProcessorRegistry } from './config/registry.js';
import { loadContextManagementConfig } from './config/configLoader.js';
import { ContextTracer } from './tracer.js';
import { ContextEventBus } from './eventBus.js';
import { ContextEnvironmentImpl } from './pipeline/environmentImpl.js';
import { PipelineOrchestrator } from './pipeline/orchestrator.js';
import { ContextManager } from './contextManager.js';
import { debugLogger } from '../utils/debugLogger.js';
import { NodeTruncationProcessorOptionsSchema } from './processors/nodeTruncationProcessor.js';
import { ToolMaskingProcessorOptionsSchema } from './processors/toolMaskingProcessor.js';
import { HistoryTruncationProcessorOptionsSchema } from './processors/historyTruncationProcessor.js';
import { BlobDegradationProcessorOptionsSchema } from './processors/blobDegradationProcessor.js';
import { NodeDistillationProcessorOptionsSchema } from './processors/nodeDistillationProcessor.js';
import { StateSnapshotProcessorOptionsSchema } from './processors/stateSnapshotProcessor.js';
import { StateSnapshotAsyncProcessorOptionsSchema } from './processors/stateSnapshotAsyncProcessor.js';
import { RollingSummaryProcessorOptionsSchema } from './processors/rollingSummaryProcessor.js';
/**
 * Registers the options schema of each built-in context processor with the
 * given registry.
 */
function registerBuiltInProcessorSchemas(
  registry: ContextProcessorRegistry,
): void {
  registry.registerProcessor({
    id: 'NodeTruncationProcessor',
    schema: NodeTruncationProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'ToolMaskingProcessor',
    schema: ToolMaskingProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'HistoryTruncationProcessor',
    schema: HistoryTruncationProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'BlobDegradationProcessor',
    schema: BlobDegradationProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'NodeDistillationProcessor',
    schema: NodeDistillationProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'StateSnapshotProcessor',
    schema: StateSnapshotProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'StateSnapshotAsyncProcessor',
    schema: StateSnapshotAsyncProcessorOptionsSchema,
  });
  registry.registerProcessor({
    id: 'RollingSummaryProcessor',
    schema: RollingSummaryProcessorOptionsSchema,
  });
}

/**
 * Assembles a ContextManager for a chat session.
 *
 * Loads the context profile selected by the config, then builds the tracer,
 * event bus, environment and pipeline orchestrator that the manager needs.
 *
 * @param config Source of the context-management settings, storage paths,
 *   session id and LLM client.
 * @param chat Chat whose `agentHistory` the manager is constructed with.
 * @param lastPromptId Prompt id handed to the environment; also used as the
 *   tracer's session id.
 * @returns The manager, or `undefined` when context management is disabled.
 * @throws Whatever `loadContextManagementConfig` throws while loading the profile.
 */
export async function initializeContextManager(
  config: Config,
  chat: GeminiChat,
  lastPromptId: string,
): Promise<ContextManager | undefined> {
  const isV1Enabled = config.getContextManagementConfig().enabled;
  debugLogger.log(
    `[initializer] called with enabled=${isV1Enabled}, GEMINI_CONTEXT_TRACE_DIR=${process.env['GEMINI_CONTEXT_TRACE_DIR']}`,
  );
  if (!isV1Enabled) {
    return undefined;
  }

  const processorRegistry = new ContextProcessorRegistry();
  registerBuiltInProcessorSchemas(processorRegistry);

  const profileSelector = config.getExperimentalContextManagementConfig();
  const profile = await loadContextManagementConfig(
    profileSelector,
    processorRegistry,
  );

  const { storage } = config;
  const logDir = storage.getProjectTempLogsDir();
  const projectTempDir = storage.getProjectTempDir();

  // Tracing is switched on purely by the presence of the environment variable.
  const tracingEnabled = !!process.env['GEMINI_CONTEXT_TRACE_DIR'];
  const tracer = new ContextTracer({
    enabled: tracingEnabled,
    targetDir: projectTempDir,
    sessionId: lastPromptId,
  });

  const eventBus = new ContextEventBus();

  // The LLM client is supplied as a provider so it is looked up from the
  // config each time it is needed rather than captured once here.
  const env = new ContextEnvironmentImpl(
    () => config.getBaseLlmClient(),
    config.getSessionId(),
    lastPromptId,
    logDir,
    projectTempDir,
    tracer,
    4,
    eventBus,
  );

  const syncPipelines = profile.buildPipelines(env);
  const asyncPipelines = profile.buildAsyncPipelines(env);
  const orchestrator = new PipelineOrchestrator(
    syncPipelines,
    asyncPipelines,
    env,
    eventBus,
    tracer,
  );

  return new ContextManager(
    profile,
    env,
    tracer,
    orchestrator,
    chat.agentHistory,
  );
}
@@ -95,7 +95,7 @@ describe('ContextWorkingBufferImpl', () => {
buffer = buffer.applyProcessorResult('Summarizer', [p1, p2], [summaryNode]);
// p1 and p2 are removed, p3 remains, s1 is added
expect(buffer.nodes.map((n) => n.id)).toEqual(['p3', 's1']);
expect(buffer.nodes.map((n) => n.id)).toEqual(['s1', 'p3']);
// Provenance lookup: The summary node should resolve to both p1 and p2!
const roots = buffer.getPristineNodes('s1');
@@ -107,13 +107,19 @@ export class ContextWorkingBufferImpl implements ContextWorkingBuffer {
// Calculate new node array
const removedSet = new Set(removedIds);
const retainedNodes = this.nodes.filter((n) => !removedSet.has(n.id));
const newGraph = [...retainedNodes];
// We append the output nodes in the same general position if possible,
// but in a complex graph we just ensure they exist. V2 graph uses timestamps for order.
// For simplicity, we just push added nodes to the end of the retained array
newGraph.push(...addedNodes);
const newGraph = this.nodes.filter((n) => !removedSet.has(n.id));
const insertionIndex = this.nodes.findIndex((n) => removedSet.has(n.id));
// IMPORTANT: We do NOT use structuredClone here.
// The ContextTokenCalculator relies on a WeakMap tied to exact object references
// for O(1) performance. Deep cloning would cause catastrophic cache misses.
// The pipeline enforces immutability, making reference passing safe.
if (insertionIndex !== -1) {
newGraph.splice(insertionIndex, 0, ...addedNodes);
} else {
newGraph.push(...addedNodes);
}
// Calculate new provenance map
const newProvenanceMap = new Map(this.provenanceMap);
@@ -16,7 +16,7 @@ describe('ContextEnvironmentImpl', () => {
const mockLlmClient = createMockLlmClient();
const env = new ContextEnvironmentImpl(
mockLlmClient,
() => mockLlmClient,
'mock-session',
'mock-prompt',
'/tmp/trace',
@@ -21,7 +21,7 @@ export class ContextEnvironmentImpl implements ContextEnvironment {
readonly graphMapper: ContextGraphMapper;
constructor(
readonly llmClient: BaseLlmClient,
private readonly llmClientProvider: () => BaseLlmClient,
readonly sessionId: string,
readonly promptId: string,
readonly traceDir: string,
@@ -39,4 +39,8 @@ export class ContextEnvironmentImpl implements ContextEnvironment {
this.inbox = new LiveInbox();
this.graphMapper = new ContextGraphMapper(this.behaviorRegistry);
}
/**
 * The LLM client, obtained by calling the provider passed to the constructor
 * on every access.
 */
get llmClient(): BaseLlmClient {
return this.llmClientProvider();
}
}
@@ -204,6 +204,11 @@ export class PipelineOrchestrator {
allowedTargets,
returnedNodes,
);
this.eventBus.emitProcessorResult({
processorId: processor.id,
targets: allowedTargets,
returnedNodes,
});
} catch (error) {
debugLogger.error(
`Pipeline ${pipeline.name} failed async at ${processor.id}:`,
@@ -65,4 +65,34 @@ describe('ToolMaskingProcessor', () => {
// Returned the exact same object reference
expect(result[0]).toBe(toolStep);
});
it('should strictly preserve the original intent args when only the observation is masked', async () => {
const env = createMockEnvironment();
const processor = createToolMaskingProcessor('ToolMaskingProcessor', env, {
stringLengthThresholdTokens: 10,
});
const originalIntent = { command: 'ls -R', dir: '/tmp' };
const longString = 'A'.repeat(500);
const toolStep = createDummyToolNode('ep1', 50, 500, {
intent: originalIntent,
observation: {
result: longString,
},
});
const result = await processor.process(createMockProcessArgs([toolStep]));
expect(result.length).toBe(1);
const masked = result[0] as ToolExecution;
expect(masked.id).not.toBe(toolStep.id);
const obs = masked.observation as { result: string };
expect(obs.result).toContain('<tool_output_masked>');
// The intent MUST be perfectly preserved and not fall back to {} or undefined incorrectly
expect(masked.intent).toEqual(originalIntent);
});
});
@@ -129,7 +129,10 @@ export function createToolMaskingProcessor(
1024
).toFixed(2);
const totalLines = content.split('\n').length;
return `<tool_output_masked>\n[Tool ${nodeType} string (${fileSizeMB}MB, ${totalLines} lines) masked to preserve context window. Full string saved to: ${filePath}]\n</tool_output_masked>`;
// Ensure consistent path separators for LLM tokenization and deterministic tests across OSes
const normalizedPath = filePath.split(path.sep).join('/');
return `<tool_output_masked>\n[Tool ${nodeType} string (${fileSizeMB}MB, ${totalLines} lines) masked to preserve context window. Full string saved to: ${normalizedPath}]\n</tool_output_masked>`;
};
const returnedNodes: ConcreteNode[] = [];
@@ -199,6 +202,13 @@ export function createToolMaskingProcessor(
const maskedIntent = isMaskableRecord(intentRes.masked)
? (intentRes.masked as Record<string, unknown>)
: undefined;
// Ensure we strictly preserve the original intent if it was unchanged and is a record
const finalIntent = intentRes.changed
? maskedIntent
: isMaskableRecord(rawIntent)
? (rawIntent as Record<string, unknown>)
: undefined;
// Handle observation explicitly as string vs object
const maskedObs =
typeof obsRes.masked === 'string'
@@ -206,13 +216,21 @@ export function createToolMaskingProcessor(
: isMaskableRecord(obsRes.masked)
? (obsRes.masked as Record<string, unknown>)
: undefined;
// Ensure we strictly preserve the original observation if it was unchanged
const finalObs = obsRes.changed
? maskedObs
: typeof rawObs === 'string'
? ({ message: rawObs } as Record<string, unknown>)
: isMaskableRecord(rawObs)
? (rawObs as Record<string, unknown>)
: undefined;
const newIntentTokens =
env.tokenCalculator.estimateTokensForParts([
{
functionCall: {
name: toolName || 'unknown',
args: maskedIntent,
args: finalIntent,
id: callId,
},
},
@@ -223,7 +241,7 @@ export function createToolMaskingProcessor(
obsPart = {
functionResponse: {
name: toolName || 'unknown',
response: maskedObs,
response: finalObs,
id: callId,
},
};
@@ -241,8 +259,8 @@ export function createToolMaskingProcessor(
const maskedNode: ToolExecution = {
...node,
id: randomUUID(), // Modified, so generate new ID
intent: maskedIntent ?? node.intent,
observation: maskedObs ?? node.observation,
intent: finalIntent ?? node.intent,
observation: finalObs ?? node.observation,
tokens: {
intent: newIntentTokens,
observation: newObsTokens,
File diff suppressed because one or more lines are too long
@@ -5,6 +5,7 @@
*/
import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
import fs from 'node:fs';
import { SimulationHarness } from './simulationHarness.js';
import { createMockLlmClient } from '../testing/contextTestUtils.js';
import type { ContextProfile } from '../config/profiles.js';
@@ -28,6 +29,11 @@ expect.addSnapshotSerializer({
});
describe('System Lifecycle Golden Tests', () => {
afterAll(async () => {
fs.rmSync('/tmp/sim', { recursive: true, force: true });
fs.rmSync('mock', { recursive: true, force: true });
});
beforeAll(() => {
vi.spyOn(Math, 'random').mockReturnValue(0.5);
});
@@ -59,7 +59,7 @@ export class SimulationHarness {
sessionId: 'sim-session',
});
this.env = new ContextEnvironmentImpl(
mockLlmClient,
() => mockLlmClient,
'sim-prompt',
'sim-session',
mockTempDir,
@@ -145,8 +145,8 @@ export function createMockEnvironment(
});
const eventBus = new ContextEventBus();
const env = new ContextEnvironmentImpl(
llmClient,
let env = new ContextEnvironmentImpl(
() => llmClient as BaseLlmClient,
'mock-session',
'mock-prompt-id',
'/tmp/.gemini/trace',
@@ -157,7 +157,20 @@ export function createMockEnvironment(
);
if (overrides) {
Object.assign(env, overrides);
if (overrides.llmClient) {
env = new ContextEnvironmentImpl(
() => overrides.llmClient!,
env.sessionId,
env.promptId,
env.traceDir,
env.projectTempDir,
env.tracer,
env.charsPerToken,
env.eventBus,
);
}
const { llmClient: _llmClient, ...restOverrides } = overrides;
Object.assign(env, restOverrides);
}
return env;
}
@@ -247,7 +260,7 @@ export function setupContextComponentTest(
});
const eventBus = new ContextEventBus();
const env = new ContextEnvironmentImpl(
config.getBaseLlmClient(),
() => config.getBaseLlmClient(),
'test prompt-id',
'test-session',
'/tmp',
+9 -2
View File
@@ -22,6 +22,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
let tmpDir: string;
beforeEach(async () => {
vi.stubEnv('GEMINI_CONTEXT_TRACE_DIR', '');
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-tracer-test-'));
vi.useFakeTimers();
@@ -29,6 +30,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
vi.useRealTimers();
await fs.rm(tmpDir, { recursive: true, force: true });
});
@@ -45,7 +47,9 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
// Verify Initialization
const traceLogPath = path.join(
tmpDir,
'.gemini/context_trace/test-session/trace.log',
'context_trace',
'test-session',
'trace.log',
);
const initTraceLog = readFileSync(traceLogPath, 'utf-8');
expect(initTraceLog).toContain('[SYSTEM] Context Tracer Initialized');
@@ -65,7 +69,10 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
const expectedAssetPath = path.join(
tmpDir,
'.gemini/context_trace/test-session/assets/1767268800020-mock-uuid-1-largeKey.json',
'context_trace',
'test-session',
'assets',
'1767268800020-mock-uuid-1-largeKey.json',
);
expect(existsSync(expectedAssetPath)).toBe(true);
+3 -6
View File
@@ -25,12 +25,9 @@ export class ContextTracer {
constructor(options: ContextTracerOptions) {
this.enabled = options.enabled ?? false;
this.traceDir = path.join(
options.targetDir,
'.gemini',
'context_trace',
options.sessionId,
);
this.traceDir =
process.env['GEMINI_CONTEXT_TRACE_DIR'] ||
path.join(options.targetDir, 'context_trace', options.sessionId);
this.assetsDir = path.join(this.traceDir, 'assets');
if (this.enabled) {
@@ -5,7 +5,7 @@
*/
import type { Part } from '@google/genai';
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
import { estimateTokenCountSync } from '../../utils/tokenCalculation.js';
import type { ConcreteNode } from '../graph/types.js';
import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';
@@ -84,24 +84,27 @@ export class ContextTokenCalculator {
}
return tokens;
}
/**
* Slower, precise estimation for a Gemini Content/Part graph.
* Deeply inspects the nested structure and uses the base tokenization math.
*/
estimateTokensForParts(parts: Part[], depth: number = 0): number {
let totalTokens = 0;
private readonly partTokenCache = new WeakMap<object, number>();
estimateTokensForParts(parts: Part[]): number {
let total = 0;
for (const part of parts) {
if (typeof part.text === 'string') {
totalTokens += Math.ceil(part.text.length / this.charsPerToken);
} else if (part.inlineData !== undefined || part.fileData !== undefined) {
totalTokens += 258;
if (part !== null && typeof part === 'object') {
let cost = this.partTokenCache.get(part);
if (cost === undefined) {
cost = estimateTokenCountSync([part], 0, this.charsPerToken);
this.partTokenCache.set(part, cost);
}
total += cost;
} else {
totalTokens += Math.ceil(
JSON.stringify(part).length / this.charsPerToken,
);
total += estimateTokenCountSync([part], 0, this.charsPerToken);
}
}
// Also include structural overhead
return totalTokens + baseEstimate(parts, depth);
return total;
}
}
+14 -4
View File
@@ -45,6 +45,7 @@ import type { ContentGenerator } from './contentGenerator.js';
import { LoopDetectionService } from '../services/loopDetectionService.js';
import { ChatCompressionService } from '../context/chatCompressionService.js';
import { AgentHistoryProvider } from '../context/agentHistoryProvider.js';
import type { ContextManager } from '../context/contextManager.js';
import { ideContextStore } from '../ide/ideContext.js';
import {
logContentRetryFailure,
@@ -74,6 +75,7 @@ import {
import { getDisplayString, resolveModel } from '../config/models.js';
import { partToString } from '../utils/partUtils.js';
import { coreEvents, CoreEvent } from '../utils/events.js';
import { initializeContextManager } from '../context/initializer.js';
const MAX_TURNS = 100;
@@ -97,6 +99,7 @@ export class GeminiClient {
private readonly compressionService: ChatCompressionService;
private readonly agentHistoryProvider: AgentHistoryProvider;
private readonly toolOutputMaskingService: ToolOutputMaskingService;
private contextManager?: ContextManager;
private lastPromptId: string;
private currentSequenceModel: string | null = null;
private lastSentIdeContext: IdeContext | undefined;
@@ -393,6 +396,11 @@ export class GeminiClient {
},
);
await chat.initialize(resumedSessionData, 'main');
this.contextManager = await initializeContextManager(
this.config,
chat,
this.lastPromptId,
);
return chat;
} catch (error) {
await reportError(
@@ -618,10 +626,12 @@ export class GeminiClient {
const modelForLimitCheck = this._getActiveModelForCurrentTurn();
if (this.config.getContextManagementConfig().enabled) {
const newHistory = await this.agentHistoryProvider.manageHistory(
this.getHistory(),
signal,
);
const newHistory = this.contextManager
? await this.contextManager.renderHistory()
: await this.agentHistoryProvider.manageHistory(
this.getHistory(),
signal,
);
if (newHistory.length !== this.getHistory().length) {
this.getChat().setHistory(newHistory);
}
@@ -183,6 +183,7 @@ describe('GeminiChat', () => {
getRetryFetchErrors: vi.fn().mockReturnValue(false),
getMaxAttempts: vi.fn().mockReturnValue(10),
getUserTier: vi.fn().mockReturnValue(undefined),
isContextManagementEnabled: vi.fn().mockReturnValue(false),
modelConfigService: {
getResolvedConfig: vi.fn().mockImplementation((modelConfigKey) => {
const model = modelConfigKey.model ?? mockConfig.getModel();
+82 -21
View File
@@ -17,7 +17,9 @@ import {
type PartListUnion,
type GenerateContentConfig,
type GenerateContentParameters,
type FunctionCall,
} from '@google/genai';
import { AgentChatHistory } from './agentChatHistory.js';
import { toParts } from '../code_assist/converter.js';
import {
retryWithBackoff,
@@ -248,19 +250,21 @@ export class GeminiChat {
private sendPromise: Promise<void> = Promise.resolve();
private readonly chatRecordingService: ChatRecordingService;
private lastPromptTokenCount: number;
agentHistory: AgentChatHistory;
constructor(
private readonly context: AgentLoopContext,
private systemInstruction: string = '',
private tools: Tool[] = [],
private history: Content[] = [],
history: Content[] = [],
resumedSessionData?: ResumedSessionData,
private readonly onModelChanged?: (modelId: string) => Promise<Tool[]>,
) {
validateHistory(history);
this.agentHistory = new AgentChatHistory(history);
this.chatRecordingService = new ChatRecordingService(context);
this.lastPromptTokenCount = estimateTokenCountSync(
this.history.flatMap((c) => c.parts || []),
this.agentHistory.flatMap((c) => c.parts || []),
);
}
@@ -347,7 +351,7 @@ export class GeminiChat {
}
// Add user content to history ONCE before any attempts.
this.history.push(userContent);
this.agentHistory.push(userContent);
const requestContents = this.getHistory(true);
const streamWithRetries = async function* (
@@ -747,8 +751,8 @@ export class GeminiChat {
*/
getHistory(curated: boolean = false): readonly Content[] {
const history = curated
? extractCuratedHistory(this.history)
: this.history;
? extractCuratedHistory([...this.agentHistory.get()])
: this.agentHistory.get();
return [...history];
}
@@ -756,26 +760,26 @@ export class GeminiChat {
* Clears the chat history.
*/
clearHistory(): void {
this.history = [];
this.agentHistory.clear();
}
/**
* Adds a new entry to the chat history.
*/
addHistory(content: Content): void {
this.history.push(content);
this.agentHistory.push(content);
}
setHistory(history: readonly Content[]): void {
this.history = [...history];
this.agentHistory.set(history);
this.lastPromptTokenCount = estimateTokenCountSync(
this.history.flatMap((c) => c.parts || []),
this.agentHistory.flatMap((c) => c.parts || []),
);
this.chatRecordingService.updateMessagesFromHistory(history);
}
stripThoughtsFromHistory(): void {
this.history = this.history.map((content) => {
this.agentHistory.map((content) => {
const newContent = { ...content };
if (newContent.parts) {
newContent.parts = newContent.parts.map((part) => {
@@ -885,6 +889,9 @@ export class GeminiChat {
let hasThoughts = false;
let finishReason: FinishReason | undefined;
// The SDK provides fully assembled FunctionCall objects in chunk.functionCalls
const finalFunctionCalls: FunctionCall[] = [];
for await (const chunk of streamResponse) {
const candidateWithReason = chunk?.candidates?.find(
(candidate) => candidate.finishReason,
@@ -894,6 +901,10 @@ export class GeminiChat {
finishReason = candidateWithReason.finishReason as FinishReason;
}
if (chunk.functionCalls && chunk.functionCalls.length > 0) {
finalFunctionCalls.push(...chunk.functionCalls);
}
if (isValidResponse(chunk)) {
const content = chunk.candidates?.[0]?.content;
if (content?.parts) {
@@ -948,16 +959,66 @@ export class GeminiChat {
// Consolidate consecutive non-thought text parts into a single part.
const consolidatedParts: Part[] = [];
for (const part of modelResponseParts) {
const lastPart = consolidatedParts[consolidatedParts.length - 1];
if (
lastPart?.text &&
isValidNonThoughtTextPart(lastPart) &&
isValidNonThoughtTextPart(part)
) {
lastPart.text += part.text;
} else {
consolidatedParts.push(part);
if (this.context.config.isContextManagementEnabled()) {
for (const part of modelResponseParts) {
if (part.functionCall) {
// Skip partial functionCall stream chunks! We will replace them
// entirely with the pristine, fully assembled objects from the SDK
// (finalFunctionCalls) immediately below. We only push the very first
// partial chunk of a sequence as a placeholder so we know *where*
// in the sequence of parts the tool call happened.
const lastPart = consolidatedParts[consolidatedParts.length - 1];
const currentId = part.functionCall.id;
const lastId = lastPart?.functionCall?.id;
const isNewCall =
!lastPart?.functionCall ||
(currentId !== undefined &&
lastId !== undefined &&
currentId !== lastId) ||
lastPart.functionCall.name !== part.functionCall.name;
if (isNewCall) {
consolidatedParts.push({ ...part }); // Push placeholder
}
} else {
const lastPart = consolidatedParts[consolidatedParts.length - 1];
if (
lastPart?.text &&
isValidNonThoughtTextPart(lastPart) &&
isValidNonThoughtTextPart(part)
) {
lastPart.text += part.text;
} else {
consolidatedParts.push(part);
}
}
}
// Now, replace the placeholders with the perfectly assembled final arguments
if (finalFunctionCalls.length > 0) {
let callIndex = 0;
for (const part of consolidatedParts) {
if (part.functionCall && callIndex < finalFunctionCalls.length) {
part.functionCall = finalFunctionCalls[callIndex];
callIndex++;
}
}
}
} else {
// Fallback to legacy consolidation for non-context-manager users
for (const part of modelResponseParts) {
const lastPart = consolidatedParts[consolidatedParts.length - 1];
if (
lastPart?.text &&
isValidNonThoughtTextPart(lastPart) &&
isValidNonThoughtTextPart(part)
) {
lastPart.text += part.text;
} else {
consolidatedParts.push(part);
}
}
}
@@ -1013,7 +1074,7 @@ export class GeminiChat {
}
}
this.history.push({ role: 'model', parts: consolidatedParts });
this.agentHistory.push({ role: 'model', parts: consolidatedParts });
}
getLastPromptTokenCount(): number {
@@ -121,6 +121,7 @@ describe('GeminiChat Network Retries', () => {
generateContentConfig: { temperature: 0 },
})),
},
isContextManagementEnabled: vi.fn().mockReturnValue(false),
getEnableHooks: vi.fn().mockReturnValue(false),
getModelAvailabilityService: vi
.fn()
+6 -1
View File
@@ -293,7 +293,12 @@ export type { Content, Part, FunctionCall } from '@google/genai';
// Export context types and profiles
export * from './context/types.js';
export * from './context/profiles.js';
export { generalistProfile as legacyGeneralistProfile } from './context/profiles.js';
export {
generalistProfile,
stressTestProfile,
} from './context/config/profiles.js';
// Export trust utility
export * from './utils/trust.js';
+24 -13
View File
@@ -29,12 +29,14 @@ const MAX_CHARS_FOR_FULL_HEURISTIC = 100_000;
// standard multimodal responses are typically depth 1.
const MAX_RECURSION_DEPTH = 3;
const DEFAULT_CHARS_PER_TOKEN = 4;
/**
* Heuristic estimation of tokens for a text string.
*/
function estimateTextTokens(text: string): number {
function estimateTextTokens(text: string, charsPerToken: number): number {
if (text.length > MAX_CHARS_FOR_FULL_HEURISTIC) {
return text.length / 4;
return text.length / charsPerToken;
}
let tokens = 0;
@@ -73,25 +75,33 @@ function estimateMediaTokens(part: Part): number | undefined {
* Heuristic estimation for tool responses, avoiding massive string copies
* and accounting for nested Gemini 3 multimodal parts.
*/
function estimateFunctionResponseTokens(part: Part, depth: number): number {
function estimateFunctionResponseTokens(
part: Part,
depth: number,
charsPerToken: number,
): number {
const fr = part.functionResponse;
if (!fr) return 0;
let totalTokens = (fr.name?.length ?? 0) / 4;
let totalTokens = (fr.name?.length ?? 0) / charsPerToken;
const response = fr.response as unknown;
if (typeof response === 'string') {
totalTokens += response.length / 4;
totalTokens += response.length / charsPerToken;
} else if (response !== undefined && response !== null) {
// For objects, stringify only the payload, not the whole Part object.
totalTokens += JSON.stringify(response).length / 4;
totalTokens += JSON.stringify(response).length / charsPerToken;
}
// Gemini 3: Handle nested multimodal parts recursively.
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const nestedParts = (fr as unknown as { parts?: Part[] }).parts;
if (nestedParts && nestedParts.length > 0) {
totalTokens += estimateTokenCountSync(nestedParts, depth + 1);
totalTokens += estimateTokenCountSync(
nestedParts,
depth + 1,
charsPerToken,
);
}
return totalTokens;
@@ -100,11 +110,12 @@ function estimateFunctionResponseTokens(part: Part, depth: number): number {
/**
* Estimates token count for parts synchronously using a heuristic.
* - Text: character-based heuristic (ASCII vs CJK) for small strings, length / charsPerToken for massive ones.
* - Non-text (Tools, etc): JSON string length / 4.
* - Non-text (Tools, etc): JSON string length / charsPerToken.
*/
export function estimateTokenCountSync(
parts: Part[],
depth: number = 0,
charsPerToken: number = DEFAULT_CHARS_PER_TOKEN,
): number {
if (depth > MAX_RECURSION_DEPTH) {
return 0;
@@ -113,9 +124,9 @@ export function estimateTokenCountSync(
let totalTokens = 0;
for (const part of parts) {
if (typeof part.text === 'string') {
totalTokens += estimateTextTokens(part.text);
totalTokens += estimateTextTokens(part.text, charsPerToken);
} else if (part.functionResponse) {
totalTokens += estimateFunctionResponseTokens(part, depth);
totalTokens += estimateFunctionResponseTokens(part, depth, charsPerToken);
} else {
const mediaEstimate = estimateMediaTokens(part);
if (mediaEstimate !== undefined) {
@@ -123,7 +134,7 @@ export function estimateTokenCountSync(
} else {
// Fallback for other non-text parts (e.g., functionCall).
// Note: JSON.stringify(part) here is safe as these parts are typically small.
totalTokens += JSON.stringify(part).length / 4;
totalTokens += JSON.stringify(part).length / charsPerToken;
}
}
}
@@ -162,9 +173,9 @@ export async function calculateRequestTokenCount(
} catch (error) {
// Fallback to local estimation if the API call fails
debugLogger.debug('countTokens API failed:', error);
return estimateTokenCountSync(parts);
return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
}
}
return estimateTokenCountSync(parts);
return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
}
+7
View File
@@ -3147,6 +3147,13 @@
"default": true,
"type": "boolean"
},
"stressTestProfile": {
"title": "Use the stress test profile to aggressively trigger context management.",
"description": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.",
"markdownDescription": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
"default": false,
"type": "boolean"
},
"autoMemory": {
"title": "Auto Memory",
"description": "Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox.",