refactor(core): Unified context management settings schema (#24391)

This commit is contained in:
joshualitt
2026-04-01 10:24:45 -07:00
committed by GitHub
parent 3a32d9723e
commit 377e834e03
10 changed files with 215 additions and 212 deletions
+46 -42
View File
@@ -206,6 +206,12 @@ export interface OutputSettings {
format?: OutputFormat;
}
/**
 * Thresholds consumed by the tool output masking pass when it decides which
 * tool outputs in the history may be masked.
 */
export interface ToolOutputMaskingConfig {
/**
 * Token budget for the newest tool outputs: the masking pass accumulates tool
 * tokens and only treats outputs as prunable once the running total goes
 * above this value.
 */
protectionThresholdTokens: number;
/**
 * Batch threshold: when the total of prunable tool tokens is below this
 * value the masking pass returns the history unchanged.
 */
minPrunableThresholdTokens: number;
/**
 * Presumably the switch for skipping the entire latest conversation turn
 * during masking (the consuming code is not visible here; confirm there).
 */
protectLatestTurn: boolean;
}
export interface ContextManagementConfig {
enabled: boolean;
historyWindow: {
@@ -217,19 +223,15 @@ export interface ContextManagementConfig {
retainedMaxTokens: number;
normalizationHeadRatio: number;
};
toolDistillation: {
maxOutputTokens: number;
summarizationThresholdTokens: number;
tools: {
distillation: {
maxOutputTokens: number;
summarizationThresholdTokens: number;
};
outputMasking: ToolOutputMaskingConfig;
};
}
/**
 * Tool output masking settings in the shape that carries an explicit
 * `enabled` switch alongside the thresholds.
 */
export interface ToolOutputMaskingConfig {
/** On/off switch; the masking pass returns the history untouched when false. */
enabled: boolean;
/**
 * Token budget for the newest tool outputs: outputs only become prunable once
 * the accumulated tool tokens go above this value.
 */
toolProtectionThreshold: number;
/**
 * Batch threshold: when the total of prunable tool tokens is below this
 * value nothing is masked.
 */
minPrunableTokensThreshold: number;
/**
 * Presumably the switch for skipping the entire latest conversation turn
 * during masking (the consuming code is not visible here; confirm there).
 */
protectLatestTurn: boolean;
}
export interface GemmaModelRouterSettings {
enabled?: boolean;
classifier?: {
@@ -711,7 +713,7 @@ export interface ConfigParameters {
experimentalAgentHistorySummarization?: boolean;
memoryBoundaryMarkers?: string[];
topicUpdateNarration?: boolean;
toolOutputMasking?: Partial<ToolOutputMaskingConfig>;
disableLLMCorrection?: boolean;
plan?: boolean;
tracker?: boolean;
@@ -913,7 +915,7 @@ export class Config implements McpContext, AgentLoopContext {
private pendingIncludeDirectories: string[];
private readonly enableHooks: boolean;
private readonly enableHooksUI: boolean;
private readonly toolOutputMasking: ToolOutputMaskingConfig;
private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
private projectHooks:
| ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] })
@@ -1162,12 +1164,27 @@ export class Config implements McpContext, AgentLoopContext {
params.contextManagement?.messageLimits?.normalizationHeadRatio ??
0.25,
},
toolDistillation: {
maxOutputTokens:
params.contextManagement?.toolDistillation?.maxOutputTokens ?? 10000,
summarizationThresholdTokens:
params.contextManagement?.toolDistillation
?.summarizationThresholdTokens ?? 20000,
tools: {
distillation: {
maxOutputTokens:
params.contextManagement?.tools?.distillation?.maxOutputTokens ??
10000,
summarizationThresholdTokens:
params.contextManagement?.tools?.distillation
?.summarizationThresholdTokens ?? 20000,
},
outputMasking: {
protectionThresholdTokens:
params.contextManagement?.tools?.outputMasking
?.protectionThresholdTokens ?? DEFAULT_TOOL_PROTECTION_THRESHOLD,
minPrunableThresholdTokens:
params.contextManagement?.tools?.outputMasking
?.minPrunableThresholdTokens ??
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
protectLatestTurn:
params.contextManagement?.tools?.outputMasking?.protectLatestTurn ??
DEFAULT_PROTECT_LATEST_TURN,
},
},
};
this.topicUpdateNarration = params.topicUpdateNarration ?? false;
@@ -1176,18 +1193,6 @@ export class Config implements McpContext, AgentLoopContext {
this.isModelSteeringEnabled(),
);
ExecutionLifecycleService.setInjectionService(this.injectionService);
this.toolOutputMasking = {
enabled: params.toolOutputMasking?.enabled ?? true,
toolProtectionThreshold:
params.toolOutputMasking?.toolProtectionThreshold ??
DEFAULT_TOOL_PROTECTION_THRESHOLD,
minPrunableTokensThreshold:
params.toolOutputMasking?.minPrunableTokensThreshold ??
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
protectLatestTurn:
params.toolOutputMasking?.protectLatestTurn ??
DEFAULT_PROTECT_LATEST_TURN,
};
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.acpMode = params.acpMode ?? false;
this.listSessions = params.listSessions ?? false;
@@ -2415,10 +2420,6 @@ export class Config implements McpContext, AgentLoopContext {
return this.modelSteering;
}
/**
 * Reports whether tool output masking is switched on.
 *
 * @returns The `enabled` flag of the stored `toolOutputMasking` settings.
 */
getToolOutputMaskingEnabled(): boolean {
return this.toolOutputMasking.enabled;
}
async getToolOutputMaskingConfig(): Promise<ToolOutputMaskingConfig> {
await this.ensureExperimentsLoaded();
@@ -2440,17 +2441,19 @@ export class Config implements McpContext, AgentLoopContext {
: undefined;
return {
enabled: this.toolOutputMasking.enabled,
toolProtectionThreshold:
protectionThresholdTokens:
parsedProtection !== undefined && !isNaN(parsedProtection)
? parsedProtection
: this.toolOutputMasking.toolProtectionThreshold,
minPrunableTokensThreshold:
: this.contextManagement.tools.outputMasking
.protectionThresholdTokens,
minPrunableThresholdTokens:
parsedPrunable !== undefined && !isNaN(parsedPrunable)
? parsedPrunable
: this.toolOutputMasking.minPrunableTokensThreshold,
: this.contextManagement.tools.outputMasking
.minPrunableThresholdTokens,
protectLatestTurn:
remoteProtectLatest ?? this.toolOutputMasking.protectLatestTurn,
remoteProtectLatest ??
this.contextManagement.tools.outputMasking.protectLatestTurn,
};
}
@@ -3301,11 +3304,12 @@ export class Config implements McpContext, AgentLoopContext {
}
/**
 * Returns the `maxOutputTokens` value of the tool distillation settings held
 * in `contextManagement`.
 */
getToolMaxOutputTokens(): number {
// NOTE(review): two consecutive returns follow, so only the first can ever
// execute. They look like the pre-refactor (`toolDistillation`) and
// post-refactor (`tools.distillation`) property paths; confirm which one
// matches `ContextManagementConfig` and keep only that one.
return this.contextManagement.toolDistillation.maxOutputTokens;
return this.contextManagement.tools.distillation.maxOutputTokens;
}
/**
 * Returns the `summarizationThresholdTokens` value of the tool distillation
 * settings held in `contextManagement`.
 */
getToolSummarizationThresholdTokens(): number {
// NOTE(review): two consecutive returns follow, so only the first can ever
// execute. They look like the pre-refactor (`toolDistillation`) and
// post-refactor (`tools.distillation`) property paths; confirm which one
// matches `ContextManagementConfig` and keep only that one.
return this.contextManagement.toolDistillation.summarizationThresholdTokens;
return this.contextManagement.tools.distillation
.summarizationThresholdTokens;
}
getNextCompressionTruncationId(): number {
@@ -45,11 +45,10 @@ describe('ToolOutputMaskingService', () => {
},
getSessionId: () => 'mock-session',
getUsageStatisticsEnabled: () => false,
getToolOutputMaskingEnabled: () => true,
getToolOutputMaskingConfig: async () => ({
enabled: true,
toolProtectionThreshold: 50000,
minPrunableTokensThreshold: 30000,
protectionThresholdTokens: 50000,
minPrunableThresholdTokens: 30000,
protectLatestTurn: true,
}),
} as unknown as Config;
@@ -66,8 +65,8 @@ describe('ToolOutputMaskingService', () => {
it('should respect remote configuration overrides', async () => {
mockConfig.getToolOutputMaskingConfig = async () => ({
enabled: true,
toolProtectionThreshold: 100, // Very low threshold
minPrunableTokensThreshold: 50,
protectionThresholdTokens: 100, // Very low threshold
minPrunableThresholdTokens: 50,
protectLatestTurn: false,
});
@@ -53,13 +53,13 @@ export interface MaskingResult {
*
* It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with
* token savings:
* 1. **Protection Window**: Protects the newest `toolProtectionThreshold` (default 50k) tool tokens
* 1. **Protection Window**: Protects the newest `protectionThresholdTokens` (default 50k) tool tokens
* from pruning. Optionally skips the entire latest conversation turn to ensure full context for
* the model's next response.
* 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining
* tool outputs that haven't been masked yet.
* 3. **Batch Trigger**: Triggers masking only once the total prunable tokens reach
* `minPrunableTokensThreshold` (default 30k).
* `minPrunableThresholdTokens` (default 30k).
*
* @remarks
* Effectively, this means masking only starts once the conversation contains approximately 80k
@@ -71,11 +71,11 @@ export class ToolOutputMaskingService {
history: readonly Content[],
config: Config,
): Promise<MaskingResult> {
const maskingConfig = await config.getToolOutputMaskingConfig();
if (!maskingConfig.enabled || history.length === 0) {
if (history.length === 0) {
return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
}
const maskingConfig = await config.getToolOutputMaskingConfig();
let cumulativeToolTokens = 0;
let protectionBoundaryReached = false;
let totalPrunableTokens = 0;
@@ -124,7 +124,7 @@ export class ToolOutputMaskingService {
if (!protectionBoundaryReached) {
cumulativeToolTokens += partTokens;
if (cumulativeToolTokens > maskingConfig.toolProtectionThreshold) {
if (cumulativeToolTokens > maskingConfig.protectionThresholdTokens) {
protectionBoundaryReached = true;
// The part that crossed the boundary is prunable.
totalPrunableTokens += partTokens;
@@ -151,12 +151,12 @@ export class ToolOutputMaskingService {
// Trigger pruning only if we have accumulated enough savings to justify the
// overhead of masking and file I/O (batch pruning threshold).
if (totalPrunableTokens < maskingConfig.minPrunableTokensThreshold) {
if (totalPrunableTokens < maskingConfig.minPrunableThresholdTokens) {
return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
}
debugLogger.debug(
`[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableTokensThreshold.toLocaleString()})`,
`[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableThresholdTokens.toLocaleString()})`,
);
// Perform masking and offloading
+5 -1
View File
@@ -220,8 +220,12 @@ describe('Gemini Client (client.ts)', () => {
getSessionMemory: vi.fn().mockReturnValue(''),
isJitContextEnabled: vi.fn().mockReturnValue(false),
getContextManager: vi.fn().mockReturnValue(undefined),
getToolOutputMaskingEnabled: vi.fn().mockReturnValue(false),
getDisableLoopDetection: vi.fn().mockReturnValue(false),
getToolOutputMaskingConfig: vi.fn().mockReturnValue({
protectionThresholdTokens: 50000,
minPrunableThresholdTokens: 30000,
protectLatestTurn: true,
}),
getSessionId: vi.fn().mockReturnValue('test-session-id'),
getProxy: vi.fn().mockReturnValue(undefined),
-3
View File
@@ -1231,9 +1231,6 @@ export class GeminiClient {
* Masks bulky tool outputs to save context window space.
*/
private async tryMaskToolOutputs(history: readonly Content[]): Promise<void> {
if (!this.config.getToolOutputMaskingEnabled()) {
return;
}
const result = await this.toolOutputMaskingService.mask(
history,
this.config,