feat(core): Land ContextCompressionService (#24483)

This commit is contained in:
joshualitt
2026-04-02 09:22:04 -07:00
committed by GitHub
parent beff8c91aa
commit e0044f2868
32 changed files with 1160 additions and 229 deletions
@@ -15,9 +15,12 @@ vi.mock('../utils/tokenCalculation.js', () => ({
}));
import type { Content, GenerateContentResponse, Part } from '@google/genai';
import type { Config, ContextManagementConfig } from '../config/config.js';
import type { Config } from '../config/config.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
import type { AgentHistoryProviderConfig } from '../services/types.js';
import type {
AgentHistoryProviderConfig,
ContextManagementConfig,
} from './types.js';
import {
TEXT_TRUNCATION_PREFIX,
TOOL_TRUNCATION_PREFIX,
@@ -56,8 +59,6 @@ describe('AgentHistoryProvider', () => {
normalMessageTokens: 2500,
maximumMessageTokens: 10000,
normalizationHeadRatio: 0.2,
isSummarizationEnabled: false,
isTruncationEnabled: false,
};
provider = new AgentHistoryProvider(providerConfig, config);
});
@@ -68,19 +69,7 @@ describe('AgentHistoryProvider', () => {
parts: [{ text: `Message ${i}` }],
}));
it('should return history unchanged if truncation is disabled', async () => {
providerConfig.isTruncationEnabled = false;
const history = createMockHistory(40);
const result = await provider.manageHistory(history);
expect(result).toBe(history);
expect(result.length).toBe(40);
});
it('should return history unchanged if length is under threshold', async () => {
providerConfig.isTruncationEnabled = true;
const history = createMockHistory(20); // Threshold is 30
const result = await provider.manageHistory(history);
@@ -89,7 +78,6 @@ describe('AgentHistoryProvider', () => {
});
it('should truncate when total tokens exceed budget, preserving structural integrity', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.maxTokens = 60000;
providerConfig.retainedTokens = 60000;
vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
@@ -102,28 +90,10 @@ describe('AgentHistoryProvider', () => {
);
const history = createMockHistory(35); // 35 * 4000 = 140,000 total tokens > maxTokens
const result = await provider.manageHistory(history);
// Budget = 60000. Each message costs 4000. 60000 / 4000 = 15.
// However, some messages get normalized.
// The grace period is 15 messages. Their target is MAXIMUM_MESSAGE_TOKENS (10000).
// So the 15 newest messages remain at 4000 tokens each.
// That's 15 * 4000 = 60000 tokens EXACTLY!
// The next older message will push it over budget.
// So EXACTLY 15 messages will be retained.
// If the 15th newest message is a user message with a functionResponse, it might pull in the model call.
// In our createMockHistory, we don't use functionResponses.
expect(result.length).toBe(15);
expect(generateContentMock).not.toHaveBeenCalled();
expect(result[0].role).toBe('user');
expect(result[0].parts![0].text).toContain(
'### [System Note: Conversation History Truncated]',
);
expect(result.length).toBe(15); // Budget = 60000. Each message costs 4000. 60000 / 4000 = 15.
});
it('should call summarizer and prepend summary when summarization is enabled', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.isSummarizationEnabled = true;
it('should call summarizer and prepend summary', async () => {
providerConfig.maxTokens = 60000;
providerConfig.retainedTokens = 60000;
vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
@@ -144,8 +114,6 @@ describe('AgentHistoryProvider', () => {
});
it('should handle summarizer failures gracefully', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.isSummarizationEnabled = true;
providerConfig.maxTokens = 60000;
providerConfig.retainedTokens = 60000;
vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
@@ -168,8 +136,6 @@ describe('AgentHistoryProvider', () => {
});
it('should pass the contextual bridge to the summarizer', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.isSummarizationEnabled = true;
vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
enabled: true,
} as unknown as ContextManagementConfig);
@@ -201,8 +167,6 @@ describe('AgentHistoryProvider', () => {
});
it('should detect a previous summary in the truncated head', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.isSummarizationEnabled = true;
vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
enabled: true,
} as unknown as ContextManagementConfig);
@@ -233,8 +197,6 @@ describe('AgentHistoryProvider', () => {
});
it('should include the Action Path (necklace of function names) in the prompt', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.isSummarizationEnabled = true;
vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
enabled: true,
} as unknown as ContextManagementConfig);
@@ -268,7 +230,6 @@ describe('AgentHistoryProvider', () => {
describe('Tiered Normalization Logic', () => {
it('normalizes large messages incrementally: newest and exit-grace', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.retainedTokens = 30000;
providerConfig.maximumMessageTokens = 10000;
providerConfig.normalMessageTokens = 2500; // History of 35 messages.
@@ -312,7 +273,6 @@ describe('AgentHistoryProvider', () => {
});
it('normalize function responses correctly by targeting large string values', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.maximumMessageTokens = 1000;
const hugeValue = 'O'.repeat(5000);
@@ -410,7 +370,6 @@ describe('AgentHistoryProvider', () => {
describe('Multi-part Proportional Normalization', () => {
it('distributes token budget proportionally across multiple large parts', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.maximumMessageTokens = 2500; // Small limit to trigger normalization on last msg
const history = createMockHistory(35);
@@ -459,7 +418,6 @@ describe('AgentHistoryProvider', () => {
});
it('preserves small parts while truncating large parts in the same message', async () => {
providerConfig.isTruncationEnabled = true;
providerConfig.maximumMessageTokens = 2500;
const history = createMockHistory(35);
@@ -9,7 +9,7 @@ import { getResponseText } from '../utils/partUtils.js';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
import { LlmRole } from '../telemetry/llmRole.js';
import { debugLogger } from '../utils/debugLogger.js';
import type { AgentHistoryProviderConfig } from '../services/types.js';
import type { AgentHistoryProviderConfig } from './types.js';
import type { Config } from '../config/config.js';
import {
MIN_TARGET_TOKENS,
@@ -35,7 +35,7 @@ export class AgentHistoryProvider {
history: readonly Content[],
abortSignal?: AbortSignal,
): Promise<readonly Content[]> {
if (!this.providerConfig.isTruncationEnabled || history.length === 0) {
if (history.length === 0) {
return history;
}
@@ -288,13 +288,6 @@ export class AgentHistoryProvider {
): Promise<string> {
if (messagesToTruncate.length === 0) return '';
if (!this.providerConfig.isSummarizationEnabled) {
debugLogger.log(
'AgentHistoryProvider: Summarization disabled, using fallback note.',
);
return this.getFallbackSummaryText(messagesToTruncate);
}
try {
// Use the first few messages of the Grace Zone as a "contextual bridge"
// to give the summarizer lookahead into the current state.
@@ -0,0 +1,288 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { ContextCompressionService } from './contextCompressionService.js';
import type { Config } from '../config/config.js';
import type { Content } from '@google/genai';
import * as fsSync from 'node:fs';
vi.mock('node:fs/promises', () => ({
readFile: vi.fn(),
writeFile: vi.fn(),
}));
vi.mock('node:fs', () => ({
existsSync: vi.fn(),
}));
// Unit tests for ContextCompressionService. fs and fs/promises are mocked
// module-wide (see vi.mock calls above), so no real disk I/O happens.
describe('ContextCompressionService', () => {
  let mockConfig: Partial<Config>;
  let service: ContextCompressionService;
  // Shared spies for the two BaseLlmClient entry points the service uses:
  // generateJson (routing) and generateContent (summarization).
  const generateContentMock: ReturnType<typeof vi.fn> = vi.fn();
  const generateJsonMock: ReturnType<typeof vi.fn> = vi.fn();
  beforeEach(() => {
    mockConfig = {
      storage: {
        getProjectTempDir: vi.fn().mockReturnValue('/mock/temp/dir'),
      },
      // NOTE(review): mocked as async (mockResolvedValue) — the service must
      // await this flag or the returned Promise is always truthy.
      isContextManagementEnabled: vi.fn().mockResolvedValue(true),
      getBaseLlmClient: vi.fn().mockReturnValue({
        generateContent: generateContentMock,
        generateJson: generateJsonMock,
      }),
    } as unknown as Config;
    // Pretend no persisted compression_state.json exists.
    vi.mocked(fsSync.existsSync).mockReturnValue(false);
    service = new ContextCompressionService(mockConfig as Config);
  });
  afterEach(() => {
    vi.clearAllMocks();
  });
  describe('compressHistory', () => {
    it('bypasses compression if feature flag is false', async () => {
      mockConfig.isContextManagementEnabled = vi.fn().mockResolvedValue(false);
      const history: Content[] = [{ role: 'user', parts: [{ text: 'hello' }] }];
      const res = await service.compressHistory(history, 'test prompt');
      expect(res).toStrictEqual(history);
    });
    it('protects files that were read within the RECENT_TURNS_PROTECTED window', async () => {
      const history: Content[] = [
        // Turn 0 & 1 (Old)
        {
          role: 'model',
          parts: [
            {
              functionCall: {
                name: 'read_file',
                args: { filepath: 'src/app.ts' },
              },
            },
          ],
        },
        {
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: 'read_file',
                response: {
                  output: '--- src/app.ts ---\nLine 1\nLine 2\nLine 3',
                },
              },
            },
          ],
        },
        // Padding (Turns 2 & 3)
        { role: 'model', parts: [{ text: 'res 1' }] },
        { role: 'user', parts: [{ text: 'res 2' }] },
        // Padding (Turns 4 & 5)
        { role: 'model', parts: [{ text: 'res 3' }] },
        { role: 'user', parts: [{ text: 'res 4' }] },
        // Recent Turn (Turn 6 & 7, inside window, cutoff is Math.max(0, 8 - 4) = 4)
        // Here the model explicitly reads the file again
        {
          role: 'model',
          parts: [
            {
              functionCall: {
                name: 'read_file',
                args: { filepath: 'src/app.ts' },
              },
            },
          ],
        },
        {
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: 'read_file',
                response: {
                  output: '--- src/app.ts ---\nLine 1\nLine 2\nLine 3',
                },
              },
            },
          ],
        },
      ];
      const res = await service.compressHistory(history, 'test prompt');
      // Because src/app.ts was re-read recently (index 6 is >= 4), the OLD response at index 1 is PROTECTED.
      // It should NOT be compressed.
      const compressedOutput =
        res[1].parts![0].functionResponse!.response!['output'];
      expect(compressedOutput).toBe(
        '--- src/app.ts ---\nLine 1\nLine 2\nLine 3',
      );
      // Verify generateContentMock wasn't called because it bypassed the LLM routing
      expect(generateContentMock).not.toHaveBeenCalled();
    });
    it('compresses files read outside the protected window', async () => {
      const history: Content[] = [
        // Turn 0: The original function call to read the file
        {
          role: 'model',
          parts: [
            {
              functionCall: {
                name: 'read_file',
                args: { filepath: 'src/old.ts' },
              },
            },
          ],
        },
        // Turn 1: The tool output response
        {
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: 'read_file',
                response: {
                  output: '--- src/old.ts ---\nLine 1\nLine 2\nLine 3\nLine 4',
                },
              },
            },
          ],
        },
        // Padding turns to push it out of the recent window
        { role: 'model', parts: [{ text: 'msg 2' }] },
        { role: 'user', parts: [{ text: 'res 2' }] },
        { role: 'model', parts: [{ text: 'msg 3' }] },
        { role: 'user', parts: [{ text: 'res 3' }] },
        { role: 'model', parts: [{ text: 'msg 4' }] },
        { role: 'user', parts: [{ text: 'res 4' }] },
      ];
      // Mock the routing request to return PARTIAL
      generateJsonMock.mockResolvedValueOnce({
        'src/old.ts': {
          level: 'PARTIAL',
          start_line: 2,
          end_line: 3,
        },
      });
      const res = await service.compressHistory(history, 'test prompt');
      const compressedOutput =
        res[1].parts![0].functionResponse!.response!['output'];
      // NOTE(review): 'lines 23 of 4' reads like a mangled 'lines 2-3 of 4' —
      // a separator between start_line and end_line appears to have been lost
      // (matching `${startLine}${endLine}` in the service). Confirm intended
      // banner format and update both sides consistently.
      expect(compressedOutput).toContain('[Showing lines 23 of 4 in old.ts.');
      expect(compressedOutput).toContain('2 | Line 2');
      expect(compressedOutput).toContain('3 | Line 3');
    });
    it('returns SUMMARY and hits cache on subsequent requests', async () => {
      const history1: Content[] = [
        {
          role: 'model',
          parts: [
            {
              functionCall: {
                name: 'read_file',
                args: { filepath: 'src/index.ts' },
              },
            },
          ],
        },
        {
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: 'read_file',
                response: {
                  output: `--- src/index.ts ---\nVery long content here...`,
                },
              },
            },
          ],
        },
        { role: 'model', parts: [{ text: 'p1' }] },
        { role: 'user', parts: [{ text: 'p2' }] },
        { role: 'model', parts: [{ text: 'p3' }] },
        { role: 'user', parts: [{ text: 'p4' }] },
        { role: 'model', parts: [{ text: 'p5' }] },
        { role: 'user', parts: [{ text: 'p6' }] },
      ];
      // 1st request: routing says SUMMARY
      generateJsonMock.mockResolvedValueOnce({
        'src/index.ts': { level: 'SUMMARY' },
      });
      // 2nd request: the actual summarization call
      generateContentMock.mockResolvedValueOnce({
        candidates: [
          { content: { parts: [{ text: 'This is a cached summary.' }] } },
        ],
      });
      await service.compressHistory(history1, 'test query');
      expect(generateJsonMock).toHaveBeenCalledTimes(1);
      expect(generateContentMock).toHaveBeenCalledTimes(1);
      // Time passes, we get a new query. The file is still old.
      const history2: Content[] = [
        ...history1,
        { role: 'model', parts: [{ text: 'p7' }] },
        { role: 'user', parts: [{ text: 'p8' }] },
      ];
      // 3rd request: routing says SUMMARY again.
      generateJsonMock.mockResolvedValueOnce({
        'src/index.ts': { level: 'SUMMARY' },
      });
      const res = await service.compressHistory(history2, 'new query');
      // It should NOT make a 3rd fetch call for routing, since content has not changed and state is cached.
      expect(generateJsonMock).toHaveBeenCalledTimes(1);
      expect(generateContentMock).toHaveBeenCalledTimes(1);
      const compressedOutput =
        res[1].parts![0].functionResponse!.response!['output'];
      expect(compressedOutput).toContain('This is a cached summary.');
    });
    it('returns unmodified history if structural validation fails', async () => {
      // Creating a broken history where functionCall is NOT followed by user functionResponse
      const brokenHistory: Content[] = [
        {
          role: 'model',
          parts: [
            {
              functionCall: {
                name: 'read_file',
                args: { filepath: 'src/index.ts' },
              },
            },
          ],
        },
        // Missing user functionResponse!
        { role: 'model', parts: [{ text: 'Wait, I am a model again.' }] },
        { role: 'user', parts: [{ text: 'This is invalid.' }] },
        { role: 'model', parts: [{ text: 'Yep.' }] },
        { role: 'user', parts: [{ text: 'Padding.' }] },
        { role: 'model', parts: [{ text: 'Padding.' }] },
      ];
      const res = await service.compressHistory(brokenHistory, 'test query');
      // Because it's broken, it should return the exact same array by reference.
      expect(res).toBe(brokenHistory);
    });
  });
});
@@ -0,0 +1,526 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { type Config } from '../config/config.js';
import type { Content, Part } from '@google/genai';
import { LlmRole } from '../telemetry/types.js';
import { debugLogger } from '../utils/debugLogger.js';
import { getResponseText } from '../utils/partUtils.js';
import * as fs from 'node:fs/promises';
import { existsSync } from 'node:fs';
import * as path from 'node:path';
import * as crypto from 'node:crypto';
/**
 * Compression level for a file's tool output in history:
 * - 'FULL': keep the entire output untouched.
 * - 'PARTIAL': keep only a 1-based line range (startLine..endLine).
 * - 'SUMMARY': replace the content with an LLM-generated summary.
 * - 'EXCLUDED': drop the content, leaving a pointer note.
 */
export type FileLevel = 'FULL' | 'PARTIAL' | 'SUMMARY' | 'EXCLUDED';

/** Persisted per-file compression state (written to compression_state.json). */
export interface FileRecord {
  level: FileLevel;
  // Cached SUMMARY text; invalidated when contentHash changes.
  cachedSummary?: string;
  // Truncated sha256 of the raw tool output, used for cache invalidation.
  contentHash?: string;
  // 1-based inclusive range, used when level is 'PARTIAL'.
  startLine?: number;
  endLine?: number;
}

/** In-memory routing decision (camelCase form used inside the service). */
interface CompressionRecord {
  level: FileLevel;
  startLine?: number;
  endLine?: number;
}

/** Wire shape of a routing decision as returned by the model (snake_case). */
interface CompressionRecordJSON {
  level: FileLevel;
  start_line?: number;
  end_line?: number;
}
/**
 * Returns a slice of the hex-encoded sha256 digest of `content`.
 * Defaults to the first 12 hex characters — short, but plenty to detect
 * content changes for cache-invalidation purposes.
 */
function hashStringSlice(
  content: string,
  start: number = 0,
  end: number = 12,
): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(content);
  const hexDigest = hasher.digest('hex');
  return hexDigest.slice(start, end);
}
/**
 * Compresses large `read_file` / `read_many_files` tool outputs in a chat
 * history so older file content stops dominating the context window.
 *
 * A small routing model decides, per file, whether to keep the output FULL,
 * keep a PARTIAL line range, replace it with a SUMMARY, or EXCLUDE it.
 * Decisions and generated summaries are cached in memory and persisted to
 * `<projectTempDir>/compression_state.json`, keyed by filepath and
 * invalidated via a truncated sha256 of the raw output.
 */
export class ContextCompressionService {
  private config: Config;
  // Per-file compression decisions, keyed by filepath.
  private state: Map<string, FileRecord> = new Map();
  // Location of the persisted state JSON.
  private stateFilePath: string;

  constructor(config: Config) {
    this.config = config;
    const dir = this.config.storage.getProjectTempDir();
    this.stateFilePath = path.join(dir, 'compression_state.json');
  }

  /** Loads persisted state from disk. Missing or corrupt files are tolerated. */
  async loadState() {
    try {
      if (existsSync(this.stateFilePath)) {
        const data = await fs.readFile(this.stateFilePath, 'utf-8');
        // Just throw if any invariant fails.
        // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
        const parsed: Record<string, FileRecord> = JSON.parse(data);
        for (const [k, v] of Object.entries(parsed)) {
          this.state.set(k, v);
        }
      }
    } catch (e) {
      debugLogger.warn(`Failed to load compression state: ${e}`);
    }
  }

  /** Returns a plain-object snapshot of the in-memory state. */
  getState(): Record<string, FileRecord> {
    const obj: Record<string, FileRecord> = {};
    for (const [k, v] of this.state.entries()) {
      obj[k] = v;
    }
    return obj;
  }

  /** Replaces the in-memory state wholesale. */
  setState(stateData: Record<string, FileRecord>) {
    this.state.clear();
    for (const [k, v] of Object.entries(stateData)) {
      this.state.set(k, v);
    }
  }

  /** Persists the in-memory state to disk; failures are logged, not thrown. */
  async saveState() {
    try {
      await fs.writeFile(
        this.stateFilePath,
        JSON.stringify(this.getState(), null, 2),
        'utf-8',
      );
    } catch (e) {
      debugLogger.warn(`Failed to save compression state: ${e}`);
    }
  }

  /**
   * Returns a copy of `history` with eligible file-read tool outputs
   * compressed. Returns the original `history` (same reference) when the
   * feature flag is off, or when compression would break the
   * functionCall/functionResponse adjacency invariant.
   *
   * @param history     Full conversation history; never mutated.
   * @param userPrompt  Current user query, used to route file relevance.
   * @param abortSignal Optional cancellation for the model calls.
   */
  async compressHistory(
    history: Content[],
    userPrompt: string,
    abortSignal?: AbortSignal,
  ): Promise<Content[]> {
    // BUGFIX: `isContextManagementEnabled` is async (mocked with
    // mockResolvedValue in tests). Without `await`, the returned Promise is
    // always truthy and the feature flag is never honored.
    const enabled = await this.config.isContextManagementEnabled();
    if (!enabled) return history;

    // The most recent N (call, response) turn pairs are never compressed.
    const RECENT_TURNS_PROTECTED = 2;
    const cutoff = Math.max(0, history.length - RECENT_TURNS_PROTECTED * 2);

    // Pass 1: any file read at or after `cutoff` is protected everywhere,
    // including older copies of the same file earlier in the history.
    const protectedFiles = new Set<string>();
    for (let i = cutoff; i < history.length; i++) {
      const turn = history[i];
      if (!turn.parts) continue;
      for (const part of turn.parts) {
        const call = part.functionCall;
        if (
          !call ||
          (call.name !== 'read_file' && call.name !== 'read_many_files')
        ) {
          continue;
        }
        const args = call.args;
        if (!args) continue;
        // read_many_files passes an array under 'paths'.
        if (Array.isArray(args['paths'])) {
          for (const p of args['paths']) {
            protectedFiles.add(p);
          }
        }
        // read_file passes a single 'filepath'.
        const filepath = args['filepath'];
        if (filepath && typeof filepath === 'string') {
          protectedFiles.add(filepath);
        }
      }
    }

    // Pass 2: collect older, unprotected file outputs still needing a
    // routing decision (i.e. cache misses).
    type PendingFile = {
      filepath: string;
      rawContent: string;
      contentToProcess: string;
      lines: string[];
      preview: string;
      lineCount: number;
    };
    const pendingFiles: PendingFile[] = [];
    const pendingFilesSet = new Set<string>(); // deduplicate by filepath
    for (let i = 0; i < cutoff; i++) {
      const turn = history[i];
      if (turn.role !== 'user' || !turn.parts) continue;
      for (const part of turn.parts) {
        const resp = part.functionResponse;
        if (!resp) continue;
        if (resp.name !== 'read_file' && resp.name !== 'read_many_files')
          continue;
        const output = resp.response?.['output'];
        if (!output || typeof output !== 'string') continue;
        const filepath = this.extractFilepath(output);
        if (!filepath || protectedFiles.has(filepath)) continue;
        const hash = hashStringSlice(output);
        const existing = this.state.get(filepath);
        if (
          existing?.level === 'SUMMARY' &&
          existing.cachedSummary &&
          existing.contentHash === hash
        ) {
          continue; // Cache hit — skip routing for this file.
        }
        if (pendingFilesSet.has(filepath)) continue; // already queued
        pendingFilesSet.add(filepath);
        const contentToProcess = this.stripHeaderLine(output);
        const lines = contentToProcess.split('\n');
        pendingFiles.push({
          filepath,
          rawContent: output,
          contentToProcess,
          lines,
          preview: lines.slice(0, 30).join('\n'),
          lineCount: lines.length,
        });
      }
    }

    // Pass 3: a single batched routing call for all pending files.
    const routingDecisions = await this.batchQueryModel(
      pendingFiles.map((f) => ({
        filepath: f.filepath,
        lineCount: f.lineCount,
        preview: f.preview,
      })),
      userPrompt,
      abortSignal,
    );
    // Fold the decisions into state and persist once for all files.
    for (const f of pendingFiles) {
      const decision = routingDecisions.get(f.filepath) ?? {
        level: 'FULL' as FileLevel,
      };
      const record = this.state.get(f.filepath) ?? {
        level: 'FULL' as FileLevel,
      };
      const hash = hashStringSlice(f.rawContent);
      // Content changed since the summary was produced: drop the stale cache.
      if (record.contentHash && record.contentHash !== hash) {
        record.cachedSummary = undefined;
      }
      record.contentHash = hash;
      record.level = decision.level;
      record.startLine = decision.startLine;
      record.endLine = decision.endLine;
      this.state.set(f.filepath, record);
    }
    await this.saveState();

    // Pass 4: apply decisions. applyCompressionDecision reads from state;
    // only an uncached SUMMARY triggers an extra model call.
    const result: Content[] = [];
    for (let i = 0; i < history.length; i++) {
      const turn = history[i];
      if (i >= cutoff || turn.role !== 'user' || !turn.parts) {
        result.push(turn);
        continue;
      }
      const newParts = await Promise.all(
        turn.parts.map((part: Part) =>
          this.applyCompressionDecision(
            part,
            protectedFiles,
            userPrompt,
            abortSignal,
          ),
        ),
      );
      result.push({ ...turn, parts: newParts });
    }

    // Reject invalid mixed-part turns (functionResponse combined with other
    // parts) by restoring the original turn.
    for (let i = 0; i < result.length; i++) {
      const turn = result[i];
      if (turn.role !== 'user' || !turn.parts) continue;
      const hasFunctionResponse = turn.parts.some((p) => !!p.functionResponse);
      const hasNonFunctionResponse = turn.parts.some(
        (p) => !p.functionResponse,
      );
      if (hasFunctionResponse && hasNonFunctionResponse) {
        debugLogger.warn(
          'Compression produced a mixed-part turn. Restoring original turn.',
        );
        result[i] = history[i];
      }
    }

    // Structural integrity: every functionCall must be answered by a
    // functionResponse of the same name in the immediately following turn.
    for (let i = 0; i < result.length; i++) {
      const turn = result[i];
      if (!turn.parts) continue;
      for (const part of turn.parts) {
        if (!part.functionCall) continue;
        const nextTurn = result[i + 1];
        // A trailing functionCall is answered by the current incoming turn
        // in client.ts, so there is nothing to validate here.
        if (!nextTurn) continue;
        if (nextTurn.role !== 'user' || !nextTurn.parts) {
          debugLogger.warn(
            'Compression broke functionCall/functionResponse adjacency invariant. Falling back to uncompressed history.',
          );
          return history;
        }
        const hasMatchingResponse = nextTurn.parts.some(
          (p) =>
            p.functionResponse &&
            p.functionResponse.name === part.functionCall!.name,
        );
        if (!hasMatchingResponse) {
          debugLogger.warn(
            'Compression broke functionCall/functionResponse adjacency invariant. Falling back to uncompressed history.',
          );
          return history;
        }
      }
    }
    return result;
  }

  /**
   * Extracts the filepath from a leading `--- <path> ---` banner in a tool
   * output, or '' when no banner is present.
   */
  private extractFilepath(output: string): string {
    const match = output.match(/--- (.+?) ---\n/);
    if (match) return match[1];
    const firstLine = output.split('\n')[0];
    if (firstLine && firstLine.includes('---')) {
      return firstLine.replace(/---/g, '').trim();
    }
    return '';
  }

  /** Removes the leading `--- <path> ---` banner line, if any. */
  private stripHeaderLine(output: string): string {
    if (output.startsWith('--- ')) {
      const firstNewline = output.indexOf('\n');
      if (firstNewline !== -1) {
        return output.substring(firstNewline + 1);
      }
    }
    return output;
  }

  /**
   * Rewrites a single functionResponse part according to the recorded
   * decision for its file. Returns the part unchanged for protected files,
   * non-file responses, unknown files, or FULL-level records.
   * (`userPrompt` is currently unused here; kept for interface stability.)
   */
  private async applyCompressionDecision(
    part: Part,
    protectedFiles: Set<string>,
    userPrompt: string,
    abortSignal?: AbortSignal,
  ): Promise<Part> {
    const resp = part.functionResponse;
    if (!resp) return part;
    if (resp.name !== 'read_file' && resp.name !== 'read_many_files')
      return part;
    const output = resp.response?.['output'];
    if (!output || typeof output !== 'string') return part;
    const filepath = this.extractFilepath(output);
    if (!filepath || protectedFiles.has(filepath)) return part;
    const record = this.state.get(filepath);
    if (!record || record.level === 'FULL') return part;
    const contentToProcess = this.stripHeaderLine(output);
    const lines = contentToProcess.split('\n');
    let compressed: string;
    if (record.level === 'PARTIAL' && record.startLine && record.endLine) {
      // Convert the 1-based inclusive range to 0-based slice bounds.
      const start = Math.max(0, record.startLine - 1);
      const end = Math.min(lines.length, record.endLine);
      const snippet = lines
        .slice(start, end)
        .map((l, i) => `${start + i + 1} | ${l}`)
        .join('\n');
      // BUGFIX: the range previously rendered as `${startLine}${endLine}`
      // (e.g. "lines 23 of 4" for 2..3) — add an explicit separator.
      compressed =
        `[Showing lines ${record.startLine}-${record.endLine} of ${lines.length} ` +
        `in ${path.basename(filepath)}. Full file available via read_file.]\n\n${snippet}`;
    } else if (record.level === 'SUMMARY') {
      if (!record.cachedSummary) {
        record.cachedSummary = await this.generateSummary(
          filepath,
          contentToProcess,
          abortSignal,
        );
        this.state.set(filepath, record);
        await this.saveState();
      }
      compressed =
        `[Summary of ${path.basename(filepath)} (${lines.length} lines). ` +
        `Full file available via read_file.]\n\n${record.cachedSummary}`;
    } else if (record.level === 'EXCLUDED') {
      compressed =
        `[${path.basename(filepath)} omitted as not relevant to current query. ` +
        `Request via read_file if needed.]`;
    } else {
      return part;
    }
    if (compressed === output) return part;
    return {
      functionResponse: {
        // `FunctionResponse` should be safe to spread
        // eslint-disable-next-line @typescript-eslint/no-misused-spread
        ...resp,
        response: { ...resp.response, output: compressed },
      },
    };
  }

  /** Returns the recorded compression state for a file, if any. */
  getFileState(filepath: string): FileRecord | undefined {
    return this.state.get(filepath);
  }

  /**
   * Asks the routing model for a compression level for every pending file in
   * a single structured-JSON call. Any failure — or a missing/malformed entry
   * for a file — leaves that file at the safe FULL default.
   */
  private async batchQueryModel(
    files: Array<{ filepath: string; lineCount: number; preview: string }>,
    userPrompt: string,
    abortSignal?: AbortSignal,
  ): Promise<Map<string, CompressionRecord>> {
    const results = new Map<string, CompressionRecord>();
    // Default all to FULL so any failure is safe
    for (const f of files) {
      results.set(f.filepath, { level: 'FULL' });
    }
    if (files.length === 0) return results;
    const systemPrompt = `You are a context routing agent for a coding AI session.
For each file listed, decide what level of content to send to the main model.
Levels: FULL, PARTIAL (with line range), SUMMARY, EXCLUDED.
Rules:
- FULL if the file is directly relevant to the query or small (<80 lines)
- PARTIAL if only a specific section is needed provide start_line and end_line
- SUMMARY for background context files not directly needed
- EXCLUDED for completely unrelated files
Respond ONLY with a JSON object where each key is the filepath and the value is:
{"level":"FULL"|"PARTIAL"|"SUMMARY"|"EXCLUDED","start_line":null,"end_line":null}`;
    const fileList = files
      .map(
        (f) =>
          `File: ${f.filepath} (${f.lineCount} lines)\nPreview:\n${f.preview}`,
      )
      .join('\n\n---\n\n');
    const userMessage = `Query: "${userPrompt}"\n\n${fileList}`;
    const client = this.config.getBaseLlmClient();
    try {
      // Build per-file schema properties dynamically
      const properties: Record<string, object> = {};
      for (const f of files) {
        properties[f.filepath] = {
          type: 'OBJECT',
          properties: {
            level: { type: 'STRING' },
            start_line: { type: 'INTEGER' },
            end_line: { type: 'INTEGER' },
          },
          required: ['level'],
        };
      }
      const responseJson = await client.generateJson({
        modelConfigKey: { model: 'chat-compression-2.5-flash-lite' },
        contents: [{ role: 'user', parts: [{ text: userMessage }] }],
        systemInstruction: systemPrompt,
        schema: { properties, required: files.map((f) => f.filepath) },
        promptId: 'context-compression-batch-query',
        role: LlmRole.UTILITY_COMPRESSOR,
        abortSignal: abortSignal ?? new AbortController().signal,
      });
      for (const f of files) {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
        const decision = responseJson[f.filepath] as
          | CompressionRecordJSON
          | undefined;
        // Skip absent or malformed entries; the FULL default stays in place.
        if (!decision || typeof decision !== 'object' || !decision.level) {
          continue;
        }
        results.set(f.filepath, {
          level: decision.level,
          startLine: decision.start_line ?? undefined,
          endLine: decision.end_line ?? undefined,
        });
      }
    } catch (e) {
      debugLogger.warn(
        `Batch cloud routing failed: ${e}. Defaulting all to FULL.`,
      );
    }
    return results;
  }

  /**
   * Generates a short technical summary of `content` (capped at 4000 chars).
   * On failure, returns a bracketed error note rather than throwing, so the
   * compressed history is always well-formed.
   */
  private async generateSummary(
    filepath: string,
    content: string,
    abortSignal?: AbortSignal,
  ): Promise<string> {
    const promptMessage = `Summarize this file in 2-3 sentences. Be technical and specific about what it exports, its key functions, and dependencies. File: ${filepath}\n\n${content.slice(0, 4000)}`;
    const client = this.config.getBaseLlmClient();
    try {
      const response = await client.generateContent({
        modelConfigKey: { model: 'chat-compression-2.5-flash-lite' },
        contents: [{ role: 'user', parts: [{ text: promptMessage }] }],
        promptId: 'local-context-compression-summary',
        role: LlmRole.UTILITY_COMPRESSOR,
        abortSignal: abortSignal ?? new AbortController().signal,
      });
      const text = getResponseText(response) ?? '';
      return text.trim();
    } catch (e) {
      return `[Summary generation failed for ${filepath} (cloud error): ${e}]`;
    }
  }
}
@@ -5,7 +5,7 @@
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ContextManager } from './contextManager.js';
import { MemoryContextManager } from './memoryContextManager.js';
import * as memoryDiscovery from '../utils/memoryDiscovery.js';
import type { Config } from '../config/config.js';
import { coreEvents, CoreEvent } from '../utils/events.js';
@@ -29,8 +29,8 @@ vi.mock('../utils/memoryDiscovery.js', async (importOriginal) => {
};
});
describe('ContextManager', () => {
let contextManager: ContextManager;
describe('MemoryContextManager', () => {
let memoryContextManager: MemoryContextManager;
let mockConfig: Config;
beforeEach(() => {
@@ -55,7 +55,7 @@ describe('ContextManager', () => {
},
} as unknown as Config;
contextManager = new ContextManager(mockConfig);
memoryContextManager = new MemoryContextManager(mockConfig);
vi.clearAllMocks();
vi.spyOn(coreEvents, 'emit');
vi.mocked(memoryDiscovery.getExtensionMemoryPaths).mockReturnValue([]);
@@ -86,7 +86,7 @@ describe('ContextManager', () => {
{ filePath: envPaths[0], content: 'Env Content' },
]);
await contextManager.refresh();
await memoryContextManager.refresh();
expect(memoryDiscovery.getGlobalMemoryPaths).toHaveBeenCalled();
expect(memoryDiscovery.getEnvironmentMemoryPaths).toHaveBeenCalledWith(
@@ -99,14 +99,18 @@ describe('ContextManager', () => {
['.git'],
);
expect(contextManager.getGlobalMemory()).toContain('Global Content');
expect(contextManager.getEnvironmentMemory()).toContain('Env Content');
expect(contextManager.getEnvironmentMemory()).toContain(
expect(memoryContextManager.getGlobalMemory()).toContain(
'Global Content',
);
expect(memoryContextManager.getEnvironmentMemory()).toContain(
'Env Content',
);
expect(memoryContextManager.getEnvironmentMemory()).toContain(
'MCP Instructions',
);
expect(contextManager.getLoadedPaths()).toContain(globalPaths[0]);
expect(contextManager.getLoadedPaths()).toContain(envPaths[0]);
expect(memoryContextManager.getLoadedPaths()).toContain(globalPaths[0]);
expect(memoryContextManager.getLoadedPaths()).toContain(envPaths[0]);
});
it('should emit MemoryChanged event when memory is refreshed', async () => {
@@ -121,7 +125,7 @@ describe('ContextManager', () => {
{ filePath: '/app/src/GEMINI.md', content: 'env content' },
]);
await contextManager.refresh();
await memoryContextManager.refresh();
expect(coreEvents.emit).toHaveBeenCalledWith(CoreEvent.MemoryChanged, {
fileCount: 2,
@@ -137,11 +141,13 @@ describe('ContextManager', () => {
{ filePath: '/home/user/.gemini/GEMINI.md', content: 'Global Content' },
]);
await contextManager.refresh();
await memoryContextManager.refresh();
expect(memoryDiscovery.getEnvironmentMemoryPaths).not.toHaveBeenCalled();
expect(contextManager.getEnvironmentMemory()).toBe('');
expect(contextManager.getGlobalMemory()).toContain('Global Content');
expect(memoryContextManager.getEnvironmentMemory()).toBe('');
expect(memoryContextManager.getGlobalMemory()).toContain(
'Global Content',
);
});
it('should deduplicate files by file identity in case-insensitive filesystems', async () => {
@@ -168,7 +174,7 @@ describe('ContextManager', () => {
{ filePath: '/app/gemini.md', content: 'Project Content' },
]);
await contextManager.refresh();
await memoryContextManager.refresh();
expect(
memoryDiscovery.deduplicatePathsByFileIdentity,
@@ -184,7 +190,7 @@ describe('ContextManager', () => {
'tree',
['.git'],
);
expect(contextManager.getEnvironmentMemory()).toContain(
expect(memoryContextManager.getEnvironmentMemory()).toContain(
'Project Content',
);
});
@@ -199,9 +205,10 @@ describe('ContextManager', () => {
mockResult,
);
const result = await contextManager.discoverContext('/app/src/file.ts', [
'/app',
]);
const result = await memoryContextManager.discoverContext(
'/app/src/file.ts',
['/app'],
);
expect(memoryDiscovery.loadJitSubdirectoryMemory).toHaveBeenCalledWith(
'/app/src/file.ts',
@@ -212,7 +219,9 @@ describe('ContextManager', () => {
);
expect(result).toMatch(/--- Context from: \/app\/src\/GEMINI\.md ---/);
expect(result).toContain('Src Content');
expect(contextManager.getLoadedPaths()).toContain('/app/src/GEMINI.md');
expect(memoryContextManager.getLoadedPaths()).toContain(
'/app/src/GEMINI.md',
);
});
it('should return empty string if no new files found', async () => {
@@ -221,9 +230,10 @@ describe('ContextManager', () => {
mockResult,
);
const result = await contextManager.discoverContext('/app/src/file.ts', [
'/app',
]);
const result = await memoryContextManager.discoverContext(
'/app/src/file.ts',
['/app'],
);
expect(result).toBe('');
});
@@ -231,9 +241,10 @@ describe('ContextManager', () => {
it('should return empty string if folder is not trusted', async () => {
vi.mocked(mockConfig.isTrustedFolder).mockReturnValue(false);
const result = await contextManager.discoverContext('/app/src/file.ts', [
'/app',
]);
const result = await memoryContextManager.discoverContext(
'/app/src/file.ts',
['/app'],
);
expect(memoryDiscovery.loadJitSubdirectoryMemory).not.toHaveBeenCalled();
expect(result).toBe('');
@@ -248,7 +259,7 @@ describe('ContextManager', () => {
files: [],
});
await contextManager.discoverContext('/app/src/file.ts', ['/app']);
await memoryContextManager.discoverContext('/app/src/file.ts', ['/app']);
expect(memoryDiscovery.loadJitSubdirectoryMemory).toHaveBeenCalledWith(
'/app/src/file.ts',
@@ -19,7 +19,7 @@ import {
import type { Config } from '../config/config.js';
import { coreEvents, CoreEvent } from '../utils/events.js';
export class ContextManager {
export class MemoryContextManager {
private readonly loadedPaths: Set<string> = new Set();
private readonly loadedFileIdentities: Set<string> = new Set();
private readonly config: Config;
+27
View File
@@ -0,0 +1,27 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { ContextManagementConfig } from './types.js';
/**
 * Default context-management profile for general-purpose agents.
 * All numeric values are token budgets; see ContextManagementConfig in
 * ./types.js for the field shapes.
 */
export const generalistProfile: ContextManagementConfig = {
  enabled: true,
  // Overall history budget and the amount retained after trimming.
  historyWindow: { maxTokens: 150_000, retainedTokens: 80_000 },
  // Per-message normalization limits.
  messageLimits: {
    normalMaxTokens: 3_000,
    retainedMaxTokens: 30_000,
    normalizationHeadRatio: 0.15,
  },
  tools: {
    // Thresholds for distilling oversized tool outputs.
    distillation: {
      maxOutputTokens: 10_000,
      summarizationThresholdTokens: 20_000,
    },
    // Thresholds for masking prunable tool outputs.
    outputMasking: {
      protectionThresholdTokens: 50_000,
      minPrunableThresholdTokens: 30_000,
      protectLatestTurn: true,
    },
  },
};
+39
View File
@@ -0,0 +1,39 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Token-budget knobs consumed by AgentHistoryProvider for history
 * truncation and tiered message normalization. All values are token counts
 * except normalizationHeadRatio.
 */
export interface AgentHistoryProviderConfig {
  // Total-history budget; truncation triggers once history exceeds this.
  maxTokens: number;
  // Token budget retained (newest-first) when the history is truncated.
  retainedTokens: number;
  // Target size for older messages after normalization.
  normalMessageTokens: number;
  // Per-message cap applied to the newest (grace-period) messages.
  maximumMessageTokens: number;
  // Fraction treated as the normalization "head" — assumed ratio in [0, 1];
  // confirm exact semantics against AgentHistoryProvider.
  normalizationHeadRatio: number;
}

/** Thresholds controlling masking of large, prunable tool outputs. */
export interface ToolOutputMaskingConfig {
  protectionThresholdTokens: number;
  minPrunableThresholdTokens: number;
  // When true, the latest turn's tool output is never masked.
  protectLatestTurn: boolean;
}

/**
 * Top-level context-management configuration (see generalistProfile for a
 * concrete instance). Returned by Config.getContextManagementConfig().
 */
export interface ContextManagementConfig {
  // Master switch for all context-management behavior.
  enabled: boolean;
  // Overall history budget and post-truncation retention.
  historyWindow: {
    maxTokens: number;
    retainedTokens: number;
  };
  // Per-message normalization limits.
  messageLimits: {
    normalMaxTokens: number;
    retainedMaxTokens: number;
    normalizationHeadRatio: number;
  };
  tools: {
    // Limits for distilling oversized tool outputs into summaries.
    distillation: {
      maxOutputTokens: number;
      summarizationThresholdTokens: number;
    };
    outputMasking: ToolOutputMaskingConfig;
  };
}