fix(cli): filter internal session context from history during resumption (#27391)

This commit is contained in:
Ramón Medrano Llamas
2026-05-24 08:38:53 +02:00
committed by GitHub
parent 3cc7e5b096
commit 630ecc21b9
5 changed files with 288 additions and 196 deletions
+222 -193
View File
@@ -32,227 +32,256 @@ describe('Context Management Fidelity E2E', () => {
afterEach(async () => await rig.cleanup());
it('should reproduce the exact context working buffer on resume', async () => {
// Mock responses to trigger GC (summarization)
const snapshotResponse: FakeResponse = {
method: 'generateContent',
response: {
candidates: [
{
content: {
parts: [
{
text: JSON.stringify({
new_facts: ['GC Triggered.'],
new_constraints: [],
new_tasks: [],
resolved_task_ids: [],
obsolete_fact_indices: [],
obsolete_constraint_indices: [],
chronological_summary: 'Snapshot created.',
}),
},
],
role: 'model',
},
finishReason: FinishReason.STOP,
index: 0,
},
],
} as unknown as GenerateContentResponse,
};
const countTokensResponse: FakeResponse = {
method: 'countTokens',
response: { totalTokens: 1000 },
};
const streamResponse = (text: string): FakeResponse => ({
method: 'generateContentStream',
response: [
{
it(
'should reproduce the exact context working buffer on resume',
{ timeout: 300000 },
async () => {
// Mock responses to trigger GC (summarization)
const snapshotResponse: FakeResponse = {
method: 'generateContent',
response: {
candidates: [
{
content: { parts: [{ text }], role: 'model' },
content: {
parts: [
{
text: JSON.stringify({
new_facts: ['GC Triggered.'],
new_constraints: [],
new_tasks: [],
resolved_task_ids: [],
obsolete_fact_indices: [],
obsolete_constraint_indices: [],
chronological_summary: 'Snapshot created.',
}),
},
],
role: 'model',
},
finishReason: FinishReason.STOP,
index: 0,
},
],
},
] as unknown as GenerateContentResponse[],
});
} as unknown as GenerateContentResponse,
};
const setupResponses = (fileName: string, mocks: FakeResponse[]) => {
const filePath = path.join(rig.testDir!, fileName);
const countTokensResponse: FakeResponse = {
method: 'countTokens',
response: { totalTokens: 1000 },
};
// Builds a fake `generateContentStream` reply made of a single chunk whose
// only candidate is a model turn carrying `text` and finishing with STOP.
const streamResponse = (text: string): FakeResponse => {
  const candidate = {
    content: { parts: [{ text }], role: 'model' },
    finishReason: FinishReason.STOP,
    index: 0,
  };
  // The mock only fills in the fields shown here, hence the double cast.
  const chunks = [
    { candidates: [candidate] },
  ] as unknown as GenerateContentResponse[];
  return {
    method: 'generateContentStream',
    response: chunks,
  };
};
// Writes `mocks` as newline-delimited JSON (one response per line) to
// `fileName` inside the rig's test directory and returns the resulting path.
const setupResponses = (fileName: string, mocks: FakeResponse[]) => {
  const target = path.join(rig.testDir!, fileName);
  const serialized = mocks.map((mock) => JSON.stringify(mock)).join('\n');
  fs.writeFileSync(target, serialized);
  return target;
};
await rig.setup('context-fidelity', {
settings: {
experimental: {
stressTestProfile: true, // Lowers thresholds to trigger GC easily
},
},
});
const traceDir = path.join(rig.testDir!, 'traces');
fs.mkdirSync(traceDir, { recursive: true });
const traceLog = path.join(traceDir, 'trace.log');
// Ignore trace and response files to keep environment context clean and stable
fs.writeFileSync(
filePath,
mocks.map((m) => JSON.stringify(m)).join('\n'),
path.join(rig.testDir!, '.geminiignore'),
'traces/\nresp*.json\ndebug.log\n',
);
return filePath;
};
await rig.setup('context-fidelity', {
settings: {
experimental: {
stressTestProfile: true, // Lowers thresholds to trigger GC easily
},
},
});
const commonEnv = {
GEMINI_API_KEY: 'mock-key',
GEMINI_CONTEXT_TRACE_DIR: traceDir,
GEMINI_CONTEXT_TRACE_ENABLED: 'true',
GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'),
};
const traceDir = path.join(rig.testDir!, 'traces');
fs.mkdirSync(traceDir, { recursive: true });
const traceLog = path.join(traceDir, 'trace.log');
// Canned responses shared by every run: twelve streamed acknowledgements
// ("Ack 1" .. "Ack 12") followed by 50 snapshot/countTokens pairs.
// NOTE(review): the pairs are presumably consumed by the GC/summarization
// calls; the exact consumption order depends on the fake-response runner.
const runMocks: FakeResponse[] = [
  ...Array.from({ length: 12 }, (_, idx) => streamResponse(`Ack ${idx + 1}`)),
  ...Array.from({ length: 50 }).flatMap(() => [
    snapshotResponse,
    countTokensResponse,
  ]),
];
const commonEnv = {
GEMINI_API_KEY: 'mock-key',
GEMINI_CONTEXT_TRACE_DIR: traceDir,
GEMINI_CONTEXT_TRACE_ENABLED: 'true',
GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'),
};
// Turns 1-10: Build up history
for (let i = 1; i <= 10; i++) {
await rig.run({
args: [
'--debug',
i === 1 ? '' : '--resume',
i === 1 ? '' : 'latest',
'--fake-responses-non-strict',
setupResponses(`resp_init_${i}.json`, runMocks),
].filter(Boolean),
stdin: `Turn ${i}: ` + generateRandomString(900),
env: commonEnv,
});
}
const runMocks: FakeResponse[] = [
streamResponse('Ack 1'),
streamResponse('Ack 2'),
streamResponse('Ack 3'),
streamResponse('Ack 4'),
streamResponse('Ack 5'),
streamResponse('Ack 6'),
streamResponse('Ack 7'),
streamResponse('Ack 8'),
streamResponse('Ack 9'),
streamResponse('Ack 10'),
streamResponse('Ack 11'),
streamResponse('Ack 12'),
];
for (let i = 0; i < 50; i++) {
runMocks.push(snapshotResponse);
runMocks.push(countTokensResponse);
}
// Turns 1-10: Build up history
for (let i = 1; i <= 10; i++) {
// Turn 11: Penultimate turn
await rig.run({
args: [
'--debug',
i === 1 ? '' : '--resume',
i === 1 ? '' : 'latest',
'--resume',
'latest',
'--fake-responses-non-strict',
setupResponses(`resp_init_${i}.json`, runMocks),
].filter(Boolean),
stdin: `Turn ${i}: ` + generateRandomString(900),
setupResponses('resp2.json', runMocks),
],
stdin: 'Turn 11: ' + generateRandomString(900),
env: commonEnv,
});
}
// Turn 11: Penultimate turn
await rig.run({
args: [
'--debug',
'--resume',
'latest',
'--fake-responses-non-strict',
setupResponses('resp2.json', runMocks),
],
stdin: 'Turn 11: ' + generateRandomString(900),
env: commonEnv,
});
// Turn 12: Breach threshold and force GC
await rig.run({
args: [
'--debug',
'--resume',
'latest',
'--fake-responses-non-strict',
setupResponses('resp3.json', runMocks),
],
stdin: 'Turn 12: ' + generateRandomString(900),
env: commonEnv,
});
// Turn 12: Breach threshold and force GC
await rig.run({
args: [
'--debug',
'--resume',
'latest',
'--fake-responses-non-strict',
setupResponses('resp3.json', runMocks),
],
stdin: 'Turn 12: ' + generateRandomString(900),
env: commonEnv,
});
// Extract the rendered context asset from the log
const getRenderedContext = (logContent: string): HistoryTurn[] | null => {
const lines = logContent.split('\n');
const renderLines = lines.filter(
(l) =>
l.includes('[Render] Render Sanitized Context for LLM') ||
l.includes('[Render] Render Context for LLM'),
);
if (renderLines.length === 0) return null;
// Extract the rendered context asset from the log
const getRenderedContext = (logContent: string): HistoryTurn[] | null => {
const lines = logContent.split('\n');
const renderLines = lines.filter(
(l) =>
l.includes('[Render] Render Sanitized Context for LLM') ||
l.includes('[Render] Render Context for LLM'),
const lastRender = renderLines[renderLines.length - 1];
const detailsMatch = lastRender.match(/\| Details: (.*)$/);
if (!detailsMatch) return null;
const details = JSON.parse(detailsMatch[1]);
const assetInfo =
details.renderedContextSanitized || details.renderedContext;
if (assetInfo && assetInfo.$asset) {
const assetPath = path.join(traceDir, 'assets', assetInfo.$asset);
return JSON.parse(fs.readFileSync(assetPath, 'utf-8'));
}
return assetInfo;
};
const log1 = fs.readFileSync(traceLog, 'utf-8');
const contextBeforeExit = getRenderedContext(log1);
expect(contextBeforeExit).toBeDefined();
console.log(
'Context Before Exit (First 2 turns):',
JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2),
);
if (renderLines.length === 0) return null;
const lastRender = renderLines[renderLines.length - 1];
const detailsMatch = lastRender.match(/\| Details: (.*)$/);
if (!detailsMatch) return null;
// Turn 4: Resume and run a small command
await rig.run({
args: [
'--debug',
'--resume',
'latest',
'--fake-responses-non-strict',
setupResponses('resp4.json', runMocks),
'continue',
],
env: commonEnv,
});
const details = JSON.parse(detailsMatch[1]);
const assetInfo =
details.renderedContextSanitized || details.renderedContext;
if (assetInfo && assetInfo.$asset) {
const assetPath = path.join(traceDir, 'assets', assetInfo.$asset);
return JSON.parse(fs.readFileSync(assetPath, 'utf-8'));
const log2 = fs.readFileSync(traceLog, 'utf-8');
const contextAfterResume = getRenderedContext(log2);
expect(contextAfterResume).toBeDefined();
console.log(
'Context After Resume (First 2 turns):',
JSON.stringify(contextAfterResume!.slice(0, 2), null, 2),
);
expect(contextAfterResume!.length).toBeGreaterThanOrEqual(
contextBeforeExit!.length,
);
// The environment context is intentionally refreshed on resume to reflect
// the current state of the workspace (e.g. new files, current date).
// We allow its content to differ but ensure it's still an environment context.
// Truthy when at least one text part of the turn contains the
// `<session_context>` marker; `undefined` when the turn has no parts.
const isEnvContext = (turn: HistoryTurn) => {
  const marker = '<session_context>';
  return turn.content.parts?.some((part) => part.text?.includes(marker));
};
for (let i = 0; i < contextBeforeExit!.length; i++) {
expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id);
const turnBefore = contextBeforeExit![i];
const turnAfter = contextAfterResume![i];
if (isEnvContext(turnBefore)) {
expect(isEnvContext(turnAfter)).toBe(true);
continue;
}
expect(turnAfter.content).toEqual(turnBefore.content);
}
return assetInfo;
};
const log1 = fs.readFileSync(traceLog, 'utf-8');
const contextBeforeExit = getRenderedContext(log1);
expect(contextBeforeExit).toBeDefined();
console.log(
'Context Before Exit (First 2 turns):',
JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2),
);
// Turn 4: Resume and run a small command
await rig.run({
args: [
'--debug',
'--resume',
'latest',
'--fake-responses-non-strict',
setupResponses('resp4.json', runMocks),
'continue',
],
env: commonEnv,
});
const log2 = fs.readFileSync(traceLog, 'utf-8');
const contextAfterResume = getRenderedContext(log2);
expect(contextAfterResume).toBeDefined();
console.log(
'Context After Resume (First 2 turns):',
JSON.stringify(contextAfterResume!.slice(0, 2), null, 2),
);
expect(contextAfterResume!.length).toBeGreaterThanOrEqual(
contextBeforeExit!.length,
);
for (let i = 0; i < contextBeforeExit!.length; i++) {
expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id);
expect(contextAfterResume![i].content).toEqual(
contextBeforeExit![i].content,
// Most importantly, synthetic IDs (like summaries) must be stable.
const syntheticTurns = contextBeforeExit!.filter(
(t: HistoryTurn) =>
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
(t.id && t.id.length === 32),
);
}
expect(syntheticTurns.length).toBeGreaterThan(0);
// Most importantly, synthetic IDs (like summaries) must be stable.
const syntheticTurns = contextBeforeExit!.filter(
(t: HistoryTurn) =>
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
(t.id && t.id.length === 32),
);
expect(syntheticTurns.length).toBeGreaterThan(0);
const syntheticTurnsAfter = contextAfterResume!.filter(
(t: HistoryTurn) =>
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
(t.id && t.id.length === 32),
);
expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual(
syntheticTurns.length,
);
const syntheticTurnsAfter = contextAfterResume!.filter(
(t: HistoryTurn) =>
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
(t.id && t.id.length === 32),
);
expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual(
syntheticTurns.length,
);
// Check if the first synthetic turn is identical
expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id);
expect(syntheticTurnsAfter[0].content).toEqual(syntheticTurns[0].content);
});
// Check if the first synthetic turn is identical (with relaxation for environment context)
expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id);
if (isEnvContext(syntheticTurns[0])) {
expect(isEnvContext(syntheticTurnsAfter[0])).toBe(true);
} else {
expect(syntheticTurnsAfter[0].content).toEqual(
syntheticTurns[0].content,
);
}
},
);
});
@@ -1111,6 +1111,28 @@ describe('convertSessionToHistoryFormats', () => {
});
});
it('should filter out <session_context> from UI history', () => {
  // The first record is an internal context message; only the second one is
  // expected to surface in the UI history.
  const messages: MessageRecord[] = [
    {
      id: '1',
      timestamp: new Date().toISOString(),
      type: 'user',
      content:
        '<session_context>\nThis is the Gemini CLI\n</session_context>',
    },
    {
      id: '2',
      timestamp: new Date().toISOString(),
      type: 'user',
      content: 'Real message',
    },
  ];

  const result = convertSessionToHistoryFormats(messages);

  expect(result.uiHistory).toHaveLength(1);
  expect(result.uiHistory[0].text).toBe('Real message');
});

it('should filter out <hook_context> from UI history', () => {
  // The converter skips `<hook_context>` the same way as `<session_context>`,
  // and it trims the text before checking the prefix, so surrounding
  // whitespace must not let the record through.
  const messages: MessageRecord[] = [
    {
      id: '1',
      timestamp: new Date().toISOString(),
      type: 'user',
      content: '\n  <hook_context>\nHook output\n</hook_context>\n',
    },
    {
      id: '2',
      timestamp: new Date().toISOString(),
      type: 'user',
      content: 'Real message',
    },
  ];

  const result = convertSessionToHistoryFormats(messages);

  expect(result.uiHistory).toHaveLength(1);
  expect(result.uiHistory[0].text).toBe('Real message');
});
it('should handle missing tool descriptions and displayNames', () => {
const messages: MessageRecord[] = [
{
+10 -1
View File
@@ -606,7 +606,16 @@ export function convertSessionToHistoryFormats(
const contentString = partListUnionToString(msg.content);
const uiText = displayContentString || contentString;
if (uiText.trim()) {
// Skip internal context messages in the UI history
const trimmedText = uiText.trim();
if (
trimmedText.startsWith('<session_context>') ||
trimmedText.startsWith('<hook_context>')
) {
continue;
}
if (trimmedText) {
let messageType: MessageType;
switch (msg.type) {
case 'user':
@@ -105,6 +105,35 @@ describe('convertSessionToClientHistory', () => {
]);
});
it('should ignore <session_context> and <hook_context>', () => {
  // Small factory so each fixture line shows only what varies per record.
  const userRecord = (
    id: string,
    timestamp: string,
    content: string,
  ): ConversationRecord['messages'][number] => ({
    id,
    type: 'user',
    timestamp,
    content,
  });

  // Two internal context records precede the only genuine user query.
  const messages: ConversationRecord['messages'] = [
    userRecord(
      '1',
      '2024-01-01T10:00:00Z',
      '<session_context>\nOld context\n</session_context>',
    ),
    userRecord(
      '2',
      '2024-01-01T10:01:00Z',
      '<hook_context>\nOld hook context\n</hook_context>',
    ),
    userRecord('3', '2024-01-01T10:02:00Z', 'Actual query'),
  ];

  const history = convertSessionToClientHistory(messages);
  const contents = history.map((entry) => entry.content);

  // Only the real query may reach the client history.
  const expected = [{ role: 'user', parts: [{ text: 'Actual query' }] }];
  expect(contents).toEqual(expected);
});
it('should correctly map tool calls and their responses', () => {
const messages: ConversationRecord['messages'] = [
{
+5 -2
View File
@@ -109,9 +109,12 @@ export function convertSessionToClientHistory(
if (msg.type === 'user') {
const contentString = partListUnionToString(msg.content);
const trimmedContent = contentString.trim();
if (
contentString.trim().startsWith('/') ||
contentString.trim().startsWith('?')
trimmedContent.startsWith('/') ||
trimmedContent.startsWith('?') ||
trimmedContent.startsWith('<session_context>') ||
trimmedContent.startsWith('<hook_context>')
) {
continue;
}