From 630ecc21b94e747ef6b7e1356db42327a1f93dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Medrano=20Llamas?= <45878745+rmedranollamas@users.noreply.github.com> Date: Sun, 24 May 2026 08:38:53 +0200 Subject: [PATCH] fix(cli): filter internal session context from history during resumption (#27391) --- integration-tests/context-fidelity.test.ts | 415 ++++++++++--------- packages/cli/src/utils/sessionUtils.test.ts | 22 + packages/cli/src/utils/sessionUtils.ts | 11 +- packages/core/src/utils/sessionUtils.test.ts | 29 ++ packages/core/src/utils/sessionUtils.ts | 7 +- 5 files changed, 288 insertions(+), 196 deletions(-) diff --git a/integration-tests/context-fidelity.test.ts b/integration-tests/context-fidelity.test.ts index 591fbeb29e..845b25b22f 100644 --- a/integration-tests/context-fidelity.test.ts +++ b/integration-tests/context-fidelity.test.ts @@ -32,227 +32,256 @@ describe('Context Management Fidelity E2E', () => { afterEach(async () => await rig.cleanup()); - it('should reproduce the exact context working buffer on resume', async () => { - // Mock responses to trigger GC (summarization) - const snapshotResponse: FakeResponse = { - method: 'generateContent', - response: { - candidates: [ - { - content: { - parts: [ - { - text: JSON.stringify({ - new_facts: ['GC Triggered.'], - new_constraints: [], - new_tasks: [], - resolved_task_ids: [], - obsolete_fact_indices: [], - obsolete_constraint_indices: [], - chronological_summary: 'Snapshot created.', - }), - }, - ], - role: 'model', - }, - finishReason: FinishReason.STOP, - index: 0, - }, - ], - } as unknown as GenerateContentResponse, - }; - - const countTokensResponse: FakeResponse = { - method: 'countTokens', - response: { totalTokens: 1000 }, - }; - - const streamResponse = (text: string): FakeResponse => ({ - method: 'generateContentStream', - response: [ - { + it( + 'should reproduce the exact context working buffer on resume', + { timeout: 300000 }, + async () => { + // Mock responses to trigger GC (summarization) + const snapshotResponse: FakeResponse = { + method: 'generateContent', + response: { candidates: [ { - content: { parts: [{ text }], role: 'model' }, + content: { + parts: [ + { + text: JSON.stringify({ + new_facts: ['GC Triggered.'], + new_constraints: [], + new_tasks: [], + resolved_task_ids: [], + obsolete_fact_indices: [], + obsolete_constraint_indices: [], + chronological_summary: 'Snapshot created.', + }), + }, + ], + role: 'model', + }, finishReason: FinishReason.STOP, index: 0, }, ], - }, - ] as unknown as GenerateContentResponse[], - }); + } as unknown as GenerateContentResponse, + }; - const setupResponses = (fileName: string, mocks: FakeResponse[]) => { - const filePath = path.join(rig.testDir!, fileName); + const countTokensResponse: FakeResponse = { + method: 'countTokens', + response: { totalTokens: 1000 }, + }; + + const streamResponse = (text: string): FakeResponse => ({ + method: 'generateContentStream', + response: [ + { + candidates: [ + { + content: { parts: [{ text }], role: 'model' }, + finishReason: FinishReason.STOP, + index: 0, + }, + ], + }, + ] as unknown as GenerateContentResponse[], + }); + + const setupResponses = (fileName: string, mocks: FakeResponse[]) => { + const filePath = path.join(rig.testDir!, fileName); + fs.writeFileSync( + filePath, + mocks.map((m) => JSON.stringify(m)).join('\n'), + ); + return filePath; + }; + + await rig.setup('context-fidelity', { + settings: { + experimental: { + stressTestProfile: true, // Lowers thresholds to trigger GC easily + }, + }, + }); + + const traceDir = path.join(rig.testDir!, 'traces'); + fs.mkdirSync(traceDir, { recursive: true }); + const traceLog = path.join(traceDir, 'trace.log'); + + // Ignore trace and response files to keep environment context clean and stable fs.writeFileSync( - filePath, - mocks.map((m) => JSON.stringify(m)).join('\n'), + path.join(rig.testDir!, '.geminiignore'), + 'traces/\nresp*.json\ndebug.log\n', ); - return filePath; - }; - await rig.setup('context-fidelity', { - settings: { - experimental: { - stressTestProfile: true, // Lowers thresholds to trigger GC easily - }, - }, - }); + const commonEnv = { + GEMINI_API_KEY: 'mock-key', + GEMINI_CONTEXT_TRACE_DIR: traceDir, + GEMINI_CONTEXT_TRACE_ENABLED: 'true', + GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'), + }; - const traceDir = path.join(rig.testDir!, 'traces'); - fs.mkdirSync(traceDir, { recursive: true }); - const traceLog = path.join(traceDir, 'trace.log'); + const runMocks: FakeResponse[] = [ + streamResponse('Ack 1'), + streamResponse('Ack 2'), + streamResponse('Ack 3'), + streamResponse('Ack 4'), + streamResponse('Ack 5'), + streamResponse('Ack 6'), + streamResponse('Ack 7'), + streamResponse('Ack 8'), + streamResponse('Ack 9'), + streamResponse('Ack 10'), + streamResponse('Ack 11'), + streamResponse('Ack 12'), + ]; + for (let i = 0; i < 50; i++) { + runMocks.push(snapshotResponse); + runMocks.push(countTokensResponse); + } - const commonEnv = { - GEMINI_API_KEY: 'mock-key', - GEMINI_CONTEXT_TRACE_DIR: traceDir, - GEMINI_CONTEXT_TRACE_ENABLED: 'true', - GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'), - }; + // Turns 1-10: Build up history + for (let i = 1; i <= 10; i++) { + await rig.run({ + args: [ + '--debug', + i === 1 ? '' : '--resume', + i === 1 ? '' : 'latest', + '--fake-responses-non-strict', + setupResponses(`resp_init_${i}.json`, runMocks), + ].filter(Boolean), + stdin: `Turn ${i}: ` + generateRandomString(900), + env: commonEnv, + }); + } - const runMocks: FakeResponse[] = [ - streamResponse('Ack 1'), - streamResponse('Ack 2'), - streamResponse('Ack 3'), - streamResponse('Ack 4'), - streamResponse('Ack 5'), - streamResponse('Ack 6'), - streamResponse('Ack 7'), - streamResponse('Ack 8'), - streamResponse('Ack 9'), - streamResponse('Ack 10'), - streamResponse('Ack 11'), - streamResponse('Ack 12'), - ]; - for (let i = 0; i < 50; i++) { - runMocks.push(snapshotResponse); - runMocks.push(countTokensResponse); - } - - // Turns 1-10: Build up history - for (let i = 1; i <= 10; i++) { + // Turn 11: Penultimate turn await rig.run({ args: [ '--debug', - i === 1 ? '' : '--resume', - i === 1 ? '' : 'latest', + '--resume', + 'latest', '--fake-responses-non-strict', - setupResponses(`resp_init_${i}.json`, runMocks), - ].filter(Boolean), - stdin: `Turn ${i}: ` + generateRandomString(900), + setupResponses('resp2.json', runMocks), + ], + stdin: 'Turn 11: ' + generateRandomString(900), env: commonEnv, }); - } - // Turn 11: Penultimate turn - await rig.run({ - args: [ - '--debug', - '--resume', - 'latest', - '--fake-responses-non-strict', - setupResponses('resp2.json', runMocks), - ], - stdin: 'Turn 11: ' + generateRandomString(900), - env: commonEnv, - }); + // Turn 12: Breach threshold and force GC + await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + '--fake-responses-non-strict', + setupResponses('resp3.json', runMocks), + ], + stdin: 'Turn 12: ' + generateRandomString(900), + env: commonEnv, + }); - // Turn 12: Breach threshold and force GC - await rig.run({ - args: [ - '--debug', - '--resume', - 'latest', - '--fake-responses-non-strict', - setupResponses('resp3.json', runMocks), - ], - stdin: 'Turn 12: ' + generateRandomString(900), - env: commonEnv, - }); + // Extract the rendered context asset from the log + const getRenderedContext = (logContent: string): HistoryTurn[] | null => { + const lines = logContent.split('\n'); + const renderLines = lines.filter( + (l) => + l.includes('[Render] Render Sanitized Context for LLM') || + l.includes('[Render] Render Context for LLM'), + ); + if (renderLines.length === 0) return null; - // Extract the rendered context asset from the log - const getRenderedContext = (logContent: string): HistoryTurn[] | null => { - const lines = logContent.split('\n'); - const renderLines = lines.filter( - (l) => - l.includes('[Render] Render Sanitized Context for LLM') || - l.includes('[Render] Render Context for LLM'), + const lastRender = renderLines[renderLines.length - 1]; + const detailsMatch = lastRender.match(/\| Details: (.*)$/); + if (!detailsMatch) return null; + + const details = JSON.parse(detailsMatch[1]); + const assetInfo = + details.renderedContextSanitized || details.renderedContext; + if (assetInfo && assetInfo.$asset) { + const assetPath = path.join(traceDir, 'assets', assetInfo.$asset); + return JSON.parse(fs.readFileSync(assetPath, 'utf-8')); + } + return assetInfo; + }; + + const log1 = fs.readFileSync(traceLog, 'utf-8'); + const contextBeforeExit = getRenderedContext(log1); + expect(contextBeforeExit).toBeDefined(); + console.log( + 'Context Before Exit (First 2 turns):', + JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2), ); - if (renderLines.length === 0) return null; - const lastRender = renderLines[renderLines.length - 1]; - const detailsMatch = lastRender.match(/\| Details: (.*)$/); - if (!detailsMatch) return null; + // Turn 4: Resume and run a small command + await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + '--fake-responses-non-strict', + setupResponses('resp4.json', runMocks), + 'continue', + ], + env: commonEnv, + }); - const details = JSON.parse(detailsMatch[1]); - const assetInfo = - details.renderedContextSanitized || details.renderedContext; - if (assetInfo && assetInfo.$asset) { - const assetPath = path.join(traceDir, 'assets', assetInfo.$asset); - return JSON.parse(fs.readFileSync(assetPath, 'utf-8')); + const log2 = fs.readFileSync(traceLog, 'utf-8'); + const contextAfterResume = getRenderedContext(log2); + expect(contextAfterResume).toBeDefined(); + console.log( + 'Context After Resume (First 2 turns):', + JSON.stringify(contextAfterResume!.slice(0, 2), null, 2), + ); + + expect(contextAfterResume!.length).toBeGreaterThanOrEqual( + contextBeforeExit!.length, + ); + + // The environment context is intentionally refreshed on resume to reflect + // the current state of the workspace (e.g. new files, current date). + // We allow its content to differ but ensure it's still an environment context. + const isEnvContext = (turn: HistoryTurn) => + turn.content.parts?.some((p) => p.text?.includes('')); + + for (let i = 0; i < contextBeforeExit!.length; i++) { + expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id); + + const turnBefore = contextBeforeExit![i]; + const turnAfter = contextAfterResume![i]; + + if (isEnvContext(turnBefore)) { + expect(isEnvContext(turnAfter)).toBe(true); + continue; + } + + expect(turnAfter.content).toEqual(turnBefore.content); } - return assetInfo; - }; - const log1 = fs.readFileSync(traceLog, 'utf-8'); - const contextBeforeExit = getRenderedContext(log1); - expect(contextBeforeExit).toBeDefined(); - console.log( - 'Context Before Exit (First 2 turns):', - JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2), - ); - - // Turn 4: Resume and run a small command - await rig.run({ - args: [ - '--debug', - '--resume', - 'latest', - '--fake-responses-non-strict', - setupResponses('resp4.json', runMocks), - 'continue', - ], - env: commonEnv, - }); - - const log2 = fs.readFileSync(traceLog, 'utf-8'); - const contextAfterResume = getRenderedContext(log2); - expect(contextAfterResume).toBeDefined(); - console.log( - 'Context After Resume (First 2 turns):', - JSON.stringify(contextAfterResume!.slice(0, 2), null, 2), - ); - - expect(contextAfterResume!.length).toBeGreaterThanOrEqual( - contextBeforeExit!.length, - ); - - for (let i = 0; i < contextBeforeExit!.length; i++) { - expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id); - expect(contextAfterResume![i].content).toEqual( - contextBeforeExit![i].content, + // Most importantly, synthetic IDs (like summaries) must be stable. + const syntheticTurns = contextBeforeExit!.filter( + (t: HistoryTurn) => + t.content.parts?.some((p) => p.text?.includes('active_tasks')) || + (t.id && t.id.length === 32), ); - } + expect(syntheticTurns.length).toBeGreaterThan(0); - // Most importantly, synthetic IDs (like summaries) must be stable. - const syntheticTurns = contextBeforeExit!.filter( - (t: HistoryTurn) => - t.content.parts?.some((p) => p.text?.includes('active_tasks')) || - (t.id && t.id.length === 32), - ); - expect(syntheticTurns.length).toBeGreaterThan(0); + const syntheticTurnsAfter = contextAfterResume!.filter( + (t: HistoryTurn) => + t.content.parts?.some((p) => p.text?.includes('active_tasks')) || + (t.id && t.id.length === 32), + ); + expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual( + syntheticTurns.length, + ); - const syntheticTurnsAfter = contextAfterResume!.filter( - (t: HistoryTurn) => - t.content.parts?.some((p) => p.text?.includes('active_tasks')) || - (t.id && t.id.length === 32), - ); - expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual( - syntheticTurns.length, - ); - - // Check if the first synthetic turn is identical - expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id); - expect(syntheticTurnsAfter[0].content).toEqual(syntheticTurns[0].content); - }); + // Check if the first synthetic turn is identical (with relaxation for environment context) + expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id); + if (isEnvContext(syntheticTurns[0])) { + expect(isEnvContext(syntheticTurnsAfter[0])).toBe(true); + } else { + expect(syntheticTurnsAfter[0].content).toEqual( + syntheticTurns[0].content, + ); + } + }, + ); }); diff --git a/packages/cli/src/utils/sessionUtils.test.ts b/packages/cli/src/utils/sessionUtils.test.ts index cfdadf795f..cbd033a2c6 100644 --- a/packages/cli/src/utils/sessionUtils.test.ts +++ b/packages/cli/src/utils/sessionUtils.test.ts @@ -1111,6 +1111,28 @@ describe('convertSessionToHistoryFormats', () => { }); }); + it('should filter out from UI history', () => { + const messages: MessageRecord[] = [ + { + id: '1', + timestamp: new Date().toISOString(), + type: 'user', + content: + '\nThis is the Gemini CLI\n', + }, + { + id: '2', + timestamp: new Date().toISOString(), + type: 'user', + content: 'Real message', + }, + ]; + + const result = convertSessionToHistoryFormats(messages); + expect(result.uiHistory).toHaveLength(1); + expect(result.uiHistory[0].text).toBe('Real message'); + }); + it('should handle missing tool descriptions and displayNames', () => { const messages: MessageRecord[] = [ { diff --git a/packages/cli/src/utils/sessionUtils.ts b/packages/cli/src/utils/sessionUtils.ts index a2918eae3e..2abab79e90 100644 --- a/packages/cli/src/utils/sessionUtils.ts +++ b/packages/cli/src/utils/sessionUtils.ts @@ -606,7 +606,16 @@ export function convertSessionToHistoryFormats( const contentString = partListUnionToString(msg.content); const uiText = displayContentString || contentString; - if (uiText.trim()) { + // Skip internal context messages in the UI history + const trimmedText = uiText.trim(); + if ( + trimmedText.startsWith('') || + trimmedText.startsWith('') + ) { + continue; + } + + if (trimmedText) { let messageType: MessageType; switch (msg.type) { case 'user': diff --git a/packages/core/src/utils/sessionUtils.test.ts b/packages/core/src/utils/sessionUtils.test.ts index adcb2e76e0..141efab572 100644 --- a/packages/core/src/utils/sessionUtils.test.ts +++ b/packages/core/src/utils/sessionUtils.test.ts @@ -105,6 +105,35 @@ describe('convertSessionToClientHistory', () => { ]); }); + it('should ignore and ', () => { + const messages: ConversationRecord['messages'] = [ + { + id: '1', + type: 'user', + timestamp: '2024-01-01T10:00:00Z', + content: '\nOld context\n', + }, + { + id: '2', + type: 'user', + timestamp: '2024-01-01T10:01:00Z', + content: '\nOld hook context\n', + }, + { + id: '3', + type: 'user', + timestamp: '2024-01-01T10:02:00Z', + content: 'Actual query', + }, + ]; + + const history = convertSessionToClientHistory(messages); + + expect(history.map((h) => h.content)).toEqual([ + { role: 'user', parts: [{ text: 'Actual query' }] }, + ]); + }); + it('should correctly map tool calls and their responses', () => { const messages: ConversationRecord['messages'] = [ { diff --git a/packages/core/src/utils/sessionUtils.ts b/packages/core/src/utils/sessionUtils.ts index cef35650c9..763a29e990 100644 --- a/packages/core/src/utils/sessionUtils.ts +++ b/packages/core/src/utils/sessionUtils.ts @@ -109,9 +109,12 @@ export function convertSessionToClientHistory( if (msg.type === 'user') { const contentString = partListUnionToString(msg.content); + const trimmedContent = contentString.trim(); if ( - contentString.trim().startsWith('/') || - contentString.trim().startsWith('?') + trimmedContent.startsWith('/') || + trimmedContent.startsWith('?') || + trimmedContent.startsWith('') || + trimmedContent.startsWith('') ) { continue; }