/** * @license * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import { TestRig } from './test-helper.js'; import * as path from 'node:path'; import * as fs from 'node:fs'; import { FinishReason, GenerateContentResponse } from '@google/genai'; import type { FakeResponse, HistoryTurn } from '@google/gemini-cli-core'; describe('Context Management Fidelity E2E', () => { let rig: TestRig; function generateRandomString(length: number): string { const characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'; let result = ''; for (let i = 0; i < length; i++) { result += characters.charAt( Math.floor(Math.random() * characters.length), ); } return result; } beforeEach(() => { rig = new TestRig(); }); afterEach(async () => await rig.cleanup()); it( 'should reproduce the exact context working buffer on resume', { timeout: 300000 }, async () => { // Mock responses to trigger GC (summarization) const snapshotResponse: FakeResponse = { method: 'generateContent', response: { candidates: [ { content: { parts: [ { text: JSON.stringify({ new_facts: ['GC Triggered.'], new_constraints: [], new_tasks: [], resolved_task_ids: [], obsolete_fact_indices: [], obsolete_constraint_indices: [], chronological_summary: 'Snapshot created.', }), }, ], role: 'model', }, finishReason: FinishReason.STOP, index: 0, }, ], } as unknown as GenerateContentResponse, }; const countTokensResponse: FakeResponse = { method: 'countTokens', response: { totalTokens: 1000 }, }; const streamResponse = (text: string): FakeResponse => ({ method: 'generateContentStream', response: [ { candidates: [ { content: { parts: [{ text }], role: 'model' }, finishReason: FinishReason.STOP, index: 0, }, ], }, ] as unknown as GenerateContentResponse[], }); const setupResponses = (fileName: string, mocks: FakeResponse[]) => { const filePath = path.join(rig.testDir!, fileName); fs.writeFileSync( filePath, mocks.map((m) => JSON.stringify(m)).join('\n'), ); return filePath; }; await rig.setup('context-fidelity', { settings: { experimental: { stressTestProfile: true, // Lowers thresholds to trigger GC easily }, }, }); const traceDir = path.join(rig.testDir!, 'traces'); fs.mkdirSync(traceDir, { recursive: true }); const traceLog = path.join(traceDir, 'trace.log'); // Ignore trace and response files to keep environment context clean and stable fs.writeFileSync( path.join(rig.testDir!, '.geminiignore'), 'traces/\nresp*.json\ndebug.log\n', ); const commonEnv = { GEMINI_API_KEY: 'mock-key', GEMINI_CONTEXT_TRACE_DIR: traceDir, GEMINI_CONTEXT_TRACE_ENABLED: 'true', GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'), }; const runMocks: FakeResponse[] = [ streamResponse('Ack 1'), streamResponse('Ack 2'), streamResponse('Ack 3'), streamResponse('Ack 4'), streamResponse('Ack 5'), streamResponse('Ack 6'), streamResponse('Ack 7'), streamResponse('Ack 8'), streamResponse('Ack 9'), streamResponse('Ack 10'), streamResponse('Ack 11'), streamResponse('Ack 12'), ]; for (let i = 0; i < 50; i++) { runMocks.push(snapshotResponse); runMocks.push(countTokensResponse); } // Turns 1-10: Build up history for (let i = 1; i <= 10; i++) { await rig.run({ args: [ '--debug', i === 1 ? '' : '--resume', i === 1 ? '' : 'latest', '--fake-responses-non-strict', setupResponses(`resp_init_${i}.json`, runMocks), ].filter(Boolean), stdin: `Turn ${i}: ` + generateRandomString(900), env: commonEnv, }); } // Turn 11: Penultimate turn await rig.run({ args: [ '--debug', '--resume', 'latest', '--fake-responses-non-strict', setupResponses('resp2.json', runMocks), ], stdin: 'Turn 11: ' + generateRandomString(900), env: commonEnv, }); // Turn 12: Breach threshold and force GC await rig.run({ args: [ '--debug', '--resume', 'latest', '--fake-responses-non-strict', setupResponses('resp3.json', runMocks), ], stdin: 'Turn 12: ' + generateRandomString(900), env: commonEnv, }); // Extract the rendered context asset from the log const getRenderedContext = (logContent: string): HistoryTurn[] | null => { const lines = logContent.split('\n'); const renderLines = lines.filter( (l) => l.includes('[Render] Render Sanitized Context for LLM') || l.includes('[Render] Render Context for LLM'), ); if (renderLines.length === 0) return null; const lastRender = renderLines[renderLines.length - 1]; const detailsMatch = lastRender.match(/\| Details: (.*)$/); if (!detailsMatch) return null; const details = JSON.parse(detailsMatch[1]); const assetInfo = details.renderedContextSanitized || details.renderedContext; if (assetInfo && assetInfo.$asset) { const assetPath = path.join(traceDir, 'assets', assetInfo.$asset); return JSON.parse(fs.readFileSync(assetPath, 'utf-8')); } return assetInfo; }; const log1 = fs.readFileSync(traceLog, 'utf-8'); const contextBeforeExit = getRenderedContext(log1); expect(contextBeforeExit).toBeDefined(); console.log( 'Context Before Exit (First 2 turns):', JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2), ); // Turn 4: Resume and run a small command await rig.run({ args: [ '--debug', '--resume', 'latest', '--fake-responses-non-strict', setupResponses('resp4.json', runMocks), 'continue', ], env: commonEnv, }); const log2 = fs.readFileSync(traceLog, 'utf-8'); const contextAfterResume = getRenderedContext(log2); expect(contextAfterResume).toBeDefined(); console.log( 'Context After Resume (First 2 turns):', JSON.stringify(contextAfterResume!.slice(0, 2), null, 2), ); expect(contextAfterResume!.length).toBeGreaterThanOrEqual( contextBeforeExit!.length, ); // The environment context is intentionally refreshed on resume to reflect // the current state of the workspace (e.g. new files, current date). // We allow its content to differ but ensure it's still an environment context. const isEnvContext = (turn: HistoryTurn) => turn.content.parts?.some((p) => p.text?.includes('')); for (let i = 0; i < contextBeforeExit!.length; i++) { expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id); const turnBefore = contextBeforeExit![i]; const turnAfter = contextAfterResume![i]; if (isEnvContext(turnBefore)) { expect(isEnvContext(turnAfter)).toBe(true); continue; } expect(turnAfter.content).toEqual(turnBefore.content); } // Most importantly, synthetic IDs (like summaries) must be stable. const syntheticTurns = contextBeforeExit!.filter( (t: HistoryTurn) => t.content.parts?.some((p) => p.text?.includes('active_tasks')) || (t.id && t.id.length === 32), ); expect(syntheticTurns.length).toBeGreaterThan(0); const syntheticTurnsAfter = contextAfterResume!.filter( (t: HistoryTurn) => t.content.parts?.some((p) => p.text?.includes('active_tasks')) || (t.id && t.id.length === 32), ); expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual( syntheticTurns.length, ); // Check if the first synthetic turn is identical (with relaxation for environment context) expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id); if (isEnvContext(syntheticTurns[0])) { expect(isEnvContext(syntheticTurnsAfter[0])).toBe(true); } else { expect(syntheticTurnsAfter[0].content).toEqual( syntheticTurns[0].content, ); } }, ); });