fix(cli): filter internal session context from history during resumption (#27391)

2026-06-11 03:46:49 -07:00 · 2026-05-24 08:38:53 +02:00
parent 3cc7e5b096
commit 630ecc21b9
5 changed files with 288 additions and 196 deletions
@@ -32,227 +32,256 @@ describe('Context Management Fidelity E2E', () => {

  afterEach(async () => await rig.cleanup());

-  it('should reproduce the exact context working buffer on resume', async () => {
-    // Mock responses to trigger GC (summarization)
-    const snapshotResponse: FakeResponse = {
-      method: 'generateContent',
-      response: {
-        candidates: [
-          {
-            content: {
-              parts: [
-                {
-                  text: JSON.stringify({
-                    new_facts: ['GC Triggered.'],
-                    new_constraints: [],
-                    new_tasks: [],
-                    resolved_task_ids: [],
-                    obsolete_fact_indices: [],
-                    obsolete_constraint_indices: [],
-                    chronological_summary: 'Snapshot created.',
-                  }),
-                },
-              ],
-              role: 'model',
-            },
-            finishReason: FinishReason.STOP,
-            index: 0,
-          },
-        ],
-      } as unknown as GenerateContentResponse,
-    };
-
-    const countTokensResponse: FakeResponse = {
-      method: 'countTokens',
-      response: { totalTokens: 1000 },
-    };
-
-    const streamResponse = (text: string): FakeResponse => ({
-      method: 'generateContentStream',
-      response: [
-        {
+  it(
+    'should reproduce the exact context working buffer on resume',
+    { timeout: 300000 },
+    async () => {
+      // Mock responses to trigger GC (summarization)
+      const snapshotResponse: FakeResponse = {
+        method: 'generateContent',
+        response: {
          candidates: [
            {
-              content: { parts: [{ text }], role: 'model' },
+              content: {
+                parts: [
+                  {
+                    text: JSON.stringify({
+                      new_facts: ['GC Triggered.'],
+                      new_constraints: [],
+                      new_tasks: [],
+                      resolved_task_ids: [],
+                      obsolete_fact_indices: [],
+                      obsolete_constraint_indices: [],
+                      chronological_summary: 'Snapshot created.',
+                    }),
+                  },
+                ],
+                role: 'model',
+              },
              finishReason: FinishReason.STOP,
              index: 0,
            },
          ],
-        },
-      ] as unknown as GenerateContentResponse[],
-    });
+        } as unknown as GenerateContentResponse,
+      };

-    const setupResponses = (fileName: string, mocks: FakeResponse[]) => {
-      const filePath = path.join(rig.testDir!, fileName);
+      const countTokensResponse: FakeResponse = {
+        method: 'countTokens',
+        response: { totalTokens: 1000 },
+      };
+
+      const streamResponse = (text: string): FakeResponse => ({
+        method: 'generateContentStream',
+        response: [
+          {
+            candidates: [
+              {
+                content: { parts: [{ text }], role: 'model' },
+                finishReason: FinishReason.STOP,
+                index: 0,
+              },
+            ],
+          },
+        ] as unknown as GenerateContentResponse[],
+      });
+
+      const setupResponses = (fileName: string, mocks: FakeResponse[]) => {
+        const filePath = path.join(rig.testDir!, fileName);
+        fs.writeFileSync(
+          filePath,
+          mocks.map((m) => JSON.stringify(m)).join('\n'),
+        );
+        return filePath;
+      };
+
+      await rig.setup('context-fidelity', {
+        settings: {
+          experimental: {
+            stressTestProfile: true, // Lowers thresholds to trigger GC easily
+          },
+        },
+      });
+
+      const traceDir = path.join(rig.testDir!, 'traces');
+      fs.mkdirSync(traceDir, { recursive: true });
+      const traceLog = path.join(traceDir, 'trace.log');
+
+      // Ignore trace, response, and debug-log files to keep environment context clean and stable
      fs.writeFileSync(
-        filePath,
-        mocks.map((m) => JSON.stringify(m)).join('\n'),
+        path.join(rig.testDir!, '.geminiignore'),
+        'traces/\nresp*.json\ndebug.log\n',
      );
-      return filePath;
-    };

-    await rig.setup('context-fidelity', {
-      settings: {
-        experimental: {
-          stressTestProfile: true, // Lowers thresholds to trigger GC easily
-        },
-      },
-    });
+      const commonEnv = {
+        GEMINI_API_KEY: 'mock-key',
+        GEMINI_CONTEXT_TRACE_DIR: traceDir,
+        GEMINI_CONTEXT_TRACE_ENABLED: 'true',
+        GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'),
+      };

-    const traceDir = path.join(rig.testDir!, 'traces');
-    fs.mkdirSync(traceDir, { recursive: true });
-    const traceLog = path.join(traceDir, 'trace.log');
+      const runMocks: FakeResponse[] = [
+        streamResponse('Ack 1'),
+        streamResponse('Ack 2'),
+        streamResponse('Ack 3'),
+        streamResponse('Ack 4'),
+        streamResponse('Ack 5'),
+        streamResponse('Ack 6'),
+        streamResponse('Ack 7'),
+        streamResponse('Ack 8'),
+        streamResponse('Ack 9'),
+        streamResponse('Ack 10'),
+        streamResponse('Ack 11'),
+        streamResponse('Ack 12'),
+      ];
+      for (let i = 0; i < 50; i++) {
+        runMocks.push(snapshotResponse);
+        runMocks.push(countTokensResponse);
+      }

-    const commonEnv = {
-      GEMINI_API_KEY: 'mock-key',
-      GEMINI_CONTEXT_TRACE_DIR: traceDir,
-      GEMINI_CONTEXT_TRACE_ENABLED: 'true',
-      GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'),
-    };
+      // Turns 1-10: Build up history
+      for (let i = 1; i <= 10; i++) {
+        await rig.run({
+          args: [
+            '--debug',
+            i === 1 ? '' : '--resume',
+            i === 1 ? '' : 'latest',
+            '--fake-responses-non-strict',
+            setupResponses(`resp_init_${i}.json`, runMocks),
+          ].filter(Boolean),
+          stdin: `Turn ${i}: ` + generateRandomString(900),
+          env: commonEnv,
+        });
+      }

-    const runMocks: FakeResponse[] = [
-      streamResponse('Ack 1'),
-      streamResponse('Ack 2'),
-      streamResponse('Ack 3'),
-      streamResponse('Ack 4'),
-      streamResponse('Ack 5'),
-      streamResponse('Ack 6'),
-      streamResponse('Ack 7'),
-      streamResponse('Ack 8'),
-      streamResponse('Ack 9'),
-      streamResponse('Ack 10'),
-      streamResponse('Ack 11'),
-      streamResponse('Ack 12'),
-    ];
-    for (let i = 0; i < 50; i++) {
-      runMocks.push(snapshotResponse);
-      runMocks.push(countTokensResponse);
-    }
-
-    // Turns 1-10: Build up history
-    for (let i = 1; i <= 10; i++) {
+      // Turn 11: Penultimate turn
      await rig.run({
        args: [
          '--debug',
-          i === 1 ? '' : '--resume',
-          i === 1 ? '' : 'latest',
+          '--resume',
+          'latest',
          '--fake-responses-non-strict',
-          setupResponses(`resp_init_${i}.json`, runMocks),
-        ].filter(Boolean),
-        stdin: `Turn ${i}: ` + generateRandomString(900),
+          setupResponses('resp2.json', runMocks),
+        ],
+        stdin: 'Turn 11: ' + generateRandomString(900),
        env: commonEnv,
      });
-    }

-    // Turn 11: Penultimate turn
-    await rig.run({
-      args: [
-        '--debug',
-        '--resume',
-        'latest',
-        '--fake-responses-non-strict',
-        setupResponses('resp2.json', runMocks),
-      ],
-      stdin: 'Turn 11: ' + generateRandomString(900),
-      env: commonEnv,
-    });
+      // Turn 12: Breach threshold and force GC
+      await rig.run({
+        args: [
+          '--debug',
+          '--resume',
+          'latest',
+          '--fake-responses-non-strict',
+          setupResponses('resp3.json', runMocks),
+        ],
+        stdin: 'Turn 12: ' + generateRandomString(900),
+        env: commonEnv,
+      });

-    // Turn 12: Breach threshold and force GC
-    await rig.run({
-      args: [
-        '--debug',
-        '--resume',
-        'latest',
-        '--fake-responses-non-strict',
-        setupResponses('resp3.json', runMocks),
-      ],
-      stdin: 'Turn 12: ' + generateRandomString(900),
-      env: commonEnv,
-    });
+      // Extract the rendered context asset from the log
+      const getRenderedContext = (logContent: string): HistoryTurn[] | null => {
+        const lines = logContent.split('\n');
+        const renderLines = lines.filter(
+          (l) =>
+            l.includes('[Render] Render Sanitized Context for LLM') ||
+            l.includes('[Render] Render Context for LLM'),
+        );
+        if (renderLines.length === 0) return null;

-    // Extract the rendered context asset from the log
-    const getRenderedContext = (logContent: string): HistoryTurn[] | null => {
-      const lines = logContent.split('\n');
-      const renderLines = lines.filter(
-        (l) =>
-          l.includes('[Render] Render Sanitized Context for LLM') ||
-          l.includes('[Render] Render Context for LLM'),
+        const lastRender = renderLines[renderLines.length - 1];
+        const detailsMatch = lastRender.match(/\| Details: (.*)$/);
+        if (!detailsMatch) return null;
+
+        const details = JSON.parse(detailsMatch[1]);
+        const assetInfo =
+          details.renderedContextSanitized || details.renderedContext;
+        if (assetInfo && assetInfo.$asset) {
+          const assetPath = path.join(traceDir, 'assets', assetInfo.$asset);
+          return JSON.parse(fs.readFileSync(assetPath, 'utf-8'));
+        }
+        return assetInfo;
+      };
+
+      const log1 = fs.readFileSync(traceLog, 'utf-8'); // trace written by turns 1-12
+      const contextBeforeExit = getRenderedContext(log1);
+      expect(contextBeforeExit).toBeTruthy(); // failure value is null, which toBeDefined() would let through
+      console.log(
+        'Context Before Exit (First 2 turns):',
+        JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2),
      );
-      if (renderLines.length === 0) return null;

-      const lastRender = renderLines[renderLines.length - 1];
-      const detailsMatch = lastRender.match(/\| Details: (.*)$/);
-      if (!detailsMatch) return null;
+      // Turn 13: Resume and run a small command
+      await rig.run({
+        args: [
+          '--debug',
+          '--resume',
+          'latest',
+          '--fake-responses-non-strict',
+          setupResponses('resp4.json', runMocks),
+          'continue',
+        ],
+        env: commonEnv,
+      });

-      const details = JSON.parse(detailsMatch[1]);
-      const assetInfo =
-        details.renderedContextSanitized || details.renderedContext;
-      if (assetInfo && assetInfo.$asset) {
-        const assetPath = path.join(traceDir, 'assets', assetInfo.$asset);
-        return JSON.parse(fs.readFileSync(assetPath, 'utf-8'));
+      const log2 = fs.readFileSync(traceLog, 'utf-8'); // same trace log, re-read after the resumed run
+      const contextAfterResume = getRenderedContext(log2);
+      expect(contextAfterResume).toBeTruthy(); // failure value is null, which toBeDefined() would let through
+      console.log(
+        'Context After Resume (First 2 turns):',
+        JSON.stringify(contextAfterResume!.slice(0, 2), null, 2),
+      );
+
+      expect(contextAfterResume!.length).toBeGreaterThanOrEqual(
+        contextBeforeExit!.length,
+      );
+
+      // The environment context is intentionally refreshed on resume to reflect
+      // the current state of the workspace (e.g. new files, current date).
+      // We allow its content to differ but ensure it's still an environment context.
+      const isEnvContext = (turn: HistoryTurn) =>
+        turn.content.parts?.some((p) => p.text?.includes('<session_context>'));
+
+      for (let i = 0; i < contextBeforeExit!.length; i++) {
+        expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id);
+
+        const turnBefore = contextBeforeExit![i];
+        const turnAfter = contextAfterResume![i];
+
+        if (isEnvContext(turnBefore)) {
+          expect(isEnvContext(turnAfter)).toBe(true);
+          continue;
+        }
+
+        expect(turnAfter.content).toEqual(turnBefore.content);
      }
-      return assetInfo;
-    };

-    const log1 = fs.readFileSync(traceLog, 'utf-8');
-    const contextBeforeExit = getRenderedContext(log1);
-    expect(contextBeforeExit).toBeDefined();
-    console.log(
-      'Context Before Exit (First 2 turns):',
-      JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2),
-    );
-
-    // Turn 4: Resume and run a small command
-    await rig.run({
-      args: [
-        '--debug',
-        '--resume',
-        'latest',
-        '--fake-responses-non-strict',
-        setupResponses('resp4.json', runMocks),
-        'continue',
-      ],
-      env: commonEnv,
-    });
-
-    const log2 = fs.readFileSync(traceLog, 'utf-8');
-    const contextAfterResume = getRenderedContext(log2);
-    expect(contextAfterResume).toBeDefined();
-    console.log(
-      'Context After Resume (First 2 turns):',
-      JSON.stringify(contextAfterResume!.slice(0, 2), null, 2),
-    );
-
-    expect(contextAfterResume!.length).toBeGreaterThanOrEqual(
-      contextBeforeExit!.length,
-    );
-
-    for (let i = 0; i < contextBeforeExit!.length; i++) {
-      expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id);
-      expect(contextAfterResume![i].content).toEqual(
-        contextBeforeExit![i].content,
+      // Most importantly, synthetic IDs (like summaries) must be stable.
+      const syntheticTurns = contextBeforeExit!.filter(
+        (t: HistoryTurn) =>
+          t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
+          (t.id && t.id.length === 32),
      );
-    }
+      expect(syntheticTurns.length).toBeGreaterThan(0);

-    // Most importantly, synthetic IDs (like summaries) must be stable.
-    const syntheticTurns = contextBeforeExit!.filter(
-      (t: HistoryTurn) =>
-        t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
-        (t.id && t.id.length === 32),
-    );
-    expect(syntheticTurns.length).toBeGreaterThan(0);
+      const syntheticTurnsAfter = contextAfterResume!.filter(
+        (t: HistoryTurn) =>
+          t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
+          (t.id && t.id.length === 32),
+      );
+      expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual(
+        syntheticTurns.length,
+      );

-    const syntheticTurnsAfter = contextAfterResume!.filter(
-      (t: HistoryTurn) =>
-        t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
-        (t.id && t.id.length === 32),
-    );
-    expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual(
-      syntheticTurns.length,
-    );
-
-    // Check if the first synthetic turn is identical
-    expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id);
-    expect(syntheticTurnsAfter[0].content).toEqual(syntheticTurns[0].content);
-  });
+      // Check if the first synthetic turn is identical (with relaxation for environment context)
+      expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id);
+      if (isEnvContext(syntheticTurns[0])) {
+        expect(isEnvContext(syntheticTurnsAfter[0])).toBe(true);
+      } else {
+        expect(syntheticTurnsAfter[0].content).toEqual(
+          syntheticTurns[0].content,
+        );
+      }
+    },
+  );
 });
@@ -1111,6 +1111,28 @@ describe('convertSessionToHistoryFormats', () => {
    });
  });

+  it('should filter out <session_context> from UI history', () => {
+    const messages: MessageRecord[] = [
+      {
+        id: '1',
+        timestamp: new Date().toISOString(),
+        type: 'user',
+        content:
+          '<session_context>\nThis is the Gemini CLI\n</session_context>',
+      },
+      {
+        id: '2',
+        timestamp: new Date().toISOString(),
+        type: 'user',
+        content: 'Real message',
+      },
+    ];
+
+    const result = convertSessionToHistoryFormats(messages);
+    expect(result.uiHistory).toHaveLength(1);
+    expect(result.uiHistory[0].text).toBe('Real message');
+  });
+
  it('should handle missing tool descriptions and displayNames', () => {
    const messages: MessageRecord[] = [
      {
@@ -606,7 +606,16 @@ export function convertSessionToHistoryFormats(
    const contentString = partListUnionToString(msg.content);
    const uiText = displayContentString || contentString;

-    if (uiText.trim()) {
+    // Skip internal context messages in the UI history
+    const trimmedText = uiText.trim();
+    if (
+      trimmedText.startsWith('<session_context>') ||
+      trimmedText.startsWith('<hook_context>')
+    ) {
+      continue;
+    }
+
+    if (trimmedText) {
      let messageType: MessageType;
      switch (msg.type) {
        case 'user':
@@ -105,6 +105,35 @@ describe('convertSessionToClientHistory', () => {
    ]);
  });

+  it('should ignore <session_context> and <hook_context>', () => {
+    const messages: ConversationRecord['messages'] = [
+      {
+        id: '1',
+        type: 'user',
+        timestamp: '2024-01-01T10:00:00Z',
+        content: '<session_context>\nOld context\n</session_context>',
+      },
+      {
+        id: '2',
+        type: 'user',
+        timestamp: '2024-01-01T10:01:00Z',
+        content: '<hook_context>\nOld hook context\n</hook_context>',
+      },
+      {
+        id: '3',
+        type: 'user',
+        timestamp: '2024-01-01T10:02:00Z',
+        content: 'Actual query',
+      },
+    ];
+
+    const history = convertSessionToClientHistory(messages);
+
+    expect(history.map((h) => h.content)).toEqual([
+      { role: 'user', parts: [{ text: 'Actual query' }] },
+    ]);
+  });
+
  it('should correctly map tool calls and their responses', () => {
    const messages: ConversationRecord['messages'] = [
      {
@@ -109,9 +109,12 @@ export function convertSessionToClientHistory(

    if (msg.type === 'user') {
      const contentString = partListUnionToString(msg.content);
+      const trimmedContent = contentString.trim();
      if (
-        contentString.trim().startsWith('/') ||
-        contentString.trim().startsWith('?')
+        trimmedContent.startsWith('/') ||
+        trimmedContent.startsWith('?') ||
+        trimmedContent.startsWith('<session_context>') ||
+        trimmedContent.startsWith('<hook_context>')
      ) {
        continue;
      }