From 055e0f64524217fefb1a3d7c8b0fceef7ed14867 Mon Sep 17 00:00:00 2001 From: joshualitt Date: Mon, 18 May 2026 09:44:59 -0700 Subject: [PATCH] fix(context): Fix snapshot recovery across sessions. (#26939) --- integration-tests/context-fidelity.test.ts | 232 ++++++++ integration-tests/resume-gc.test.ts | 149 ++++++ packages/cli/src/config/config.ts | 8 + .../cli/src/ui/commands/rewindCommand.tsx | 3 +- .../src/ui/hooks/useSessionBrowser.test.ts | 82 +-- .../cli/src/ui/hooks/useSessionBrowser.ts | 14 +- .../cli/src/ui/hooks/useSessionResume.test.ts | 11 +- packages/cli/src/ui/hooks/useSessionResume.ts | 13 +- .../agents/browser/snapshotSuperseder.test.ts | 11 +- .../src/agents/browser/snapshotSuperseder.ts | 23 +- .../core/src/agents/local-executor.test.ts | 29 +- packages/core/src/agents/local-executor.ts | 12 +- packages/core/src/config/config.ts | 3 + .../context/chatCompressionService.test.ts | 2 +- .../src/context/chatCompressionService.ts | 4 +- .../context/contextManager.barrier.test.ts | 37 +- .../context/contextManager.hotstart.test.ts | 8 +- packages/core/src/context/contextManager.ts | 122 +++-- .../core/src/context/graph/fromGraph.test.ts | 202 +++++++ packages/core/src/context/graph/fromGraph.ts | 47 +- .../core/src/context/graph/mapper.test.ts | 110 ++++ packages/core/src/context/graph/mapper.ts | 16 +- .../core/src/context/graph/nodeIdService.ts | 23 + packages/core/src/context/graph/render.ts | 27 +- .../core/src/context/graph/toGraph.test.ts | 115 +++- packages/core/src/context/graph/toGraph.ts | 122 +++-- packages/core/src/context/initializer.ts | 17 + .../processors/blobDegradationProcessor.ts | 7 +- .../processors/nodeDistillationProcessor.ts | 8 +- .../processors/nodeTruncationProcessor.ts | 5 +- .../processors/rollingSummaryProcessor.ts | 8 +- .../processors/stateSnapshotProcessor.ts | 13 +- .../processors/toolMaskingProcessor.ts | 10 +- .../lifecycle.golden.test.ts.snap | 496 +++++++++++------- .../context/system-tests/hysteresis.test.ts | 17 
+- .../system-tests/lifecycle.golden.test.ts | 2 + .../context/system-tests/simulationHarness.ts | 4 +- .../utils/adaptiveTokenCalculator.test.ts | 65 +++ .../context/utils/adaptiveTokenCalculator.ts | 50 +- .../context/utils/snapshotGenerator.test.ts | 122 ++++- .../src/context/utils/snapshotGenerator.ts | 103 ++++ packages/core/src/core/agentChatHistory.ts | 61 ++- packages/core/src/core/client.ts | 22 +- packages/core/src/core/contentGenerator.ts | 7 + .../core/src/core/fakeContentGenerator.ts | 41 +- packages/core/src/core/geminiChat.test.ts | 182 +++++-- packages/core/src/core/geminiChat.ts | 258 ++++++--- packages/core/src/index.ts | 1 + .../src/services/chatRecordingService.test.ts | 210 +++++--- .../core/src/services/chatRecordingService.ts | 130 +++-- packages/core/src/utils/cryptoUtils.test.ts | 35 ++ packages/core/src/utils/cryptoUtils.ts | 20 + packages/core/src/utils/environmentContext.ts | 5 +- .../core/src/utils/historyHardening.test.ts | 378 +++++++++++++ packages/core/src/utils/historyHardening.ts | 149 ++++-- packages/core/src/utils/sessionUtils.test.ts | 10 +- packages/core/src/utils/sessionUtils.ts | 33 +- 57 files changed, 3143 insertions(+), 751 deletions(-) create mode 100644 integration-tests/context-fidelity.test.ts create mode 100644 integration-tests/resume-gc.test.ts create mode 100644 packages/core/src/context/graph/fromGraph.test.ts create mode 100644 packages/core/src/context/graph/mapper.test.ts create mode 100644 packages/core/src/context/graph/nodeIdService.ts create mode 100644 packages/core/src/utils/cryptoUtils.test.ts create mode 100644 packages/core/src/utils/cryptoUtils.ts create mode 100644 packages/core/src/utils/historyHardening.test.ts diff --git a/integration-tests/context-fidelity.test.ts b/integration-tests/context-fidelity.test.ts new file mode 100644 index 0000000000..5b5f83d03b --- /dev/null +++ b/integration-tests/context-fidelity.test.ts @@ -0,0 +1,232 @@ +/** + * @license + * Copyright 2026 Google LLC + * 
SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import { FinishReason, GenerateContentResponse } from '@google/genai'; +import type { FakeResponse, HistoryTurn } from '@google/gemini-cli-core'; + +describe('Context Management Fidelity E2E', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should reproduce the exact context working buffer on resume', async () => { + // Mock responses to trigger GC (summarization) + const snapshotResponse: FakeResponse = { + method: 'generateContent', + response: { + candidates: [ + { + content: { + parts: [ + { + text: JSON.stringify({ + new_facts: ['GC Triggered.'], + new_constraints: [], + new_tasks: [], + resolved_task_ids: [], + obsolete_fact_indices: [], + obsolete_constraint_indices: [], + chronological_summary: 'Snapshot created.', + }), + }, + ], + role: 'model', + }, + finishReason: FinishReason.STOP, + index: 0, + }, + ], + } as unknown as GenerateContentResponse, + }; + + const countTokensResponse: FakeResponse = { + method: 'countTokens', + response: { totalTokens: 50000 }, + }; + + const streamResponse = (text: string): FakeResponse => ({ + method: 'generateContentStream', + response: [ + { + candidates: [ + { + content: { parts: [{ text }], role: 'model' }, + finishReason: FinishReason.STOP, + index: 0, + }, + ], + }, + ] as unknown as GenerateContentResponse[], + }); + + const setupResponses = (fileName: string, mocks: FakeResponse[]) => { + const filePath = path.join(rig.testDir!, fileName); + fs.writeFileSync( + filePath, + mocks.map((m) => JSON.stringify(m)).join('\n'), + ); + return filePath; + }; + + await rig.setup('context-fidelity', { + settings: { + experimental: { + stressTestProfile: true, // Lowers thresholds to trigger GC easily + }, + }, + 
}); + + const massivePayload = 'X'.repeat(50000); + const traceDir = path.join(rig.testDir!, 'traces'); + fs.mkdirSync(traceDir, { recursive: true }); + const traceLog = path.join(traceDir, 'trace.log'); + + const commonEnv = { + GEMINI_API_KEY: 'mock-key', + GEMINI_CONTEXT_TRACE_DIR: traceDir, + GEMINI_CONTEXT_TRACE_ENABLED: 'true', + GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'), + }; + + const runMocks: FakeResponse[] = [ + streamResponse('Ack 1'), + streamResponse('Ack 2'), + streamResponse('Ack 3'), + streamResponse('Ack 4'), + streamResponse('Ack 5'), + ]; + for (let i = 0; i < 50; i++) { + runMocks.push(snapshotResponse); + runMocks.push(countTokensResponse); + } + + // Turn 1: Initial massive payload to put pressure + await rig.run({ + args: [ + '--debug', + '--fake-responses-non-strict', + setupResponses('resp1.json', runMocks), + ], + stdin: 'Turn 1: ' + massivePayload, + env: commonEnv, + }); + + // Turn 2: Another turn, resuming Turn 1 + await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + '--fake-responses-non-strict', + setupResponses('resp2.json', runMocks), + ], + stdin: 'Turn 2: ' + massivePayload, + env: commonEnv, + }); + + // Turn 3: Third turn to force GC, resuming Turn 2 + await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + '--fake-responses-non-strict', + setupResponses('resp3.json', runMocks), + ], + stdin: 'Turn 3: ' + massivePayload, + env: commonEnv, + }); + + // Extract the rendered context asset from the log + const getRenderedContext = (logContent: string): HistoryTurn[] | null => { + const lines = logContent.split('\n'); + const renderLines = lines.filter( + (l) => + l.includes('[Render] Render Sanitized Context for LLM') || + l.includes('[Render] Render Context for LLM'), + ); + if (renderLines.length === 0) return null; + + const lastRender = renderLines[renderLines.length - 1]; + const detailsMatch = lastRender.match(/\| Details: (.*)$/); + if (!detailsMatch) return null; + + const details 
= JSON.parse(detailsMatch[1]); + const assetInfo = + details.renderedContextSanitized || details.renderedContext; + if (assetInfo && assetInfo.$asset) { + const assetPath = path.join(traceDir, 'assets', assetInfo.$asset); + return JSON.parse(fs.readFileSync(assetPath, 'utf-8')); + } + return assetInfo; + }; + + const log1 = fs.readFileSync(traceLog, 'utf-8'); + const contextBeforeExit = getRenderedContext(log1); + expect(contextBeforeExit).toBeDefined(); + console.log( + 'Context Before Exit (First 2 turns):', + JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2), + ); + + // Turn 4: Resume and run a small command + await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + '--fake-responses-non-strict', + setupResponses('resp4.json', runMocks), + 'continue', + ], + env: commonEnv, + }); + + const log2 = fs.readFileSync(traceLog, 'utf-8'); + const contextAfterResume = getRenderedContext(log2); + expect(contextAfterResume).toBeDefined(); + console.log( + 'Context After Resume (First 2 turns):', + JSON.stringify(contextAfterResume!.slice(0, 2), null, 2), + ); + + expect(contextAfterResume!.length).toBeGreaterThanOrEqual( + contextBeforeExit!.length, + ); + + for (let i = 0; i < contextBeforeExit!.length; i++) { + expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id); + expect(contextAfterResume![i].content).toEqual( + contextBeforeExit![i].content, + ); + } + + // Most importantly, synthetic IDs (like summaries) must be stable. 
+ const syntheticTurns = contextBeforeExit!.filter( + (t: HistoryTurn) => t.id && t.id.length === 32, + ); // deriveStableId produces 32-char hex + expect(syntheticTurns.length).toBeGreaterThan(0); + + const syntheticTurnsAfter = contextAfterResume!.filter( + (t: HistoryTurn) => t.id && t.id.length === 32, + ); + expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual( + syntheticTurns.length, + ); + + // Check if the first synthetic turn is identical + expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id); + expect(syntheticTurnsAfter[0].content).toEqual(syntheticTurns[0].content); + }); +}); diff --git a/integration-tests/resume-gc.test.ts b/integration-tests/resume-gc.test.ts new file mode 100644 index 0000000000..3380177049 --- /dev/null +++ b/integration-tests/resume-gc.test.ts @@ -0,0 +1,149 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import { FinishReason, GenerateContentResponse } from '@google/genai'; +import type { FakeResponse } from '@google/gemini-cli-core'; + +describe('Context Management Resume E2E', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should preserve and utilize GC snapshot boundaries when resuming a session', async () => { + const snapshotResponse: FakeResponse = { + method: 'generateContent', + response: { + candidates: [ + { + content: { + parts: [ + { + text: JSON.stringify({ + new_facts: ['GC Triggered.'], + new_constraints: [], + new_tasks: [], + resolved_task_ids: [], + obsolete_fact_indices: [], + obsolete_constraint_indices: [], + chronological_summary: 'Snapshot created.', + }), + }, + ], + role: 'model', + }, + finishReason: FinishReason.STOP, + index: 0, + }, + ], + } as unknown as 
GenerateContentResponse, + }; + + const countTokensResponse: FakeResponse = { + method: 'countTokens', + response: { totalTokens: 50000 }, + }; + + const streamResponse = (text: string): FakeResponse => ({ + method: 'generateContentStream', + response: [ + { + candidates: [ + { + content: { parts: [{ text }], role: 'model' }, + finishReason: FinishReason.STOP, + index: 0, + }, + ], + }, + ] as unknown as GenerateContentResponse[], + }); + + const setupResponses = (fileName: string, mocks: FakeResponse[]) => { + const filePath = path.join(rig.testDir!, fileName); + fs.writeFileSync( + filePath, + mocks.map((m) => JSON.stringify(m)).join('\n'), + ); + return filePath; + }; + + await rig.setup('resume-gc-snapshot', { + settings: { + experimental: { + stressTestProfile: true, + }, + }, + }); + + const massivePayload = 'X'.repeat(40000); + const logFile = path.join(rig.testDir!, 'debug.log'); + const traceDir = path.join(rig.testDir!, 'traces'); + fs.mkdirSync(traceDir, { recursive: true }); + const traceLog = path.join(traceDir, 'trace.log'); + + const commonEnv = { + GEMINI_API_KEY: 'mock-key', + GEMINI_DEBUG_LOG_FILE: logFile, + GEMINI_CONTEXT_TRACE_DIR: traceDir, + }; + + // Provide a massive pool of responses to prevent exhaustion + const runMocks: FakeResponse[] = [streamResponse('Acknowledged block.')]; + for (let i = 0; i < 50; i++) { + runMocks.push(snapshotResponse); + runMocks.push(countTokensResponse); + } + + // Use stdin for the massive payload to avoid ENAMETOOLONG on Windows + await rig.run({ + args: [ + '--debug', + '--fake-responses-non-strict', + setupResponses('resp1.json', runMocks), + ], + stdin: 'Turn 1: ' + massivePayload, + env: commonEnv, + }); + + await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + '--fake-responses-non-strict', + setupResponses('resp2.json', runMocks), + ], + stdin: 'Turn 2: ' + massivePayload, + env: commonEnv, + }); + + const result3 = await rig.run({ + args: [ + '--debug', + '--resume', + 'latest', + 
'--fake-responses-non-strict', + setupResponses('resp3.json', runMocks), + 'continue', + ], + env: commonEnv, + }); + + expect(result3).toContain('Acknowledged block'); + + const traces = fs.readFileSync(traceLog, 'utf-8'); + expect(traces).toContain('Hitting Synchronous Pressure Barrier'); + expect(traces).toContain('GC Triggered.'); + }); +}); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 6444ac4f83..278fd0695a 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -103,6 +103,7 @@ export interface CliArgs { useWriteTodos: boolean | undefined; outputFormat: string | undefined; fakeResponses: string | undefined; + fakeResponsesNonStrict?: string | undefined; recordResponses: string | undefined; startupMessages?: string[]; rawOutput: boolean | undefined; @@ -474,6 +475,12 @@ export async function parseArguments( description: 'Path to a file with fake model responses for testing.', hidden: true, }) + .option('fake-responses-non-strict', { + type: 'string', + description: + 'Path to a file with fake model responses for testing (non-strict mode).', + hidden: true, + }) .option('record-responses', { type: 'string', description: 'Path to a file to record model responses for testing.', @@ -1074,6 +1081,7 @@ export async function loadCliConfig( gemmaModelRouter: settings.experimental?.gemmaModelRouter, adk: settings.experimental?.adk, fakeResponses: argv.fakeResponses, + fakeResponsesNonStrict: argv.fakeResponsesNonStrict, recordResponses: argv.recordResponses, retryFetchErrors: settings.general?.retryFetchErrors, billing: settings.billing, diff --git a/packages/cli/src/ui/commands/rewindCommand.tsx b/packages/cli/src/ui/commands/rewindCommand.tsx index f703323c1b..a7b0feee4a 100644 --- a/packages/cli/src/ui/commands/rewindCommand.tsx +++ b/packages/cli/src/ui/commands/rewindCommand.tsx @@ -14,7 +14,6 @@ import { type HistoryItem } from '../types.js'; import { convertSessionToHistoryFormats } 
from '../hooks/useSessionBrowser.js'; import { revertFileChanges } from '../utils/rewindFileOps.js'; import { RewindOutcome } from '../components/RewindConfirmation.js'; -import type { Content } from '@google/genai'; import { checkExhaustive, coreEvents, @@ -58,7 +57,7 @@ async function rewindConversation( const { uiHistory } = convertSessionToHistoryFormats(conversation.messages); const clientHistory = convertSessionToClientHistory(conversation.messages); - client.setHistory(clientHistory as Content[]); + client.setHistory(clientHistory); // Reset context manager as we are rewinding history await context.services.agentContext?.config diff --git a/packages/cli/src/ui/hooks/useSessionBrowser.test.ts b/packages/cli/src/ui/hooks/useSessionBrowser.test.ts index cb4e3bd17d..2402bb1adc 100644 --- a/packages/cli/src/ui/hooks/useSessionBrowser.test.ts +++ b/packages/cli/src/ui/hooks/useSessionBrowser.test.ts @@ -194,14 +194,16 @@ describe('convertSessionToHistoryFormats', () => { const clientHistory = convertSessionToClientHistory(messages); expect(clientHistory).toHaveLength(2); - expect(clientHistory[0]).toEqual({ - role: 'user', - parts: [{ text: 'Hello' }], - }); - expect(clientHistory[1]).toEqual({ - role: 'model', - parts: [{ text: 'Hi there' }], - }); + expect(clientHistory.map((h) => h.content)).toEqual([ + { + role: 'user', + parts: [{ text: 'Hello' }], + }, + { + role: 'model', + parts: [{ text: 'Hi there' }], + }, + ]); }); it('should convert thinking tokens (thoughts) to thinking history items', () => { @@ -254,10 +256,12 @@ describe('convertSessionToHistoryFormats', () => { const clientHistory = convertSessionToClientHistory(messages); expect(clientHistory).toHaveLength(1); - expect(clientHistory[0]).toEqual({ - role: 'user', - parts: [{ text: 'Expanded content' }], - }); + expect(clientHistory.map((h) => h.content)).toEqual([ + { + role: 'user', + parts: [{ text: 'Expanded content' }], + }, + ]); }); it('should filter out slash commands from client history 
but keep in UI', () => { @@ -316,33 +320,35 @@ describe('convertSessionToHistoryFormats', () => { const clientHistory = convertSessionToClientHistory(messages); expect(clientHistory).toHaveLength(3); // User, Model (call), User (response) - expect(clientHistory[0]).toEqual({ - role: 'user', - parts: [{ text: 'What time is it?' }], - }); - expect(clientHistory[1]).toEqual({ - role: 'model', - parts: [ - { - functionCall: { - name: 'get_time', - args: {}, - id: 'call_1', + expect(clientHistory.map((h) => h.content)).toEqual([ + { + role: 'user', + parts: [{ text: 'What time is it?' }], + }, + { + role: 'model', + parts: [ + { + functionCall: { + name: 'get_time', + args: {}, + id: 'call_1', + }, }, - }, - ], - }); - expect(clientHistory[2]).toEqual({ - role: 'user', - parts: [ - { - functionResponse: { - id: 'call_1', - name: 'get_time', - response: { output: '12:00' }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'get_time', + response: { output: '12:00' }, + id: 'call_1', + }, }, - }, - ], - }); + ], + }, + ]); }); }); diff --git a/packages/cli/src/ui/hooks/useSessionBrowser.ts b/packages/cli/src/ui/hooks/useSessionBrowser.ts index b42e1c5a72..aabb2eb0b2 100644 --- a/packages/cli/src/ui/hooks/useSessionBrowser.ts +++ b/packages/cli/src/ui/hooks/useSessionBrowser.ts @@ -12,22 +12,28 @@ import { convertSessionToClientHistory, uiTelemetryService, loadConversationRecord, - type Config, - type ResumedSessionData, +} from '@google/gemini-cli-core'; +import type { + HistoryTurn, + Config, + ResumedSessionData, } from '@google/gemini-cli-core'; import { convertSessionToHistoryFormats, type SessionInfo, } from '../../utils/sessionUtils.js'; -import type { Part } from '@google/genai'; export { convertSessionToHistoryFormats }; +import type { Part } from '@google/genai'; + export const useSessionBrowser = ( config: Config, onLoadHistory: ( uiHistory: HistoryItemWithoutId[], - clientHistory: Array<{ role: 'user' | 'model'; parts: Part[] }>, + 
clientHistory: Array< + { role: 'user' | 'model'; parts: Part[] } | HistoryTurn + >, resumedSessionData: ResumedSessionData, ) => Promise, ) => { diff --git a/packages/cli/src/ui/hooks/useSessionResume.test.ts b/packages/cli/src/ui/hooks/useSessionResume.test.ts index 3997eb06c5..c1655f6ce6 100644 --- a/packages/cli/src/ui/hooks/useSessionResume.test.ts +++ b/packages/cli/src/ui/hooks/useSessionResume.test.ts @@ -13,6 +13,7 @@ import type { ResumedSessionData, ConversationRecord, MessageRecord, + HistoryTurn, } from '@google/gemini-cli-core'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; import type { HistoryItemWithoutId } from '../types.js'; @@ -527,10 +528,12 @@ describe('useSessionResume', () => { // Should only have the non-slash-command message expect(clientHistory).toHaveLength(1); - expect(clientHistory[0]).toEqual({ - role: 'user', - parts: [{ text: 'Regular message' }], - }); + expect(clientHistory.map((h: HistoryTurn) => h.content)).toEqual([ + { + role: 'user', + parts: [{ text: 'Regular message' }], + }, + ]); // But UI history should have both expect(mockHistoryManager.addItem).toHaveBeenCalledTimes(2); diff --git a/packages/cli/src/ui/hooks/useSessionResume.ts b/packages/cli/src/ui/hooks/useSessionResume.ts index 055686773b..9808b26579 100644 --- a/packages/cli/src/ui/hooks/useSessionResume.ts +++ b/packages/cli/src/ui/hooks/useSessionResume.ts @@ -7,14 +7,17 @@ import { useCallback, useEffect, useRef, useState } from 'react'; import { coreEvents, - type Config, - type ResumedSessionData, convertSessionToClientHistory, } from '@google/gemini-cli-core'; -import type { Part } from '@google/genai'; +import type { + HistoryTurn, + Config, + ResumedSessionData, +} from '@google/gemini-cli-core'; import type { HistoryItemWithoutId } from '../types.js'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; import { convertSessionToHistoryFormats } from './useSessionBrowser.js'; +import type { Part } from 
'@google/genai'; interface UseSessionResumeParams { config: Config; @@ -54,7 +57,9 @@ export function useSessionResume({ const loadHistoryForResume = useCallback( async ( uiHistory: HistoryItemWithoutId[], - clientHistory: Array<{ role: 'user' | 'model'; parts: Part[] }>, + clientHistory: Array< + { role: 'user' | 'model'; parts: Part[] } | HistoryTurn + >, resumedData: ResumedSessionData, ) => { // Wait for the client. diff --git a/packages/core/src/agents/browser/snapshotSuperseder.test.ts b/packages/core/src/agents/browser/snapshotSuperseder.test.ts index 773d0216e0..8d30434dcd 100644 --- a/packages/core/src/agents/browser/snapshotSuperseder.test.ts +++ b/packages/core/src/agents/browser/snapshotSuperseder.test.ts @@ -9,16 +9,21 @@ import { supersedeStaleSnapshots, SNAPSHOT_SUPERSEDED_PLACEHOLDER, } from './snapshotSuperseder.js'; -import type { GeminiChat } from '../../core/geminiChat.js'; +import type { GeminiChat, HistoryTurn } from '../../core/geminiChat.js'; import type { Content } from '@google/genai'; +import { randomUUID } from 'node:crypto'; /** Builds a minimal mock GeminiChat around a mutable history array. */ function createMockChat(history: Content[]): GeminiChat { + const getTurns = () => history.map((c) => ({ id: randomUUID(), content: c })); return { getHistory: vi.fn(() => [...history]), - setHistory: vi.fn((newHistory: readonly Content[]) => { + getHistoryTurns: vi.fn(() => getTurns()), + setHistory: vi.fn((newHistory: ReadonlyArray) => { history.length = 0; - history.push(...newHistory); + for (const item of newHistory) { + history.push('content' in item ? 
item.content : item); + } }), } as unknown as GeminiChat; } diff --git a/packages/core/src/agents/browser/snapshotSuperseder.ts b/packages/core/src/agents/browser/snapshotSuperseder.ts index e8a5068dd9..935223c2e2 100644 --- a/packages/core/src/agents/browser/snapshotSuperseder.ts +++ b/packages/core/src/agents/browser/snapshotSuperseder.ts @@ -14,8 +14,8 @@ * model call so the model only ever sees the most recent snapshot in full. */ -import type { GeminiChat } from '../../core/geminiChat.js'; -import type { Content, Part } from '@google/genai'; +import type { GeminiChat, HistoryTurn } from '../../core/geminiChat.js'; +import type { Part } from '@google/genai'; import { debugLogger } from '../../utils/debugLogger.js'; const TAKE_SNAPSHOT_TOOL_NAME = 'take_snapshot'; @@ -39,7 +39,7 @@ export const SNAPSHOT_SUPERSEDED_PLACEHOLDER = * Uses {@link GeminiChat.setHistory} to apply the modified history. */ export function supersedeStaleSnapshots(chat: GeminiChat): void { - const history = chat.getHistory(); + const history = chat.getHistoryTurns(); // Locate all (contentIndex, partIndex) tuples for take_snapshot responses. 
const snapshotLocations: Array<{ @@ -48,7 +48,7 @@ export function supersedeStaleSnapshots(chat: GeminiChat): void { }> = []; for (let i = 0; i < history.length; i++) { - const parts = history[i].parts; + const parts = history[i].content.parts; if (!parts) continue; for (let j = 0; j < parts.length; j++) { const part = parts[j]; @@ -71,7 +71,7 @@ export function supersedeStaleSnapshots(chat: GeminiChat): void { const staleLocations = snapshotLocations.slice(0, -1); const needsUpdate = staleLocations.some(({ contentIdx, partIdx }) => { const output = getResponseOutput( - history[contentIdx].parts![partIdx].functionResponse?.response, + history[contentIdx].content.parts![partIdx].functionResponse?.response, ); return !output.includes(SNAPSHOT_SUPERSEDED_PLACEHOLDER); }); @@ -81,15 +81,18 @@ export function supersedeStaleSnapshots(chat: GeminiChat): void { } // Shallow-copy the history and replace stale snapshots. - const newHistory: Content[] = history.map((content) => ({ - ...content, - parts: content.parts ? [...content.parts] : undefined, + const newHistory: HistoryTurn[] = history.map((turn) => ({ + id: turn.id, + content: { + ...turn.content, + parts: turn.content.parts ? 
[...turn.content.parts] : undefined, + }, })); let replacedCount = 0; for (const { contentIdx, partIdx } of staleLocations) { - const originalPart = newHistory[contentIdx].parts![partIdx]; + const originalPart = newHistory[contentIdx].content.parts![partIdx]; if (!originalPart.functionResponse) continue; // Check if already superseded @@ -106,7 +109,7 @@ export function supersedeStaleSnapshots(chat: GeminiChat): void { }, }; - newHistory[contentIdx].parts![partIdx] = replacementPart; + newHistory[contentIdx].content.parts![partIdx] = replacementPart; replacedCount++; } diff --git a/packages/core/src/agents/local-executor.test.ts b/packages/core/src/agents/local-executor.test.ts index a1f3b72965..a40b8f55b1 100644 --- a/packages/core/src/agents/local-executor.test.ts +++ b/packages/core/src/agents/local-executor.test.ts @@ -756,12 +756,19 @@ describe('LocalAgentExecutor', () => { expect(startHistory).toBeDefined(); expect(startHistory).toHaveLength(2); + const history = startHistory!; // Perform checks on defined objects to satisfy TS - const firstPart = startHistory?.[0]?.parts?.[0]; + const firstPart = + 'content' in history[0] + ? history[0].content.parts?.[0] + : history[0].parts?.[0]; expect(firstPart?.text).toBe('Goal: TestGoal'); - const secondPart = startHistory?.[1]?.parts?.[0]; + const secondPart = + 'content' in history[1] + ? 
history[1].content.parts?.[0] + : history[1].parts?.[0]; expect(secondPart?.text).toBe('OK, starting on TestGoal.'); }); @@ -3601,7 +3608,14 @@ describe('LocalAgentExecutor', () => { expect(mockCompress).toHaveBeenCalledTimes(1); expect(mockSetHistory).toHaveBeenCalledTimes(1); - expect(mockSetHistory).toHaveBeenCalledWith(compressedHistory); + // History turns are now wrapped with IDs + expect(mockSetHistory).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + content: expect.objectContaining({ role: 'user' }), + }), + ]), + ); }); it('should pass hasFailedCompressionAttempt=true to compression after a failure', async () => { @@ -3706,7 +3720,14 @@ describe('LocalAgentExecutor', () => { expect(mockCompress.mock.calls[2][5]).toBe(false); expect(mockSetHistory).toHaveBeenCalledTimes(1); - expect(mockSetHistory).toHaveBeenCalledWith(compressedHistory); + // History turns are now wrapped with IDs + expect(mockSetHistory).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + content: expect.objectContaining({ role: 'user' }), + }), + ]), + ); }); }); diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index 266eb55a4c..76e76810ae 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -919,12 +919,20 @@ export class LocalAgentExecutor { this.hasFailedCompressionAttempt = true; } else if (info.compressionStatus === CompressionStatus.COMPRESSED) { if (newHistory) { - chat.setHistory(newHistory); + const turns = newHistory.map((c) => ({ + id: randomUUID(), + content: c, + })); + chat.setHistory(turns); this.hasFailedCompressionAttempt = false; } } else if (info.compressionStatus === CompressionStatus.CONTENT_TRUNCATED) { if (newHistory) { - chat.setHistory(newHistory); + const turns = newHistory.map((c) => ({ + id: randomUUID(), + content: c, + })); + chat.setHistory(turns); // Do NOT reset hasFailedCompressionAttempt. 
// We only truncated content because summarization previously failed. // We want to keep avoiding expensive summarization calls. diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 18ff941cfd..4429ce5de1 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -689,6 +689,7 @@ export interface ConfigParameters { enableShellOutputEfficiency?: boolean; shellToolInactivityTimeout?: number; fakeResponses?: string; + fakeResponsesNonStrict?: string; recordResponses?: string; ptyInfo?: string; disableYoloMode?: boolean; @@ -921,6 +922,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly enableShellOutputEfficiency: boolean; private readonly shellToolInactivityTimeout: number; readonly fakeResponses?: string; + readonly fakeResponsesNonStrict?: string; readonly recordResponses?: string; private readonly disableYoloMode: boolean; private readonly disableAlwaysAllow: boolean; @@ -1301,6 +1303,7 @@ export class Config implements McpContext, AgentLoopContext { this.storage.setCustomPlansDir(params.planSettings?.directory); this.fakeResponses = params.fakeResponses; + this.fakeResponsesNonStrict = params.fakeResponsesNonStrict; this.recordResponses = params.recordResponses; this.fileExclusions = new FileExclusions(this); this.eventEmitter = params.eventEmitter; diff --git a/packages/core/src/context/chatCompressionService.test.ts b/packages/core/src/context/chatCompressionService.test.ts index c4f26dedc0..ea21bb0225 100644 --- a/packages/core/src/context/chatCompressionService.test.ts +++ b/packages/core/src/context/chatCompressionService.test.ts @@ -196,7 +196,7 @@ describe('ChatCompressionService', () => { } as unknown as Config; vi.mocked(getInitialChatHistory).mockImplementation( - async (_config, extraHistory) => extraHistory || [], + async (_config, extraHistory) => (extraHistory ? 
[...extraHistory] : []), ); }); diff --git a/packages/core/src/context/chatCompressionService.ts b/packages/core/src/context/chatCompressionService.ts index 992ca67cf9..6d49ed97e7 100644 --- a/packages/core/src/context/chatCompressionService.ts +++ b/packages/core/src/context/chatCompressionService.ts @@ -442,7 +442,9 @@ export class ChatCompressionService { const fullNewHistory = await getInitialChatHistory(config, extraHistory); const newTokenCount = await calculateRequestTokenCount( - fullNewHistory.flatMap((c) => c.parts || []), + fullNewHistory.flatMap( + (c) => ('content' in c ? c.content.parts : c.parts) || [], + ), config.getContentGenerator(), model, ); diff --git a/packages/core/src/context/contextManager.barrier.test.ts b/packages/core/src/context/contextManager.barrier.test.ts index e46637d7d8..9f5aaa119a 100644 --- a/packages/core/src/context/contextManager.barrier.test.ts +++ b/packages/core/src/context/contextManager.barrier.test.ts @@ -5,6 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { randomUUID } from 'node:crypto'; import { testTruncateProfile } from './testing/testProfile.js'; import { createSyntheticHistory, @@ -32,20 +33,35 @@ describe('ContextManager Sync Pressure Barrier Tests', () => { // 2. Add System Prompt (Episode 0 - Protected) chatHistory.set([ - { role: 'user', parts: [{ text: 'System prompt' }] }, - { role: 'model', parts: [{ text: 'Understood.' }] }, + { + id: 'h1', + content: { role: 'user', parts: [{ text: 'System prompt' }] }, + }, + { + id: 'h2', + content: { role: 'model', parts: [{ text: 'Understood.' }] }, + }, ]); // 3. 
Add massive history that blows past the 150k maxTokens limit // 20 turns * ~20,000 tokens/turn (10k user + 10k model) = ~400,000 tokens - const massiveHistory = createSyntheticHistory(20, 10000); + const massiveHistory = createSyntheticHistory(20, 10000).map((c) => ({ + id: randomUUID(), + content: c, + })); chatHistory.set([...chatHistory.get(), ...massiveHistory]); // 4. Add the Latest Turn (Protected) chatHistory.set([ ...chatHistory.get(), - { role: 'user', parts: [{ text: 'Final question.' }] }, - { role: 'model', parts: [{ text: 'Final answer.' }] }, + { + id: 'h-last-user', + content: { role: 'user', parts: [{ text: 'Final question.' }] }, + }, + { + id: 'h-last-model', + content: { role: 'model', parts: [{ text: 'Final answer.' }] }, + }, ]); const rawHistoryLength = chatHistory.get().length; @@ -59,21 +75,22 @@ describe('ContextManager Sync Pressure Barrier Tests', () => { expect(projection.length).toBeLessThan(rawHistoryLength); // Verify Episode 0 (System) was pruned, so we now start with a sentinel due to role alternation - expect(projection[0].role).toBe('user'); + expect(projection[0].content.role).toBe('user'); const projectionString = JSON.stringify(projection); expect(projectionString).toContain('User turn 17'); // Filter out synthetic Yield nodes (they are model responses without actual tool/text bodies) const contentNodes = projection.filter( (p) => - p.parts && p.parts.some((part) => part.text && part.text !== 'Yield'), + p.content.parts && + p.content.parts.some((part) => part.text && part.text !== 'Yield'), ); // Verify the latest turn is perfectly preserved at the back // Note: The HistoryHardener appends a "Please continue." user turn if we end on model, // so we look at the turns before the sentinel. 
- const lastSentinel = contentNodes[contentNodes.length - 1]; - const lastModel = contentNodes[contentNodes.length - 2]; - const lastUser = contentNodes[contentNodes.length - 3]; + const lastSentinel = contentNodes[contentNodes.length - 1].content; + const lastModel = contentNodes[contentNodes.length - 2].content; + const lastUser = contentNodes[contentNodes.length - 3].content; expect(lastSentinel.role).toBe('user'); expect(lastSentinel.parts![0].text).toBe('Please continue.'); diff --git a/packages/core/src/context/contextManager.hotstart.test.ts b/packages/core/src/context/contextManager.hotstart.test.ts index 5d0d848267..cae39185cb 100644 --- a/packages/core/src/context/contextManager.hotstart.test.ts +++ b/packages/core/src/context/contextManager.hotstart.test.ts @@ -47,7 +47,9 @@ describe('ContextManager - Hot Start Calibration', () => { const emitGroundTruthSpy = vi.spyOn(env.eventBus, 'emitTokenGroundTruth'); // Add a node to make the buffer non-empty - chatHistory.set([{ role: 'user', parts: [{ text: 'Hello' }] }]); + chatHistory.set([ + { id: 'h1', content: { role: 'user', parts: [{ text: 'Hello' }] } }, + ]); // First render should trigger calibration await contextManager.renderHistory(); @@ -81,7 +83,9 @@ describe('ContextManager - Hot Start Calibration', () => { ); // Add a node - chatHistory.set([{ role: 'user', parts: [{ text: 'Hello' }] }]); + chatHistory.set([ + { id: 'h1', content: { role: 'user', parts: [{ text: 'Hello' }] } }, + ]); // Render should succeed without throwing const result = await contextManager.renderHistory(); diff --git a/packages/core/src/context/contextManager.ts b/packages/core/src/context/contextManager.ts index f161e0903b..e8a59b7967 100644 --- a/packages/core/src/context/contextManager.ts +++ b/packages/core/src/context/contextManager.ts @@ -5,8 +5,11 @@ */ import type { Content } from '@google/genai'; -import type { AgentChatHistory } from '../core/agentChatHistory.js'; -import { isToolExecution, type ConcreteNode } from 
'./graph/types.js'; +import type { + AgentChatHistory, + HistoryTurn, +} from '../core/agentChatHistory.js'; +import type { ConcreteNode } from './graph/types.js'; import type { ContextEventBus } from './eventBus.js'; import type { ContextTracer } from './tracer.js'; import type { ContextEnvironment } from './pipeline/environment.js'; @@ -38,9 +41,11 @@ export class ContextManager { private lastRenderCache?: { nodesHash: string; result: { - history: Content[]; + history: HistoryTurn[]; + apiHistory: Content[]; didApplyManagement: boolean; baseUnits: number; + processedNodes: readonly ConcreteNode[]; }; }; @@ -75,6 +80,21 @@ export class ContextManager { this.evaluateTriggers(event.newNodes); }); this.eventBus.onProcessorResult((event) => { + // Defensive: Verify all targets are still present in the buffer. + // If a synchronous render or a previous async task already removed them, + // this result is stale and should be dropped. + const currentIds = new Set(this.buffer.nodes.map((n) => n.id)); + const allTargetsPresent = event.targets.every((t) => + currentIds.has(t.id), + ); + + if (!allTargetsPresent) { + debugLogger.log( + `[ContextManager] Dropping stale processor result from ${event.processorId}. One or more targets were already removed.`, + ); + return; + } + this.buffer = this.buffer.applyProcessorResult( event.processorId, event.targets, @@ -127,11 +147,11 @@ export class ContextManager { const agedOutNodes = new Set(); let rollingTokens = 0; - // Identify active tool calls that must NEVER be truncated + // Identify nodes that must NEVER be truncated const protectedIds = this.getProtectedNodeIds(this.buffer.nodes); if (protectedIds.size > 0) { debugLogger.log( - `[ContextManager] Pinning ${protectedIds.size} active tool call nodes to prevent truncation.`, + `[ContextManager] Pinning ${protectedIds.size} nodes (recent_turn or external_active_task) to prevent truncation.`, ); } @@ -215,24 +235,7 @@ export class ContextManager { } } - // 2. 
Identify active tool calls that must NEVER be truncated - const calls = nodes.filter((n) => isToolExecution(n) && n.role === 'model'); - const responses = new Set( - nodes - .filter((n) => isToolExecution(n) && n.role === 'user') - .map((n) => n.payload.functionResponse?.id) - .filter((id): id is string => !!id), - ); - - for (const call of calls) { - const id = call.payload.functionCall?.id; - // If we have a call but no response in the current graph, it's 'in flight' - if (id && !responses.has(id)) { - protectionMap.set(call.id, 'in_flight_tool_call'); - } - } - - // 3. Any externally requested protections + // 2. Any externally requested protections for (const id of extraProtectedIds) { protectionMap.set(id, 'external_active_task'); } @@ -278,13 +281,15 @@ export class ContextManager { * This is the primary method called by the agent framework before sending a request. */ async renderHistory( - pendingRequest?: Content, + pendingRequest?: HistoryTurn, activeTaskIds: Set = new Set(), abortSignal?: AbortSignal, ): Promise<{ - history: Content[]; + history: HistoryTurn[]; + apiHistory: Content[]; didApplyManagement: boolean; baseUnits: number; + processedNodes: readonly ConcreteNode[]; }> { this.tracer.logEvent('ContextManager', 'Starting rendering of LLM context'); @@ -302,6 +307,7 @@ export class ContextManager { const hotStartPromise = (async () => { if (!this.hasPerformedHotStart) { this.hasPerformedHotStart = true; + if (this.buffer.nodes.length > 0) { const nodesForHotStart = [...this.buffer.nodes, ...previewNodes]; await this.performHotStartCalibration(nodesForHotStart, abortSignal); @@ -345,11 +351,7 @@ export class ContextManager { const protectionReasons = this.getProtectedNodeIds(nodes, activeTaskIds); // Apply final GC Backstop pressure barrier synchronously before mapping - const { - history: renderedHistory, - didApplyManagement, - baseUnits, - } = await render( + const renderResult = await render( nodes, this.orchestrator, this.sidecar, @@ -361,21 
+363,68 @@ export class ContextManager { previewNodeIds, ); + const { + history: renderedHistory, + didApplyManagement, + baseUnits, + processedNodes, + } = renderResult; + + if (didApplyManagement) { + // Commit the GC backstop results back to the master buffer. + // We filter out preview nodes because they are ephemeral and will be + // added to history naturally by the client after the turn completes. + this.buffer = this.buffer.applyProcessorResult( + 'sync_backstop', + this.buffer.nodes, + processedNodes.filter((n) => !previewNodeIds.has(n.id)), + ); + } + // Structural validation in debug mode checkContextInvariants(this.buffer.nodes, 'RenderHistory'); this.tracer.logEvent('ContextManager', 'Finished rendering'); - const combinedHistory = header - ? [header, ...renderedHistory] + // We must temporarily append the pendingRequest (if any) before hardening. + // Otherwise, the hardener will see dangling functionCalls and inject sentinels + // even though the pendingRequest provides the required functionResponses. + const fullHistoryToHarden = pendingRequest + ? 
[...renderedHistory, pendingRequest] : renderedHistory; + const hardenedHistory = hardenHistory(fullHistoryToHarden, { + sentinels: this.sidecar.sentinels, + }); + + if (pendingRequest) { + const last = hardenedHistory[hardenedHistory.length - 1]; + if (last && last.content.parts) { + const numPartsToRemove = pendingRequest.content.parts?.length || 0; + if ( + numPartsToRemove > 0 && + last.content.parts.length > numPartsToRemove + ) { + last.content.parts.splice(-numPartsToRemove); + } else { + hardenedHistory.pop(); + } + } else { + hardenedHistory.pop(); + } + } + + const apiHistory = hardenedHistory.map((h) => h.content); + if (header) { + apiHistory.unshift(header); + } + const result = { - history: hardenHistory(combinedHistory, { - sentinels: this.sidecar.sentinels, - }), + history: hardenedHistory, + apiHistory, didApplyManagement, baseUnits, + processedNodes, }; // Update cache @@ -394,10 +443,11 @@ export class ContextManager { ); const contents = this.env.graphMapper.fromGraph(nodes); + const rawContents = contents.map((h) => h.content); const header = this.headerProvider ? await this.headerProvider() : undefined; - const combinedHistory = header ? [header, ...contents] : contents; + const combinedHistory = header ? 
[header, ...rawContents] : rawContents; const baseUnits = this.advancedTokenCalculator.getRawBaseUnits(nodes) + diff --git a/packages/core/src/context/graph/fromGraph.test.ts b/packages/core/src/context/graph/fromGraph.test.ts new file mode 100644 index 0000000000..0017fd7465 --- /dev/null +++ b/packages/core/src/context/graph/fromGraph.test.ts @@ -0,0 +1,202 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { fromGraph } from './fromGraph.js'; +import { NodeType, type ConcreteNode } from './types.js'; +import { NodeIdService } from './nodeIdService.js'; + +describe('fromGraph', () => { + it('should reconstruct an empty history from empty nodes', () => { + expect(fromGraph([])).toEqual([]); + }); + + it('should reconstruct a single turn from a single node', () => { + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + turnId: 'turn_durable_1', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'hello' }, + timestamp: 100, + }, + ]; + + const history = fromGraph(nodes); + expect(history).toEqual([ + { + id: 'durable_1', + content: { + role: 'user', + parts: [{ text: 'hello' }], + }, + }, + ]); + }); + + it('should coalesce adjacent nodes with the same turnId into a single turn', () => { + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + turnId: 'turn_durable_1', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'hello' }, + timestamp: 100, + }, + { + id: 'node_2', + turnId: 'turn_durable_1', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'world' }, + timestamp: 101, + }, + ]; + + const history = fromGraph(nodes); + expect(history).toEqual([ + { + id: 'durable_1', + content: { + role: 'user', + parts: [{ text: 'hello' }, { text: 'world' }], + }, + }, + ]); + }); + + it('should split turns when the role changes', () => { + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + turnId: 'turn_durable_1', + role: 
'user', + type: NodeType.USER_PROMPT, + payload: { text: 'hello' }, + timestamp: 100, + }, + { + id: 'node_2', + turnId: 'turn_durable_2', + role: 'model', + type: NodeType.AGENT_THOUGHT, + payload: { text: 'hi' }, + timestamp: 101, + }, + ]; + + const history = fromGraph(nodes); + expect(history).toEqual([ + { + id: 'durable_1', + content: { + role: 'user', + parts: [{ text: 'hello' }], + }, + }, + { + id: 'durable_2', + content: { + role: 'model', + parts: [{ text: 'hi' }], + }, + }, + ]); + }); + + it('should split turns when the turnId changes, even if role is the same', () => { + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + turnId: 'turn_durable_1', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'hello' }, + timestamp: 100, + }, + { + id: 'node_2', + turnId: 'turn_durable_2', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'world' }, + timestamp: 101, + }, + ]; + + const history = fromGraph(nodes); + expect(history).toEqual([ + { + id: 'durable_1', + content: { + role: 'user', + parts: [{ text: 'hello' }], + }, + }, + { + id: 'durable_2', + content: { + role: 'user', + parts: [{ text: 'world' }], + }, + }, + ]); + }); + + it('should correctly strip the turn_ prefix from turnId', () => { + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + turnId: 'turn_my_stable_id_123', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'hello' }, + timestamp: 100, + }, + ]; + + const history = fromGraph(nodes); + expect(history[0].id).toBe('my_stable_id_123'); + }); + + it('should handle orphan nodes gracefully', () => { + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + role: 'user', + type: NodeType.USER_PROMPT, + payload: { text: 'orphan part' }, + timestamp: 100, + } as unknown as ConcreteNode, + ]; + + const history = fromGraph(nodes); + expect(history[0].id).toBe('orphan'); + expect(history[0].content.parts).toEqual([{ text: 'orphan part' }]); + }); + + it('should register identities with the 
NodeIdService if provided', () => { + const idService = new NodeIdService(); + const payload = { text: 'hello' }; + const nodes: ConcreteNode[] = [ + { + id: 'node_1', + turnId: 'turn_1', + role: 'user', + type: NodeType.USER_PROMPT, + payload, + timestamp: 100, + }, + ]; + + fromGraph(nodes, idService); + + // The payload object reference should map to the node ID + expect(idService.get(payload)).toBe('node_1'); + }); +}); diff --git a/packages/core/src/context/graph/fromGraph.ts b/packages/core/src/context/graph/fromGraph.ts index 3a078c01ee..92740cc02e 100644 --- a/packages/core/src/context/graph/fromGraph.ts +++ b/packages/core/src/context/graph/fromGraph.ts @@ -7,22 +7,34 @@ import type { Content } from '@google/genai'; import type { ConcreteNode } from './types.js'; import { debugLogger } from '../../utils/debugLogger.js'; +import type { NodeIdService } from './nodeIdService.js'; +import type { HistoryTurn } from '../../core/agentChatHistory.js'; /** - * Reconstructs a valid Gemini Chat History from a list of Concrete Nodes. + * Reconstructs a list of HistoryTurns from a list of Concrete Nodes. * This process is "role-alternation-aware" and uses turnId to - * preserve original turn boundaries even if multiple turns have the same role. + * preserve original turn boundaries and IDs. */ -export function fromGraph(nodes: readonly ConcreteNode[]): Content[] { +export function fromGraph( + nodes: readonly ConcreteNode[], + idService?: NodeIdService, +): HistoryTurn[] { debugLogger.log( `[fromGraph] Reconstructing history from ${nodes.length} nodes`, ); - const history: Content[] = []; - let currentTurn: (Content & { _turnId?: string }) | null = null; + const history: HistoryTurn[] = []; + let currentTurn: { id: string; content: Content } | null = null; for (const node of nodes) { - const turnId = node.turnId; + const turnId = node.turnId || 'orphan'; + const durableId = turnId.startsWith('turn_') ? 
turnId.slice(5) : turnId; + + // Register the payload in the identity service to ensure stability + // even if the turn content changes (e.g. after GC backstop). + if (idService) { + idService.set(node.payload, node.id); + } // We start a new turn if: // 1. We don't have a current turn. @@ -30,26 +42,25 @@ export function fromGraph(nodes: readonly ConcreteNode[]): Content[] { // 3. The turnId changes (Preserving distinct turns of the same role). if ( !currentTurn || - currentTurn.role !== node.role || - currentTurn._turnId !== turnId + currentTurn.content.role !== node.role || + currentTurn.id !== durableId ) { currentTurn = { - role: node.role, - parts: [node.payload], - _turnId: turnId, + id: durableId, + content: { + role: node.role, + parts: [node.payload], + }, }; history.push(currentTurn); } else { - currentTurn.parts = [...(currentTurn.parts || []), node.payload]; + currentTurn.content.parts = [ + ...(currentTurn.content.parts || []), + node.payload, + ]; } } - // Final cleanup: remove our internal tracking field - for (const turn of history) { - const t = turn as Content & { _turnId?: string }; - delete t._turnId; - } - debugLogger.log(`[fromGraph] Reconstructed ${history.length} turns`); return history; } diff --git a/packages/core/src/context/graph/mapper.test.ts b/packages/core/src/context/graph/mapper.test.ts new file mode 100644 index 0000000000..fa2640dac1 --- /dev/null +++ b/packages/core/src/context/graph/mapper.test.ts @@ -0,0 +1,110 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { ContextGraphMapper } from './mapper.js'; +import type { HistoryTurn } from '../../core/agentChatHistory.js'; +import { hardenHistory } from '../../utils/historyHardening.js'; + +describe('ContextGraphMapper (Round-Trip Fidelity)', () => { + it('should flawlessly round-trip a complex history containing parallel tool calls and responses', () => { + // 1. 
Define a complex, worst-case scenario history + const originalHistory: HistoryTurn[] = [ + { + id: 'system_prompt_id', + content: { + role: 'user', + parts: [{ text: '\nSystem Prompt here' }], + }, + }, + { + id: 'user_turn_1', + content: { + role: 'user', + parts: [{ text: 'Please read file A and file B at the same time.' }], + }, + }, + { + id: 'model_turn_1', + content: { + role: 'model', + parts: [ + { text: 'I will read both files concurrently.' }, + { + functionCall: { + id: 'call_A', + name: 'read_file', + args: { path: 'A.txt' }, + }, + thoughtSignature: 'synthetic_sig_xyz', + }, + { + functionCall: { + id: 'call_B', + name: 'read_file', + args: { path: 'B.txt' }, + }, + }, + ], + }, + }, + // Note: GeminiChat records these as separate sequential user turns initially + { + id: 'tool_resp_B_id', + content: { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_B', + name: 'read_file', + response: { content: 'File B' }, + }, + }, + ], + }, + }, + { + id: 'tool_resp_A_id', + content: { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_A', + name: 'read_file', + response: { content: 'File A' }, + }, + }, + ], + }, + }, + ]; + + // 2. We harden the original history first. The core agent loop feeds the hardener the pure history. + // We want our round-tripped history to match what the hardener WOULD have produced natively. + const hardenedOriginal = hardenHistory(originalHistory); + + // 3. Translate History -> Graph + const mapper = new ContextGraphMapper(); + // Simulate the HistoryObserver capturing the push + const nodes = mapper.applyEvent({ + type: 'SYNC_FULL', + payload: originalHistory, + }); + + // 4. Translate Graph -> History + const reconstructedHistory = mapper.fromGraph(nodes); + + // 5. Harden the reconstructed history (as the ContextManager does before sending to API) + const hardenedReconstructed = hardenHistory(reconstructedHistory); + + // 6. 
Assert Absolute Equality + // The round-trip through the Context Graph and Hardener must exactly equal + // the original history put through the Hardener. + expect(hardenedReconstructed).toEqual(hardenedOriginal); + }); +}); diff --git a/packages/core/src/context/graph/mapper.ts b/packages/core/src/context/graph/mapper.ts index d66928d58f..ffd9a4cac0 100644 --- a/packages/core/src/context/graph/mapper.ts +++ b/packages/core/src/context/graph/mapper.ts @@ -5,23 +5,27 @@ */ import type { ConcreteNode } from './types.js'; import { ContextGraphBuilder } from './toGraph.js'; -import type { Content } from '@google/genai'; -import type { HistoryEvent } from '../../core/agentChatHistory.js'; +import type { HistoryEvent, HistoryTurn } from '../../core/agentChatHistory.js'; import { fromGraph } from './fromGraph.js'; +import { NodeIdService } from './nodeIdService.js'; export class ContextGraphMapper { - private readonly nodeIdentityMap = new WeakMap(); + private readonly idService = new NodeIdService(); private readonly builder: ContextGraphBuilder; constructor() { - this.builder = new ContextGraphBuilder(this.nodeIdentityMap); + this.builder = new ContextGraphBuilder(this.idService); } applyEvent(event: HistoryEvent): ConcreteNode[] { return this.builder.processHistory(event.payload); } - fromGraph(nodes: readonly ConcreteNode[]): Content[] { - return fromGraph(nodes); + fromGraph(nodes: readonly ConcreteNode[]): HistoryTurn[] { + return fromGraph(nodes, this.idService); + } + + getIdService(): NodeIdService { + return this.idService; } } diff --git a/packages/core/src/context/graph/nodeIdService.ts b/packages/core/src/context/graph/nodeIdService.ts new file mode 100644 index 0000000000..a9e48e748e --- /dev/null +++ b/packages/core/src/context/graph/nodeIdService.ts @@ -0,0 +1,23 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Provides a durable mapping between history object references and their + * corresponding 
graph node IDs. This ensures that context management logic + * can track the identity of turns even after they are transformed (e.g. scrubbed + * or hardened) without polluting the raw JSON sent to the Gemini API. + */ +export class NodeIdService { + constructor(private readonly map: WeakMap = new WeakMap()) {} + + get(obj: object): string | undefined { + return this.map.get(obj); + } + + set(obj: object, id: string): void { + this.map.set(obj, id); + } +} diff --git a/packages/core/src/context/graph/render.ts b/packages/core/src/context/graph/render.ts index e16beb4f38..58bf9f07d0 100644 --- a/packages/core/src/context/graph/render.ts +++ b/packages/core/src/context/graph/render.ts @@ -12,9 +12,10 @@ import type { PipelineOrchestrator } from '../pipeline/orchestrator.js'; import type { ContextEnvironment } from '../pipeline/environment.js'; import { performCalibration } from '../utils/tokenCalibration.js'; import type { AdvancedTokenCalculator } from '../utils/contextTokenCalculator.js'; +import type { HistoryTurn } from '../../core/agentChatHistory.js'; /** - * Maps the Episodic Context Graph back into a raw Gemini Content[] array for transmission. + * Maps the Episodic Context Graph back into a list of HistoryTurns for transmission. * It applies synchronous context management (GC backstop) if the budget is exceeded. 
*/ export async function render( @@ -28,9 +29,10 @@ export async function render( header?: Content, previewNodeIds: ReadonlySet = new Set(), ): Promise<{ - history: Content[]; + history: HistoryTurn[]; didApplyManagement: boolean; baseUnits: number; + processedNodes: readonly ConcreteNode[]; }> { let headerTokens = 0; let headerBaseUnits = 0; @@ -52,7 +54,12 @@ export async function render( const baseUnits = advancedTokenCalculator.getRawBaseUnits(nodes) + headerBaseUnits; - return { history: contents, didApplyManagement: false, baseUnits }; + return { + history: contents, + didApplyManagement: false, + baseUnits, + processedNodes: nodes, + }; } const maxTokens = sidecar.config.budget.maxTokens; @@ -92,11 +99,16 @@ export async function render( tracer.logEvent('Render', 'Render Context for LLM', { renderedContext: contents, }); - performCalibration(env, visibleNodes, contents); + performCalibration( + env, + visibleNodes, + contents.map((h) => h.content), + ); return { history: contents, didApplyManagement: false, baseUnits: graphBaseUnits + headerBaseUnits, + processedNodes: nodes, }; } const targetDelta = currentTokens - sidecar.config.budget.retainedTokens; @@ -145,11 +157,16 @@ export async function render( tracer.logEvent('Render', 'Render Sanitized Context for LLM', { renderedContextSanitized: contents, }); - performCalibration(env, visibleNodes, contents); + performCalibration( + env, + visibleNodes, + contents.map((h) => h.content), + ); return { history: contents, didApplyManagement: true, baseUnits: advancedTokenCalculator.getRawBaseUnits(visibleNodes) + headerBaseUnits, + processedNodes, }; } diff --git a/packages/core/src/context/graph/toGraph.test.ts b/packages/core/src/context/graph/toGraph.test.ts index 4a99202ffc..cfb9b4c239 100644 --- a/packages/core/src/context/graph/toGraph.test.ts +++ b/packages/core/src/context/graph/toGraph.test.ts @@ -4,29 +4,42 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect } from 'vitest'; 
+import { describe, it, expect, vi } from 'vitest'; import { ContextGraphBuilder } from './toGraph.js'; -import type { Content } from '@google/genai'; import type { BaseConcreteNode } from './types.js'; +import { NodeIdService } from './nodeIdService.js'; +import type { HistoryTurn } from '../../core/agentChatHistory.js'; describe('ContextGraphBuilder', () => { describe('toGraph', () => { it('should skip legacy headers even if they appear later in the history', () => { - const history: Content[] = [ - { role: 'user', parts: [{ text: 'Message 1' }] }, - { role: 'model', parts: [{ text: 'Reply 1' }] }, + const history: HistoryTurn[] = [ { - role: 'user', - parts: [ - { - text: '\nThis is the Gemini CLI\nSome context...', - }, - ], + id: '1', + content: { role: 'user', parts: [{ text: 'Message 1' }] }, + }, + { + id: '2', + content: { role: 'model', parts: [{ text: 'Reply 1' }] }, + }, + { + id: '3', + content: { + role: 'user', + parts: [ + { + text: '\nThis is the Gemini CLI\nSome context...', + }, + ], + }, + }, + { + id: '4', + content: { role: 'user', parts: [{ text: 'Message 2' }] }, }, - { role: 'user', parts: [{ text: 'Message 2' }] }, ]; - const builder = new ContextGraphBuilder(); + const builder = new ContextGraphBuilder(new NodeIdService()); const nodes = builder.processHistory(history); // We expect the first two messages and the last one to be present @@ -36,5 +49,81 @@ describe('ContextGraphBuilder', () => { expect((nodes[1] as BaseConcreteNode).payload.text).toBe('Reply 1'); expect((nodes[2] as BaseConcreteNode).payload.text).toBe('Message 2'); }); + + it('should generate completely deterministic graph structure and UUIDs across JSON serialization cycles', () => { + vi.spyOn(Date, 'now').mockReturnValue(0); + + const complexHistory: HistoryTurn[] = [ + { + id: 'turn-1', + content: { + role: 'user', + parts: [{ text: 'Step 1: complex analysis' }], + }, + }, + { + id: 'turn-2', + content: { + role: 'model', + parts: [ + { text: 'Thinking about the tool 
to use.' }, + { + functionCall: { + name: 'fetch_data', + args: { query: 'test data' }, + }, + }, + ], + }, + }, + { + id: 'turn-3', + content: { + role: 'user', + parts: [ + { + functionResponse: { + name: 'fetch_data', + response: { status: 'success', data: [1, 2, 3] }, + }, + }, + ], + }, + }, + { + id: 'turn-4', + content: { role: 'model', parts: [{ text: 'Analysis complete.' }] }, + }, + ]; + + // 1. Initial Graph Generation + const builder1 = new ContextGraphBuilder(new NodeIdService()); + const nodes1 = builder1.processHistory(complexHistory); + + // 2. Serialize and Deserialize (Simulating saving and loading from disk) + const serializedHistory = JSON.stringify(complexHistory); + const parsedHistory = JSON.parse(serializedHistory) as HistoryTurn[]; + + // 3. Second Graph Generation from parsed JSON + const builder2 = new ContextGraphBuilder(new NodeIdService()); + const nodes2 = builder2.processHistory(parsedHistory); + + // Assertion: The arrays must be completely identical, including all generated UUIDs + expect(nodes1).toEqual(nodes2); + + // Sanity check to ensure IDs are actually populated and consistent + expect(nodes1.length).toBeGreaterThan(0); + nodes1.forEach((node, index) => { + expect(node.id).toBeDefined(); + expect(node.id).toBe(nodes2[index].id); + expect(node.timestamp).toBe(0); + if ('turnId' in node) { + expect(node.turnId).toBeDefined(); + expect(node.turnId).toBe((nodes2[index] as BaseConcreteNode).turnId); + } + }); + + vi.restoreAllMocks(); + }); }); }); diff --git a/packages/core/src/context/graph/toGraph.ts b/packages/core/src/context/graph/toGraph.ts index f901f76659..0214c7021b 100644 --- a/packages/core/src/context/graph/toGraph.ts +++ b/packages/core/src/context/graph/toGraph.ts @@ -4,14 +4,13 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { Content, Part } from '@google/genai'; +import type { Part } from '@google/genai'; import { type ConcreteNode, NodeType } from './types.js'; -import { randomUUID, createHash } from 
'node:crypto'; +import { createHash } from 'node:crypto'; import { debugLogger } from '../../utils/debugLogger.js'; - -interface PartWithSynthId extends Part { - _synthId?: string; -} +import type { NodeIdService } from './nodeIdService.js'; +import type { HistoryTurn } from '../../core/agentChatHistory.js'; +import { isSnapshotState } from '../utils/snapshotGenerator.js'; // Global WeakMap to cache hashes for Part objects. // This optimizes getStableId by avoiding redundant stringify/hash operations @@ -62,34 +61,53 @@ function isFunctionResponsePart( ); } +function isExecutableCodePart( + part: Part, +): part is Part & { executableCode: { code: string; language: string } } { + return ( + typeof part.executableCode === 'object' && + part.executableCode !== null && + typeof part.executableCode.code === 'string' && + typeof part.executableCode.language === 'string' + ); +} + +function isCodeExecutionResultPart( + part: Part, +): part is Part & { codeExecutionResult: { outcome: string; output: string } } { + return ( + typeof part.codeExecutionResult === 'object' && + part.codeExecutionResult !== null && + typeof part.codeExecutionResult.output === 'string' && + typeof part.codeExecutionResult.outcome === 'string' + ); +} + /** - * Generates a stable ID for an object reference using a WeakMap. + * Generates a stable ID for an object reference using a NodeIdService. * Falls back to content-based hashing for Part-like objects to ensure * stability across object re-creations (e.g. during history mapping). 
*/ export function getStableId( obj: object, - nodeIdentityMap: WeakMap, + idService: NodeIdService, turnSalt: string = '', partIdx: number = 0, ): string { - let id = nodeIdentityMap.get(obj); + let id = idService.get(obj); if (id) return id; const cachedHash = PART_HASH_CACHE.get(obj); if (cachedHash) { id = `${cachedHash}_${turnSalt}_${partIdx}`; - nodeIdentityMap.set(obj, id); + idService.set(obj, id); return id; } - const part = obj as PartWithSynthId; + const part = obj as Part; let contentHash: string | undefined; - // If the object already has a synthetic ID property, use it. - if (typeof part._synthId === 'string') { - id = part._synthId; - } else if (isTextPart(part)) { + if (isTextPart(part)) { contentHash = createHash('sha256').update(part.text).digest('hex'); id = `text_${contentHash}_${turnSalt}_${partIdx}`; } else if (isInlineDataPart(part)) { @@ -116,6 +134,20 @@ export function getStableId( ) .digest('hex'); id = `resp_h_${contentHash}_${turnSalt}_${partIdx}`; + } else if (isExecutableCodePart(part)) { + contentHash = createHash('sha256') + .update( + `exec:${part.executableCode.language}:${part.executableCode.code}`, + ) + .digest('hex'); + id = `exec_${contentHash}_${turnSalt}_${partIdx}`; + } else if (isCodeExecutionResultPart(part)) { + contentHash = createHash('sha256') + .update( + `result:${part.codeExecutionResult.outcome}:${part.codeExecutionResult.output}`, + ) + .digest('hex'); + id = `result_${contentHash}_${turnSalt}_${partIdx}`; } if (contentHash) { @@ -123,10 +155,14 @@ export function getStableId( } if (!id) { - id = randomUUID(); + if (turnSalt && partIdx === -1) { + id = `turn_${turnSalt}`; + } else { + id = `${turnSalt}_f_${partIdx}`; + } } - nodeIdentityMap.set(obj, id); + idService.set(obj, id); return id; } @@ -135,18 +171,14 @@ export function getStableId( * Every Part in history is mapped to exactly one ConcreteNode. 
*/ export class ContextGraphBuilder { - constructor( - private readonly nodeIdentityMap: WeakMap = new WeakMap(), - ) {} + constructor(private readonly idService: NodeIdService) {} - processHistory(history: readonly Content[]): ConcreteNode[] { + processHistory(history: readonly HistoryTurn[]): ConcreteNode[] { const nodes: ConcreteNode[] = []; - // Tracks occurrences of identical turn content to ensure unique stable IDs - const seenHashes = new Map(); - for (let turnIdx = 0; turnIdx < history.length; turnIdx++) { - const msg = history[turnIdx]; + const turn = history[turnIdx]; + const msg = turn.content; if (!msg.parts) continue; // Defensive: Skip legacy environment header regardless of where it appears. @@ -164,15 +196,8 @@ export class ContextGraphBuilder { } } - // Generate a stable salt for this turn based on its role and content - const turnContent = JSON.stringify(msg.parts); - const h = createHash('md5') - .update(`${msg.role}:${turnContent}`) - .digest('hex'); - const occurrence = (seenHashes.get(h) || 0) + 1; - seenHashes.set(h, occurrence); - const turnSalt = `${h}_${occurrence}`; - const turnId = getStableId(msg, this.nodeIdentityMap, turnSalt, -1); + const turnSalt = turn.id; + const turnId = `turn_${turnSalt}`; if (msg.role === 'user') { for (let partIdx = 0; partIdx < msg.parts.length; partIdx++) { @@ -180,34 +205,46 @@ export class ContextGraphBuilder { const apiId = isFunctionResponsePart(part) && typeof part.functionResponse.id === 'string' - ? `resp_${part.functionResponse.id}_${turnSalt}_${partIdx}` + ? part.functionResponse.id : isFunctionCallPart(part) && typeof part.functionCall.id === 'string' - ? `call_${part.functionCall.id}_${turnSalt}_${partIdx}` + ? part.functionCall.id : undefined; - const id = - apiId || getStableId(part, this.nodeIdentityMap, turnSalt, partIdx); + + const isSnapshot = isTextPart(part) && isSnapshotState(part.text); + + // Use stable API ID if available, otherwise anchor to the turn and index. + const id = apiId + ? 
`${apiId}_${turnSalt}_${partIdx}` + : `${turnSalt}_${partIdx}`; + const node: ConcreteNode = { id, timestamp: Date.now(), type: isFunctionResponsePart(part) ? NodeType.TOOL_EXECUTION - : NodeType.USER_PROMPT, + : isSnapshot + ? NodeType.SNAPSHOT + : NodeType.USER_PROMPT, role: 'user', payload: part, turnId, }; nodes.push(node); + this.idService.set(part, id); } } else if (msg.role === 'model') { for (let partIdx = 0; partIdx < msg.parts.length; partIdx++) { const part = msg.parts[partIdx]; const apiId = isFunctionCallPart(part) && typeof part.functionCall.id === 'string' - ? `call_${part.functionCall.id}_${turnSalt}_${partIdx}` + ? part.functionCall.id : undefined; - const id = - apiId || getStableId(part, this.nodeIdentityMap, turnSalt, partIdx); + + const id = apiId + ? `${apiId}_${turnSalt}_${partIdx}` + : `${turnSalt}_${partIdx}`; + const node: ConcreteNode = { id, timestamp: Date.now(), @@ -219,6 +256,7 @@ export class ContextGraphBuilder { turnId, }; nodes.push(node); + this.idService.set(part, id); } } } diff --git a/packages/core/src/context/initializer.ts b/packages/core/src/context/initializer.ts index 3916210bea..ac6208a78e 100644 --- a/packages/core/src/context/initializer.ts +++ b/packages/core/src/context/initializer.ts @@ -24,6 +24,7 @@ import { StateSnapshotAsyncProcessorOptionsSchema } from './processors/stateSnap import { RollingSummaryProcessorOptionsSchema } from './processors/rollingSummaryProcessor.js'; import { getEnvironmentContext } from '../utils/environmentContext.js'; import { AdaptiveTokenCalculator } from './utils/adaptiveTokenCalculator.js'; +import { estimateContextBreakdown } from '../core/loggingContentGenerator.js'; import { NodeBehaviorRegistry } from './graph/behaviorRegistry.js'; import { registerBuiltInBehaviors } from './graph/builtinBehaviors.js'; @@ -92,10 +93,26 @@ export async function initializeContextManager( const behaviorRegistry = new NodeBehaviorRegistry(); registerBuiltInBehaviors(behaviorRegistry); + const 
getOverheadTokens = () => { + const breakdown = estimateContextBreakdown([], { + systemInstruction: { + role: 'system', + parts: [{ text: chat.getSystemInstruction() }], + }, + tools: chat.getTools(), + }); + return ( + breakdown.system_instructions + + breakdown.tool_definitions + + breakdown.mcp_servers + ); + }; + const calculator = new AdaptiveTokenCalculator( charsPerToken, behaviorRegistry, eventBus, + getOverheadTokens, ); const env = new ContextEnvironmentImpl( diff --git a/packages/core/src/context/processors/blobDegradationProcessor.ts b/packages/core/src/context/processors/blobDegradationProcessor.ts index c1cae0d0a6..663d26aba7 100644 --- a/packages/core/src/context/processors/blobDegradationProcessor.ts +++ b/packages/core/src/context/processors/blobDegradationProcessor.ts @@ -3,7 +3,7 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { randomUUID } from 'node:crypto'; +import { deriveStableId } from '../../utils/cryptoUtils.js'; import type { JSONSchemaType } from 'ajv'; import type { ProcessArgs, ContextProcessor } from '../pipeline.js'; import * as fs from 'node:fs/promises'; @@ -62,7 +62,8 @@ export function createBlobDegradationProcessor( if (payload.inlineData?.data && payload.inlineData?.mimeType) { await ensureDir(); const ext = payload.inlineData.mimeType.split('/')[1] || 'bin'; - const fileName = `blob_${Date.now()}_${randomUUID()}.${ext}`; + // Use a stable filename based on the node ID + const fileName = `blob_${deriveStableId([node.id])}.${ext}`; const filePath = path.join(blobOutputsDir, fileName); const buffer = Buffer.from(payload.inlineData.data, 'base64'); @@ -92,7 +93,7 @@ export function createBlobDegradationProcessor( if (newText && tokensSaved > 0) { returnedNodes.push({ ...node, - id: randomUUID(), + id: deriveStableId([node.id, 'degraded']), payload: { text: newText }, replacesId: node.id, turnId: node.turnId, diff --git a/packages/core/src/context/processors/nodeDistillationProcessor.ts 
b/packages/core/src/context/processors/nodeDistillationProcessor.ts index 5691ddf51b..a6a818ed56 100644 --- a/packages/core/src/context/processors/nodeDistillationProcessor.ts +++ b/packages/core/src/context/processors/nodeDistillationProcessor.ts @@ -3,7 +3,7 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { randomUUID } from 'node:crypto'; +import { deriveStableId } from '../../utils/cryptoUtils.js'; import type { JSONSchemaType } from 'ajv'; import type { ContextProcessor, ProcessArgs } from '../pipeline.js'; import { type ConcreteNode, NodeType } from '../graph/types.js'; @@ -99,9 +99,10 @@ export function createNodeDistillationProcessor( if (newTokens < oldTokens) { const distilledPayload = updatePart(payload, { text: summary }); + const newId = deriveStableId([node.id, 'distilled']); returnedNodes.push({ ...node, - id: randomUUID(), + id: newId, payload: distilledPayload, replacesId: node.id, timestamp: node.timestamp, @@ -158,9 +159,10 @@ export function createNodeDistillationProcessor( functionResponse: newFR, }); + const newId = deriveStableId([node.id, 'distilled']); returnedNodes.push({ ...node, - id: randomUUID(), + id: newId, payload: distilledPayload, replacesId: node.id, timestamp: node.timestamp, diff --git a/packages/core/src/context/processors/nodeTruncationProcessor.ts b/packages/core/src/context/processors/nodeTruncationProcessor.ts index acb08e2022..75f74f60cf 100644 --- a/packages/core/src/context/processors/nodeTruncationProcessor.ts +++ b/packages/core/src/context/processors/nodeTruncationProcessor.ts @@ -3,7 +3,7 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { randomUUID } from 'node:crypto'; +import { deriveStableId } from '../../utils/cryptoUtils.js'; import type { JSONSchemaType } from 'ajv'; import type { ContextProcessor, ProcessArgs } from '../pipeline.js'; import type { ContextEnvironment } from '../pipeline/environment.js'; @@ -79,9 +79,10 @@ export function 
createNodeTruncationProcessor( if (text) { const squashResult = tryApplySquash(text, limitChars); if (squashResult) { + const newId = deriveStableId([node.id, 'truncated']); returnedNodes.push({ ...node, - id: randomUUID(), + id: newId, payload: { ...payload, text: squashResult.text }, replacesId: node.id, turnId: node.turnId, diff --git a/packages/core/src/context/processors/rollingSummaryProcessor.ts b/packages/core/src/context/processors/rollingSummaryProcessor.ts index fc6f5cb60a..5de111e0f8 100644 --- a/packages/core/src/context/processors/rollingSummaryProcessor.ts +++ b/packages/core/src/context/processors/rollingSummaryProcessor.ts @@ -3,7 +3,7 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { randomUUID } from 'node:crypto'; +import { deriveStableId } from '../../utils/cryptoUtils.js'; import type { JSONSchemaType } from 'ajv'; import type { ContextProcessor, @@ -112,7 +112,8 @@ export function createRollingSummaryProcessor( try { // Synthesize the rolling summary synchronously const snapshotText = await generateRollingSummary(nodesToSummarize); - const newId = randomUUID(); + const consumedIds = nodesToSummarize.map((n) => n.id); + const newId = deriveStableId(consumedIds); const summaryNode: RollingSummary = { id: newId, @@ -121,10 +122,9 @@ export function createRollingSummaryProcessor( timestamp: nodesToSummarize[nodesToSummarize.length - 1].timestamp, role: 'user', payload: { text: snapshotText }, - abstractsIds: nodesToSummarize.map((n) => n.id), + abstractsIds: consumedIds, }; - const consumedIds = nodesToSummarize.map((n) => n.id); const returnedNodes = targets.filter( (t) => !consumedIds.includes(t.id), ); diff --git a/packages/core/src/context/processors/stateSnapshotProcessor.ts b/packages/core/src/context/processors/stateSnapshotProcessor.ts index 3ad3001a16..b57f4f7960 100644 --- a/packages/core/src/context/processors/stateSnapshotProcessor.ts +++ b/packages/core/src/context/processors/stateSnapshotProcessor.ts 
@@ -3,7 +3,7 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { randomUUID } from 'node:crypto'; +import { deriveStableId } from '../../utils/cryptoUtils.js'; import type { JSONSchemaType } from 'ajv'; import type { ContextProcessor, @@ -90,8 +90,11 @@ export function createStateSnapshotProcessor( const isValid = consumedIds.every((id) => targetIds.has(id)); if (isValid) { + debugLogger.log( + `[StateSnapshotProcessor] Successfully spliced PROPOSED_SNAPSHOT from Inbox into Graph. Consumed ${consumedIds.length} nodes.`, + ); // If valid, apply it! - const newId = randomUUID(); + const newId = deriveStableId(consumedIds); const snapshotNode: Snapshot = { id: newId, @@ -120,6 +123,10 @@ export function createStateSnapshotProcessor( inbox.consume(proposed.id); return returnedNodes; + } else { + debugLogger.log( + `[StateSnapshotProcessor] Rejected PROPOSED_SNAPSHOT from Inbox because one or more target IDs were missing from the current graph window.`, + ); } } } @@ -179,11 +186,11 @@ export function createStateSnapshotProcessor( maxStateTokens: options.maxStateTokens, }, ); - const newId = randomUUID(); const consumedIds = nodesToSummarize.map((n) => n.id); if (baselineIdToConsume && !consumedIds.includes(baselineIdToConsume)) { consumedIds.push(baselineIdToConsume); } + const newId = deriveStableId(consumedIds); const snapshotNode: Snapshot = { id: newId, diff --git a/packages/core/src/context/processors/toolMaskingProcessor.ts b/packages/core/src/context/processors/toolMaskingProcessor.ts index e62bb34e5d..43aba6e40b 100644 --- a/packages/core/src/context/processors/toolMaskingProcessor.ts +++ b/packages/core/src/context/processors/toolMaskingProcessor.ts @@ -3,7 +3,7 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { randomUUID } from 'node:crypto'; +import { deriveStableId } from '../../utils/cryptoUtils.js'; import type { JSONSchemaType } from 'ajv'; import type { ContextProcessor, ProcessArgs } from 
'../pipeline.js'; import * as fs from 'node:fs/promises'; @@ -120,7 +120,7 @@ export function createToolMaskingProcessor( directoryCreated = true; } - const fileName = `${sanitizeFilenamePart(toolName).toLowerCase()}_${sanitizeFilenamePart(callId).toLowerCase()}_${nodeType}_${randomUUID()}.txt`; + const fileName = `${sanitizeFilenamePart(toolName).toLowerCase()}_${sanitizeFilenamePart(callId).toLowerCase()}_${nodeType}_${deriveStableId([content])}.txt`; const filePath = path.join(toolOutputsDir, fileName); await fs.writeFile(filePath, content); @@ -214,9 +214,10 @@ export function createToolMaskingProcessor( functionCall: newFC, }); + const newId = deriveStableId([node.id, 'masked']); returnedNodes.push({ ...node, - id: randomUUID(), + id: newId, payload: maskedPart, replacesId: node.id, turnId: node.turnId, @@ -242,9 +243,10 @@ export function createToolMaskingProcessor( functionResponse: newFR, }); + const newId = deriveStableId([node.id, 'masked']); returnedNodes.push({ ...node, - id: randomUUID(), + id: newId, payload: maskedPart, replacesId: node.id, turnId: node.turnId, diff --git a/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap b/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap index 3c36020dbc..afd08df64d 100644 --- a/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap +++ b/packages/core/src/context/system-tests/__snapshots__/lifecycle.golden.test.ts.snap @@ -2,127 +2,182 @@ exports[`System Lifecycle Golden Tests > Scenario 1: Organic Growth with Huge Tool Output & Images 1`] = ` { - "baseUnits": 787, + "baseUnits": 765, "finalProjection": [ { - "parts": [ - { - "text": "System Instructions", - }, - ], - "role": "user", - }, - { - "parts": [ - { - "text": "Ack.", - }, - ], - "role": "model", - }, - { - "parts": [ - { - "text": "Hello!", - }, - ], - "role": "user", - }, - { - "parts": [ - { - "text": "Hi, how can I help?", - }, - ], - "role": "model", - }, - 
{ - "parts": [ - { - "text": "Read the logs.", - }, - ], - "role": "user", - }, - { - "parts": [ - { - "functionCall": { - "args": { - "cmd": "cat server.log", - }, - "name": "run_shell_command", + "content": { + "parts": [ + { + "text": "System Instructions", }, - "thoughtSignature": "skip_thought_signature_validator", - }, - ], - "role": "model", + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "functionResponse": { - "name": "run_shell_command", - "response": { - "output": " + "content": { + "parts": [ + { + "text": "Ack.", + }, + ], + "role": "model", + }, + "id": "", + }, + { + "content": { + "parts": [ + { + "text": "Hello!", + }, + ], + "role": "user", + }, + "id": "", + }, + { + "content": { + "parts": [ + { + "text": "Hi, how can I help?", + }, + ], + "role": "model", + }, + "id": "", + }, + { + "content": { + "parts": [ + { + "text": "Read the logs.", + }, + ], + "role": "user", + }, + "id": "", + }, + { + "content": { + "parts": [ + { + "functionCall": { + "args": { + "cmd": "cat server.log", + }, + "name": "run_shell_command", + }, + "thoughtSignature": "skip_thought_signature_validator", + }, + { + "functionCall": { + "args": {}, + "id": "undefined", + "name": "run_shell_command", + }, + }, + ], + "role": "model", + }, + "id": "", + }, + { + "content": { + "parts": [ + { + "functionResponse": { + "name": "run_shell_command", + "response": { + "output": " [Tool observation string (0.02MB, 1 lines) masked to preserve context window. 
Full string saved to: ] ", + }, }, }, - }, - ], - "role": "user", + { + "functionResponse": { + "id": "undefined", + "name": "run_shell_command", + "response": { + "error": "The tool execution result was lost due to context management truncation.", + }, + }, + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "The logs are very long.", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "The logs are very long.", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Look at this architecture diagram:", - }, - { - "text": "[Multi-Modal Blob (image/png, 0.01MB) degraded to text to preserve context window. Saved to: ]", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Look at this architecture diagram:", + }, + { + "text": "[Multi-Modal Blob (image/png, 0.01MB) degraded to text to preserve context window. Saved to: ]", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Nice diagram.", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Nice diagram.", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Can we refactor?", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Can we refactor?", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Yes we can.", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Yes we can.", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Please continue.", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Please continue.", + }, + ], + "role": "user", + }, + "id": "", }, ], "tokenTrajectory": [ @@ -137,18 +192,18 @@ exports[`System Lifecycle Golden Tests > Scenario 1: Organic Growth with Huge To "turnIndex": 1, }, { - "tokensAfterBackground": 497, + "tokensAfterBackground": 493, "tokensBeforeBackground": 20232, "turnIndex": 2, }, { - "tokensAfterBackground": 
750, - "tokensBeforeBackground": 3554, + "tokensAfterBackground": 728, + "tokensBeforeBackground": 3550, "turnIndex": 3, }, { - "tokensAfterBackground": 787, - "tokensBeforeBackground": 787, + "tokensAfterBackground": 765, + "tokensBeforeBackground": 765, "turnIndex": 4, }, ], @@ -160,44 +215,59 @@ exports[`System Lifecycle Golden Tests > Scenario 2: Under Budget (No Modificati "baseUnits": 68, "finalProjection": [ { - "parts": [ - { - "text": "System Instructions", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "System Instructions", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Ack.", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Ack.", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Hello!", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Hello!", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Hi, how can I help?", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Hi, how can I help?", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Please continue.", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Please continue.", + }, + ], + "role": "user", + }, + "id": "", }, ], "tokenTrajectory": [ @@ -220,60 +290,81 @@ exports[`System Lifecycle Golden Tests > Scenario 3: Node Distillation of Large "baseUnits": 5370, "finalProjection": [ { - "parts": [ - { - "text": "Mock response from: utility_compressor, for: {"text":"A...AAAAAAAA"}", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Mock response from: utility_compressor, for: {"text":"A...AAAAAAAA"}", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Mock response from: utility_compressor, for: {"text":"B...BBBBBBBB"}", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Mock response from: utility_compressor, 
for: {"text":"B...BBBBBBBB"}", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Mock response from: utility_compressor, for: {"text":"C...CCCCCCCC"}", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Mock response from: utility_compressor, for: {"text":"C...CCCCCCCC"}", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Mock response from: utility_compressor, for: {"text":"D...DDDDDDDD"}", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Mock response from: utility_compressor, for: {"text":"D...DDDDDDDD"}", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Mock response from: utility_compressor, for: {"text":"E...EEEEEEEE"}", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Mock response from: utility_compressor, for: {"text":"E...EEEEEEEE"}", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", - }, - ], - 
"role": "model", + "content": { + "parts": [ + { + "text": "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Please continue.", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Please continue.", + }, + ], + "role": "user", + }, + "id": "", }, ], "tokenTrajectory": [ @@ -301,31 +392,40 @@ exports[`System Lifecycle Golden Tests > Scenario 4: Async-Driven Background GC "baseUnits": 505, "finalProjection": [ { - "parts": [ - { - "text": "{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":[]}", - }, - { - "text": "Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 ..................................................", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "{"active_tasks":[],"discovered_facts":[],"constraints_and_preferences":[],"recent_arc":[]}", + }, + { + "text": "Msg 8 
Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 Msg 8 ..................................................", + }, + ], + "role": "user", + }, + "id": "", }, { - "parts": [ - { - "text": "Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 ..................................................", - }, - ], - "role": "model", + "content": { + "parts": [ + { + "text": "Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 Msg 9 ..................................................", + }, + ], + "role": "model", + }, + "id": "", }, { - "parts": [ - { - "text": "Please continue.", - }, - ], - "role": "user", + "content": { + "parts": [ + { + "text": "Please continue.", + }, + ], + "role": "user", + }, + "id": "", }, ], "tokenTrajectory": [ diff --git a/packages/core/src/context/system-tests/hysteresis.test.ts b/packages/core/src/context/system-tests/hysteresis.test.ts index cf804a27f6..eeb63e7c33 100644 --- a/packages/core/src/context/system-tests/hysteresis.test.ts +++ b/packages/core/src/context/system-tests/hysteresis.test.ts @@ -9,7 +9,7 @@ import { SimulationHarness } from './simulationHarness.js'; import { createMockLlmClient } from '../testing/contextTestUtils.js'; import type { ContextProfile } from '../config/profiles.js'; import { generalistProfile } from '../config/profiles.js'; -import type { Content } from '@google/genai'; +import { type HistoryTurn } from '../../core/agentChatHistory.js'; describe('Context Manager Hysteresis Tests', () => { const mockLlmClient = createMockLlmClient(['']); @@ -18,6 +18,7 @@ describe('Context Manager Hysteresis Tests', () => { ...generalistProfile, name: 'Hysteresis Stress Test', config: { + ...generalistProfile.config, budget: { maxTokens: 5000, retainedTokens: 
1000, @@ -26,9 +27,13 @@ describe('Context Manager Hysteresis Tests', () => { }, }); - const getProjectionTokens = (proj: Content[], harness: SimulationHarness) => + const getProjectionTokens = ( + proj: HistoryTurn[], + harness: SimulationHarness, + ) => proj.reduce( - (sum, c) => sum + harness.env.tokenCalculator.calculateContentTokens(c), + (sum, c) => + sum + harness.env.tokenCalculator.calculateContentTokens(c.content), 0, ); @@ -57,7 +62,7 @@ describe('Context Manager Hysteresis Tests', () => { // No snapshot because maxTokens (5000) not exceeded, and deficit < threshold. expect( state.finalProjection.some((c) => - c.parts?.some((p) => p.text?.includes('')), + c.content.parts?.some((p) => p.text?.includes('')), ), ).toBe(false); @@ -79,7 +84,7 @@ describe('Context Manager Hysteresis Tests', () => { state = await harness.getGoldenState(); expect( state.finalProjection.some((c) => - c.parts?.some((p) => p.text?.includes('')), + c.content.parts?.some((p) => p.text?.includes('')), ), ).toBe(true); }); @@ -108,7 +113,7 @@ describe('Context Manager Hysteresis Tests', () => { let state = await harness.getGoldenState(); expect( state.finalProjection.some((c) => - c.parts?.some((p) => p.text?.includes('')), + c.content.parts?.some((p) => p.text?.includes('')), ), ).toBe(true); diff --git a/packages/core/src/context/system-tests/lifecycle.golden.test.ts b/packages/core/src/context/system-tests/lifecycle.golden.test.ts index 9e6512e646..970138ca5b 100644 --- a/packages/core/src/context/system-tests/lifecycle.golden.test.ts +++ b/packages/core/src/context/system-tests/lifecycle.golden.test.ts @@ -17,6 +17,7 @@ expect.addSnapshotSerializer({ (/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i.test( val, ) || + /^[0-9a-f]{32}$/i.test(val) || /[\\/]tmp[\\/]sim/.test(val)), print: (val) => { if (typeof val !== 'string') return `"${val}"`; @@ -25,6 +26,7 @@ expect.addSnapshotSerializer({ /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '', ) + 
.replace(/\b[0-9a-f]{32}\b/gi, '') .replace(/[\\/]tmp[\\/]sim[^\s"'\]]*/g, ''); // Also scrub timestamps in filenames like blob_1234567890_... diff --git a/packages/core/src/context/system-tests/simulationHarness.ts b/packages/core/src/context/system-tests/simulationHarness.ts index c15c9fc26b..de137bb5e1 100644 --- a/packages/core/src/context/system-tests/simulationHarness.ts +++ b/packages/core/src/context/system-tests/simulationHarness.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { randomUUID } from 'node:crypto'; import { ContextManager } from '../contextManager.js'; import { AgentChatHistory } from '../../core/agentChatHistory.js'; import type { Content } from '@google/genai'; @@ -98,7 +99,8 @@ export class SimulationHarness { async simulateTurn(messages: Content[]) { // 1. Append the new messages const currentHistory = this.chatHistory.get(); - this.chatHistory.set([...currentHistory, ...messages]); + const turns = messages.map((m) => ({ id: randomUUID(), content: m })); + this.chatHistory.set([...currentHistory, ...turns]); // 2. 
Measure tokens immediately after append const tokensBefore = this.env.tokenCalculator.calculateConcreteListTokens( diff --git a/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts b/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts index 6e89d1baca..12e258cf07 100644 --- a/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts +++ b/packages/core/src/context/utils/adaptiveTokenCalculator.test.ts @@ -122,4 +122,69 @@ describe('AdaptiveTokenCalculator', () => { expect(calculator.getLearnedWeight()).toBe(1.0); }); + + it('should subtract overhead tokens from actual tokens when determining target weight', () => { + const eventBus = new ContextEventBus(); + const getOverheadTokens = () => 40; + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + getOverheadTokens, + ); + + // Initial state: weight = 1.0 + + // Simulate an event where the API reported 100 tokens, and our base units were 100 + // But overhead is 40. + // actualGraphTokens = 100 - 40 = 60 + // rawTargetWeight = 60 / 100 = 0.6 + // targetWeight = Math.max(0.5, 0.6) = 0.6 + // newWeight = 1.0 * 0.8 + 0.6 * 0.2 = 0.8 + 0.12 = 0.92 + eventBus.emitTokenGroundTruth({ + actualTokens: 100, + promptBaseUnits: 100, + }); + + expect(calculator.getLearnedWeight()).toBeCloseTo(0.92, 5); + }); + + it('should enforce the maxStep limit to prevent violent oscillation from massive outliers', () => { + const eventBus = new ContextEventBus(); + const maxStep = 0.05; // Tight limit + const calculator = new AdaptiveTokenCalculator( + charsPerToken, + registry, + eventBus, + undefined, + { maxStep }, + ); + + // Initial state: weight = 1.0 + + // Simulate a massive outlier where the API reports 10,000 tokens for 100 base units. + // rawTargetWeight = 100 + // targetWeight = Math.min(100, 1.0 * 2.0) = 2.0 + // emaWeight = 1.0 * 0.8 + 2.0 * 0.2 = 1.2 + // BUT maxStep is 0.05, so the actual step is clamped. 
+ // finalWeight = 1.0 + 0.05 = 1.05 + eventBus.emitTokenGroundTruth({ + actualTokens: 10000, + promptBaseUnits: 100, + }); + + expect(calculator.getLearnedWeight()).toBeCloseTo(1.05, 5); + + // Simulate a massive under-estimation + // rawTargetWeight = 0 + // targetWeight = Math.max(0, 1.05 * 0.5) = 0.525 + // emaWeight = 1.05 * 0.8 + 0.525 * 0.2 = 0.84 + 0.105 = 0.945 + // BUT maxStep is 0.05, so step is clamped: 1.05 - 0.05 = 1.0 + eventBus.emitTokenGroundTruth({ + actualTokens: 0, + promptBaseUnits: 100, + }); + + expect(calculator.getLearnedWeight()).toBeCloseTo(1.0, 5); + }); }); diff --git a/packages/core/src/context/utils/adaptiveTokenCalculator.ts b/packages/core/src/context/utils/adaptiveTokenCalculator.ts index 2ac3825ef5..7cf7984109 100644 --- a/packages/core/src/context/utils/adaptiveTokenCalculator.ts +++ b/packages/core/src/context/utils/adaptiveTokenCalculator.ts @@ -14,6 +14,13 @@ import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js'; import type { ContextEventBus, TokenGroundTruthEvent } from '../eventBus.js'; import { debugLogger } from '../../utils/debugLogger.js'; +export interface AdaptiveLearningConfig { + /** The momentum factor for the Exponential Moving Average (EMA). Defaults to 0.2. */ + learningRate?: number; + /** The absolute maximum change allowed to the weight in a single turn. Defaults to 0.15. 
*/ + maxStep?: number; +} + /** * An Adaptive Token Calculator that dynamically learns the true token cost of the user's * conversation by applying an Exponential Moving Average (EMA) gradient descent to @@ -26,12 +33,18 @@ import { debugLogger } from '../../utils/debugLogger.js'; export class AdaptiveTokenCalculator implements AdvancedTokenCalculator { private learnedWeight = 1.0; private readonly baseCalculator: StaticTokenCalculator; + private readonly learningRate: number; + private readonly maxStep: number; constructor( charsPerToken: number, registry: NodeBehaviorRegistry, eventBus: ContextEventBus, + private readonly getOverheadTokens?: () => number, + config?: AdaptiveLearningConfig, ) { + this.learningRate = config?.learningRate ?? 0.2; + this.maxStep = config?.maxStep ?? 0.15; this.baseCalculator = new StaticTokenCalculator(charsPerToken, registry); eventBus.onTokenGroundTruth((event: TokenGroundTruthEvent) => { this.handleGroundTruth(event.actualTokens, event.promptBaseUnits); @@ -41,21 +54,44 @@ export class AdaptiveTokenCalculator implements AdvancedTokenCalculator { private handleGroundTruth(actualTokens: number, promptBaseUnits: number) { if (promptBaseUnits <= 0) return; + const overheadTokens = this.getOverheadTokens + ? this.getOverheadTokens() + : 0; + + // The Gemini API token count includes the static overhead (system instruction + tools) + // and the dynamic chat history (which we measure as promptBaseUnits). + // We subtract the overhead so the adaptive calculator is comparing "apples to apples" + // when learning the weight multiplier for the graph nodes. 
+ const actualGraphTokens = Math.max(0, actualTokens - overheadTokens); + // Determine what ratio we should have used - const targetWeight = actualTokens / promptBaseUnits; + const rawTargetWeight = actualGraphTokens / promptBaseUnits; const oldWeight = this.learnedWeight; - // Apply Momentum (Learning Rate) - const learningRate = 0.2; - const newWeight = - oldWeight * (1 - learningRate) + targetWeight * learningRate; + // Dampen extreme outliers *before* applying the EMA by capping the target weight + // to a reasonable multiple of the current weight. This prevents a single massive + // anomaly from destroying the running average. + const targetWeight = Math.max( + oldWeight * 0.5, + Math.min(rawTargetWeight, oldWeight * 2.0), + ); - // Clamp to reasonable safety bounds to prevent rogue metadata poisoning the system + // Apply Momentum (Learning Rate) + let newWeight = + oldWeight * (1 - this.learningRate) + targetWeight * this.learningRate; + + // Hard limit the maximum step size per turn to prevent violent oscillation + if (newWeight > oldWeight + this.maxStep) + newWeight = oldWeight + this.maxStep; + if (newWeight < oldWeight - this.maxStep) + newWeight = oldWeight - this.maxStep; + + // Clamp to reasonable absolute safety bounds this.learnedWeight = Math.max(0.5, Math.min(newWeight, 2.0)); debugLogger.log( `[AdaptiveTokenCalculator] Learned weight updated to ${this.learnedWeight.toFixed(3)} ` + - `(API Tokens: ${actualTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`, + `(API Tokens: ${actualTokens}, Overhead: ${overheadTokens}, Graph Tokens: ${actualGraphTokens}, Base Units: ${promptBaseUnits}, Target Ratio: ${targetWeight.toFixed(3)})`, ); } diff --git a/packages/core/src/context/utils/snapshotGenerator.test.ts b/packages/core/src/context/utils/snapshotGenerator.test.ts index 07ccc195f9..51f1667477 100644 --- a/packages/core/src/context/utils/snapshotGenerator.test.ts +++ 
b/packages/core/src/context/utils/snapshotGenerator.test.ts @@ -5,12 +5,132 @@ */ import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { SnapshotGenerator, type SnapshotState } from './snapshotGenerator.js'; +import { + SnapshotGenerator, + type SnapshotState, + SnapshotStateHelper, +} from './snapshotGenerator.js'; import type { ContextEnvironment } from '../pipeline/environment.js'; import { NodeType, type ConcreteNode } from '../graph/types.js'; import type { Mock } from 'vitest'; +describe('SnapshotStateHelper', () => { + describe('exportState', () => { + it('should flatten nested abstractsIds to pristine IDs', () => { + // Setup a graph with nested snapshots + // S3 abstracts [S2, N5] + // S2 abstracts [S1, N3, N4] + // S1 abstracts [N1, N2] + + const nodes: ConcreteNode[] = [ + { + id: 'N1', + type: NodeType.USER_PROMPT, + timestamp: 10, + role: 'user', + payload: { text: '1' }, + turnId: 'T1', + }, + { + id: 'N2', + type: NodeType.AGENT_THOUGHT, + timestamp: 20, + role: 'model', + payload: { text: '2' }, + turnId: 'T1', + }, + { + id: 'S1', + type: NodeType.SNAPSHOT, + timestamp: 30, + role: 'user', + payload: { text: 'State 1' }, + turnId: 'S1', + abstractsIds: ['N1', 'N2'], + }, + { + id: 'N3', + type: NodeType.USER_PROMPT, + timestamp: 40, + role: 'user', + payload: { text: '3' }, + turnId: 'T2', + }, + { + id: 'N4', + type: NodeType.AGENT_THOUGHT, + timestamp: 50, + role: 'model', + payload: { text: '4' }, + turnId: 'T2', + }, + { + id: 'S2', + type: NodeType.SNAPSHOT, + timestamp: 60, + role: 'user', + payload: { text: 'State 2' }, + turnId: 'S2', + abstractsIds: ['S1', 'N3', 'N4'], + }, + { + id: 'N5', + type: NodeType.USER_PROMPT, + timestamp: 70, + role: 'user', + payload: { text: '5' }, + turnId: 'T3', + }, + { + id: 'S3', + type: NodeType.SNAPSHOT, + timestamp: 80, + role: 'user', + payload: { text: 'State 3' }, + turnId: 'S3', + abstractsIds: ['S2', 'N5'], + }, + ]; + + const state = SnapshotStateHelper.exportState(nodes); + + 
expect(state.snapshot).toBeDefined(); + expect(state.snapshot?.text).toBe('State 3'); + + // Should be flattened to only the "pristine" (non-snapshot) IDs + const consumedIds = state.snapshot?.consumedIds; + expect(consumedIds).toContain('N1'); + expect(consumedIds).toContain('N2'); + expect(consumedIds).toContain('N3'); + expect(consumedIds).toContain('N4'); + expect(consumedIds).toContain('N5'); + + // Should NOT contain the intermediate snapshot IDs + expect(consumedIds).not.toContain('S1'); + expect(consumedIds).not.toContain('S2'); + + expect(consumedIds?.length).toBe(5); + }); + + it('should return empty state if no snapshot baseline is found', () => { + const nodes: ConcreteNode[] = [ + { + id: 'N1', + type: NodeType.USER_PROMPT, + timestamp: 10, + role: 'user', + payload: { text: '1' }, + turnId: 'T1', + }, + ]; + + const state = SnapshotStateHelper.exportState(nodes); + expect(state).toEqual({}); + }); + }); +}); + describe('SnapshotGenerator', () => { let mockEnv: ContextEnvironment; let mockGenerateJson: Mock; diff --git a/packages/core/src/context/utils/snapshotGenerator.ts b/packages/core/src/context/utils/snapshotGenerator.ts index 1d268e1228..3042d560ce 100644 --- a/packages/core/src/context/utils/snapshotGenerator.ts +++ b/packages/core/src/context/utils/snapshotGenerator.ts @@ -48,10 +48,39 @@ export interface SnapshotState { recent_arc: string[]; } +import { debugLogger } from '../../utils/debugLogger.js'; + +export function isSnapshotState(text: string): boolean { + const trimmed = text.trim(); + if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) { + return false; + } + try { + const parsed: unknown = JSON.parse(trimmed); + if (!isRecord(parsed)) return false; + const isSnap = + Array.isArray(parsed['active_tasks']) && + Array.isArray(parsed['discovered_facts']) && + Array.isArray(parsed['constraints_and_preferences']) && + Array.isArray(parsed['recent_arc']); + if (!isSnap) { + debugLogger.log( + '[isSnapshotState] FAILED FOR JSON:', + 
JSON.stringify(parsed), + ); + } + return isSnap; + } catch { + debugLogger.log('[isSnapshotState] PARSE FAILED FOR:', trimmed); + return false; + } +} + export interface BaselineSnapshotInfo { text: string; abstractsIds: string[]; id: string; + timestamp: number; } /** @@ -61,6 +90,20 @@ export interface BaselineSnapshotInfo { export function findLatestSnapshotBaseline( targets: readonly ConcreteNode[], ): BaselineSnapshotInfo | undefined { + debugLogger.log( + '[findLatestSnapshotBaseline] Targets:', + targets.map((t) => ({ + id: t.id, + type: t.type, + text: + t.payload && + typeof t.payload === 'object' && + 'text' in t.payload && + typeof t.payload.text === 'string' + ? t.payload.text.substring(0, 20) + : '', + })), + ); const lastSnapshotNode = [...targets] .reverse() .find((n) => n.type === NodeType.SNAPSHOT && n.payload.text); @@ -72,8 +115,10 @@ export function findLatestSnapshotBaseline( ? [...lastSnapshotNode.abstractsIds] : [], id: lastSnapshotNode.id, + timestamp: lastSnapshotNode.timestamp, }; } + return undefined; } @@ -326,3 +371,61 @@ ${formatNodesForLlm(nodes)}`; return JSON.stringify(newState); } } + +/** + * Shared logic for working with Snapshot node state. + */ +export class SnapshotStateHelper { + /** + * Flatten nested abstract IDs to only the "pristine" (non-snapshot) IDs. 
+ */ + static flattenAbstracts( + nodes: ConcreteNode[], + abstractsIds: readonly string[], + ): string[] { + const pristineIds: string[] = []; + const nodeMap = new Map(nodes.map((n) => [n.id, n])); + + const walk = (ids: readonly string[]) => { + for (const id of ids) { + const node = nodeMap.get(id); + if (!node) { + // Fallback: if node not in map, treat as pristine ID + pristineIds.push(id); + continue; + } + + if (node.type === NodeType.SNAPSHOT && node.abstractsIds) { + walk(node.abstractsIds); + } else { + pristineIds.push(id); + } + } + }; + + walk(abstractsIds); + return Array.from(new Set(pristineIds)); // Dedupe + } + + /** + * Helper to extract state from the most recent snapshot in a list of nodes. + */ + static exportState(nodes: ConcreteNode[]): { + snapshot?: { text: string; consumedIds: string[] }; + } { + const baseline = findLatestSnapshotBaseline(nodes); + if (!baseline) return {}; + + const node = nodes.find((n) => n.id === baseline.id); + if (!node || node.type !== NodeType.SNAPSHOT) return {}; + + const consumedIds = this.flattenAbstracts(nodes, node.abstractsIds || []); + + return { + snapshot: { + text: baseline.text, + consumedIds, + }, + }; + } +} diff --git a/packages/core/src/core/agentChatHistory.ts b/packages/core/src/core/agentChatHistory.ts index 7ef4b6a64d..438a903b46 100644 --- a/packages/core/src/core/agentChatHistory.ts +++ b/packages/core/src/core/agentChatHistory.ts @@ -6,21 +6,35 @@ import type { Content } from '@google/genai'; +/** + * A durable wrapper for Gemini Content that carries a stable ID. + * This ID is preserved across all transformations and is used as the anchor + * for context graph node identity. 
+ */ +export interface HistoryTurn { + readonly id: string; + readonly content: Content; +} + export type HistoryEventType = 'PUSH' | 'SYNC_FULL' | 'CLEAR' | 'SILENT_SYNC'; export interface HistoryEvent { type: HistoryEventType; - payload: readonly Content[]; + payload: readonly HistoryTurn[]; } export type HistoryListener = (event: HistoryEvent) => void; +/** + * The 'Strong Owner' of chat history turns. + * It ensures that every turn in the session is associated with a durable ID. + */ export class AgentChatHistory { - private history: Content[]; + private history: HistoryTurn[] = []; private listeners: Set = new Set(); - constructor(initialHistory: Content[] = []) { - this.history = [...initialHistory]; + constructor(initialTurns: HistoryTurn[] = []) { + this.history = [...initialTurns]; } subscribe(listener: HistoryListener): () => void { @@ -30,20 +44,27 @@ export class AgentChatHistory { return () => this.listeners.delete(listener); } - private notify(type: HistoryEventType, payload: readonly Content[]) { + private notify(type: HistoryEventType, payload: readonly HistoryTurn[]) { const event: HistoryEvent = { type, payload }; for (const listener of this.listeners) { listener(event); } } - push(content: Content) { - this.history.push(content); - this.notify('PUSH', [content]); + /** + * Adds a new turn to the history. + * Every turn must have a durable ID, usually provided by the ChatRecordingService. + */ + push(turn: HistoryTurn) { + this.history.push(turn); + this.notify('PUSH', [turn]); } - set(history: readonly Content[], options: { silent?: boolean } = {}) { - this.history = [...history]; + /** + * Overwrites the entire history with a new list of turns. + */ + set(turns: readonly HistoryTurn[], options: { silent?: boolean } = {}) { + this.history = [...turns]; this.notify(options.silent ? 
'SILENT_SYNC' : 'SYNC_FULL', this.history); } @@ -52,20 +73,28 @@ export class AgentChatHistory { this.notify('CLEAR', []); } - get(): readonly Content[] { + get(): readonly HistoryTurn[] { return this.history; } - map(callback: (value: Content, index: number, array: Content[]) => Content) { - this.history = this.history.map(callback); - this.notify('SYNC_FULL', this.history); + /** + * Returns a copy of the raw Gemini Content[] for API consumption. + */ + getContents(): Content[] { + return this.history.map((h) => h.content); + } + + map( + callback: (value: HistoryTurn, index: number, array: HistoryTurn[]) => U, + ): U[] { + return this.history.map(callback); } flatMap( callback: ( - value: Content, + value: HistoryTurn, index: number, - array: Content[], + array: HistoryTurn[], ) => U | readonly U[], ): U[] { return this.history.flatMap(callback); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 302b89d7f0..69a68c0313 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -46,6 +46,7 @@ import { LoopDetectionService } from '../services/loopDetectionService.js'; import { ChatCompressionService } from '../context/chatCompressionService.js'; import { AgentHistoryProvider } from '../context/agentHistoryProvider.js'; import type { ContextManager } from '../context/contextManager.js'; +import type { HistoryTurn } from './agentChatHistory.js'; import { ideContextStore } from '../ide/ideContext.js'; import { logNextSpeakerCheck } from '../telemetry/loggers.js'; import type { @@ -67,6 +68,7 @@ import { } from '../availability/policyHelpers.js'; import { getDisplayString, resolveModel } from '../config/models.js'; import { partToString } from '../utils/partUtils.js'; +import { randomUUID } from 'node:crypto'; import { coreEvents, CoreEvent, @@ -293,7 +295,7 @@ export class GeminiClient { this.getChat().stripThoughtsFromHistory(); } - setHistory(history: readonly Content[]) { + setHistory(history: 
ReadonlyArray) { this.getChat().setHistory(history); this.updateTelemetryTokenCount(); this.forceFullIdeContext = true; @@ -335,7 +337,7 @@ export class GeminiClient { } async resumeChat( - history: Content[], + history: ReadonlyArray, resumedSessionData?: ResumedSessionData, ): Promise { this.chat = await this.startChat(history, resumedSessionData); @@ -376,7 +378,7 @@ export class GeminiClient { } async startChat( - extraHistory?: Content[], + extraHistory?: ReadonlyArray, resumedSessionData?: ResumedSessionData, ): Promise { this.forceFullIdeContext = true; @@ -398,7 +400,7 @@ export class GeminiClient { this.config, systemInstruction, tools, - history, + [...history], resumedSessionData, async (modelId: string) => { this.lastUsedModelId = modelId; @@ -419,7 +421,7 @@ export class GeminiClient { await reportError( error, 'Error initializing Gemini chat session.', - history, + [...history], 'startChat', ); throw new Error(`Failed to initialize chat: ${getErrorMessage(error)}`); @@ -641,7 +643,15 @@ export class GeminiClient { if (this.config.getContextManagementConfig().enabled) { if (this.contextManager) { - const pendingRequest = createUserContent(request); + const rawPendingRequest = createUserContent(request); + const pendingRequest = { + id: + this.getChatRecordingService()?.recordSyntheticMessage( + 'user', + rawPendingRequest.parts || [], + ) || randomUUID(), + content: rawPendingRequest, + }; const { history: newHistory, didApplyManagement, diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 4494a5e9ff..a6915fd0e2 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -199,6 +199,13 @@ export async function createContentGenerator( sessionId?: string, ): Promise { const generator = await (async () => { + if (gcConfig.fakeResponsesNonStrict) { + const fakeGenerator = await FakeContentGenerator.fromFile( + gcConfig.fakeResponsesNonStrict, + { 
nonStrict: true }, + ); + return new LoggingContentGenerator(fakeGenerator, gcConfig); + } if (gcConfig.fakeResponses) { const fakeGenerator = await FakeContentGenerator.fromFile( gcConfig.fakeResponses, diff --git a/packages/core/src/core/fakeContentGenerator.ts b/packages/core/src/core/fakeContentGenerator.ts index 39687579e8..4976604993 100644 --- a/packages/core/src/core/fakeContentGenerator.ts +++ b/packages/core/src/core/fakeContentGenerator.ts @@ -36,6 +36,18 @@ export type FakeResponse = response: EmbedContentResponse; }; +/** + * Options for the FakeContentGenerator. + */ +export interface FakeContentGeneratorOptions { + /** + * If true, the generator will find the first available response that matches + * the requested method, rather than strictly following the input order. + * Useful for non-deterministic background tasks. + */ + nonStrict?: boolean; +} + // A ContentGenerator that responds with canned responses. // // Typically these would come from a file, provided by the `--fake-responses` @@ -46,22 +58,45 @@ export class FakeContentGenerator implements ContentGenerator { userTierName?: string; paidTier?: GeminiUserTier; - constructor(private readonly responses: FakeResponse[]) {} + private readonly responses: FakeResponse[]; - static async fromFile(filePath: string): Promise { + constructor( + responses: FakeResponse[], + private readonly options: FakeContentGeneratorOptions = {}, + ) { + this.responses = structuredClone(responses); + } + + static async fromFile( + filePath: string, + options: FakeContentGeneratorOptions = {}, + ): Promise { const fileContent = await promises.readFile(filePath, 'utf-8'); const responses = fileContent .split('\n') .filter((line) => line.trim() !== '') // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion .map((line) => JSON.parse(line) as FakeResponse); - return new FakeContentGenerator(responses); + return new FakeContentGenerator(responses, options); } private getNextResponse< M extends 
FakeResponse['method'], R = Extract['response'], >(method: M, request: unknown): R { + if (this.options.nonStrict) { + const index = this.responses.findIndex((r) => r.method === method); + if (index === -1) { + throw new Error( + `No more mock responses for ${method}, got request:\n` + + safeJsonStringify(request), + ); + } + const response = this.responses.splice(index, 1)[0]; + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + return response.response as R; + } + const response = this.responses[this.callCounter++]; if (!response) { throw new Error( diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 05a27f8bbc..a663289fef 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -19,6 +19,7 @@ import { SYNTHETIC_THOUGHT_SIGNATURE, type StreamEvent, stripToolCallIdPrefixes, + type HistoryTurn, } from './geminiChat.js'; import { type CompletedToolCall, @@ -40,6 +41,7 @@ import { makeResolvedModelConfig } from '../services/modelConfigServiceTestUtils import type { HookSystem } from '../hooks/hookSystem.js'; import { LlmRole } from '../telemetry/types.js'; import { BINARY_INJECTION_KEY } from '../utils/generateContentResponseUtilities.js'; +import type { ResumedSessionData } from '../services/chatRecordingTypes.js'; // Mock fs module to prevent actual file system operations during tests const mockFileSystem = new Map(); @@ -234,9 +236,9 @@ describe('GeminiChat', () => { describe('constructor', () => { it('should initialize lastPromptTokenCount based on history size', () => { - const history: Content[] = [ - { role: 'user', parts: [{ text: 'Hello' }] }, - { role: 'model', parts: [{ text: 'Hi there' }] }, + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'Hello' }] } }, + { id: '2', content: { role: 'model', parts: [{ text: 'Hi there' }] } }, ]; const chatWithHistory = new GeminiChat(mockConfig, '', [], 
history); // 'Hello': 5 chars * 0.25 = 1.25 @@ -249,12 +251,66 @@ describe('GeminiChat', () => { const chatEmpty = new GeminiChat(mockConfig); expect(chatEmpty.getLastPromptTokenCount()).toBe(0); }); + + it('should prioritize in-memory history over resumedSessionData', () => { + // This test simulates a "hot restart" after a context management operation + // like compression, where the in-memory history is shorter and more up-to-date + // than the session data that might be on disk. + + // 1. A stale, longer history from a persisted session record + const resumedSessionData = { + conversation: { + messages: [ + { + id: 'a', + type: 'user', + content: [{ text: 'turn 1' }], + create_time: new Date(), + }, + { + id: 'b', + type: 'gemini', + content: [{ text: 'turn 2' }], + create_time: new Date(), + }, + { + id: 'c', + type: 'user', + content: [{ text: 'turn 3' }], + create_time: new Date(), + }, + ], + }, + } as unknown as ResumedSessionData; + + // 2. A fresh, compressed in-memory history + const compressedHistory: HistoryTurn[] = [ + { + id: 'summary-1', + content: { role: 'user', parts: [{ text: 'summary of turns 1-3' }] }, + }, + ]; + + // 3. Instantiate the chat, providing both. + const chat = new GeminiChat( + mockConfig, + '', + [], + compressedHistory, // This should be prioritized + resumedSessionData, // This should be ignored + ); + + // 4. Assert that the shorter, in-memory history was used. 
+ const finalHistory = chat.getHistoryTurns(); + expect(finalHistory).toHaveLength(1); + expect(finalHistory[0].id).toBe('summary-1'); + }); }); describe('setHistory', () => { it('should recalculate lastPromptTokenCount when history is updated', () => { - const initialHistory: Content[] = [ - { role: 'user', parts: [{ text: 'Hello' }] }, + const initialHistory: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'Hello' }] } }, ]; const chatWithHistory = new GeminiChat( mockConfig, @@ -264,14 +320,17 @@ describe('GeminiChat', () => { ); const initialCount = chatWithHistory.getLastPromptTokenCount(); - const newHistory: Content[] = [ + const newHistory: HistoryTurn[] = [ { - role: 'user', - parts: [ - { - text: 'This is a much longer history item that should result in more tokens than just hello.', - }, - ], + id: '2', + content: { + role: 'user', + parts: [ + { + text: 'This is a much longer history item that should result in more tokens than just hello.', + }, + ], + }, }, ]; chatWithHistory.setHistory(newHistory); @@ -331,9 +390,9 @@ describe('GeminiChat', () => { ).resolves.not.toThrow(); // 3. Verify history was recorded correctly - const history = chat.getHistory(); + const history = chat.getHistoryTurns(); expect(history.length).toBe(2); // user turn + model turn - const modelTurn = history[1]; + const modelTurn = history[1].content; expect(modelTurn?.parts?.length).toBe(1); // The empty part is discarded expect(modelTurn?.parts![0].functionCall).toBeDefined(); }); @@ -433,9 +492,9 @@ describe('GeminiChat', () => { ).resolves.not.toThrow(); // 3. Verify history was recorded correctly with only the valid part. 
- const history = chat.getHistory(); + const history = chat.getHistoryTurns(); expect(history.length).toBe(2); // user turn + model turn - const modelTurn = history[1]; + const modelTurn = history[1].content; expect(modelTurn?.parts?.length).toBe(1); expect(modelTurn?.parts![0].text).toBe('Initial valid content...'); }); @@ -478,9 +537,9 @@ describe('GeminiChat', () => { } // 3. Assert: Check that the final history was correctly consolidated. - const history = chat.getHistory(); + const history = chat.getHistoryTurns(); expect(history.length).toBe(2); - const modelTurn = history[1]; + const modelTurn = history[1].content; expect(modelTurn?.parts?.length).toBe(1); expect(modelTurn?.parts![0].text).toBe('Hello World!'); }); @@ -538,12 +597,12 @@ describe('GeminiChat', () => { } // 3. Assert: Check that the final history was correctly consolidated. - const history = chat.getHistory(); + const history = chat.getHistoryTurns(); // The history should contain the user's turn and ONE consolidated model turn. expect(history.length).toBe(2); - const modelTurn = history[1]; + const modelTurn = history[1].content; expect(modelTurn.role).toBe('model'); // The model turn should have 3 distinct parts: the merged text, the function call, and the final text. @@ -599,10 +658,10 @@ describe('GeminiChat', () => { } // 3. Assert: Check that the final history contains both function calls. 
- const history = chat.getHistory(); + const history = chat.getHistoryTurns(); expect(history.length).toBe(2); - const modelTurn = history[1]; + const modelTurn = history[1].content; expect(modelTurn.role).toBe('model'); expect(modelTurn.parts?.length).toBe(2); expect(modelTurn.parts![0].functionCall?.name).toBe('tool_A'); @@ -647,8 +706,8 @@ describe('GeminiChat', () => { // Consume the stream to trigger history recording } - const history = chat.getHistory(); - const modelTurn = history[1]; + const history = chat.getHistoryTurns(); + const modelTurn = history[1].content; expect(modelTurn.parts?.length).toBe(2); expect(modelTurn.parts![0].functionCall?.name).toBe('tool_X'); expect(modelTurn.parts![0].functionCall?.args).toEqual({ id: 1 }); @@ -694,12 +753,12 @@ describe('GeminiChat', () => { } // 3. Assert: Check the final state of the history. - const history = chat.getHistory(); + const history = chat.getHistoryTurns(); // The history should contain two turns: the user's message and the model's response. expect(history.length).toBe(2); - const modelTurn = history[1]; + const modelTurn = history[1].content; expect(modelTurn.role).toBe('model'); // CRUCIAL ASSERTION: @@ -713,21 +772,27 @@ describe('GeminiChat', () => { it('should throw an error when a tool call is followed by an empty stream response', async () => { // 1. Setup: A history where the model has just made a function call. - const initialHistory: Content[] = [ + const initialHistory: HistoryTurn[] = [ { - role: 'user', - parts: [{ text: 'Find a good Italian restaurant for me.' }], + id: '1', + content: { + role: 'user', + parts: [{ text: 'Find a good Italian restaurant for me.' 
}], + }, }, { - role: 'model', - parts: [ - { - functionCall: { - name: 'find_restaurant', - args: { cuisine: 'Italian' }, + id: '2', + content: { + role: 'model', + parts: [ + { + functionCall: { + name: 'find_restaurant', + args: { cuisine: 'Italian' }, + }, }, - }, - ], + ], + }, }, ]; chat.setHistory(initialHistory); @@ -1251,31 +1316,40 @@ describe('GeminiChat', () => { describe('addHistory', () => { it('should add a new content item to the history', () => { - const newContent: Content = { - role: 'user', - parts: [{ text: 'A new message' }], + const newTurn: HistoryTurn = { + id: '1', + content: { + role: 'user', + parts: [{ text: 'A new message' }], + }, }; - chat.addHistory(newContent); - const history = chat.getHistory(); + chat.addHistory(newTurn); + const history = chat.getHistoryTurns(); expect(history.length).toBe(1); - expect(history[0]).toEqual(newContent); + expect(history[0]).toEqual(newTurn); }); it('should add multiple items correctly', () => { - const content1: Content = { - role: 'user', - parts: [{ text: 'Message 1' }], + const turn1: HistoryTurn = { + id: '1', + content: { + role: 'user', + parts: [{ text: 'Message 1' }], + }, }; - const content2: Content = { - role: 'model', - parts: [{ text: 'Message 2' }], + const turn2: HistoryTurn = { + id: '2', + content: { + role: 'model', + parts: [{ text: 'Message 2' }], + }, }; - chat.addHistory(content1); - chat.addHistory(content2); - const history = chat.getHistory(); + chat.addHistory(turn1); + chat.addHistory(turn2); + const history = chat.getHistoryTurns(); expect(history.length).toBe(2); - expect(history[0]).toEqual(content1); - expect(history[1]).toEqual(content2); + expect(history[0]).toEqual(turn1); + expect(history[1]).toEqual(turn2); }); }); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 6a728884a5..30b33d3c2e 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -19,7 +19,10 @@ import { type 
GenerateContentParameters, type FunctionCall, } from '@google/genai'; -import { AgentChatHistory } from './agentChatHistory.js'; +export { AgentChatHistory, type HistoryTurn } from './agentChatHistory.js'; +import { AgentChatHistory, type HistoryTurn } from './agentChatHistory.js'; + +import { randomUUID } from 'node:crypto'; import { toParts } from '../code_assist/converter.js'; import { retryWithBackoff, @@ -159,8 +162,9 @@ function isValidContent(content: Content): boolean { * @throws Error if the history does not start with a user turn. * @throws Error if the history contains an invalid role. */ -function validateHistory(history: Content[]) { - for (const content of history) { +function validateHistory(history: Array) { + for (const item of history) { + const content = 'content' in item ? item.content : item; if (content.role !== 'user' && content.role !== 'model') { throw new Error(`Role must be user or model, but got ${content.role}.`); } @@ -175,23 +179,25 @@ function validateHistory(history: Content[]) { * filters or recitation). Extracting valid turns from the history * ensures that subsequent requests could be accepted by the model. 
*/ -function extractCuratedHistory(comprehensiveHistory: Content[]): Content[] { +function extractCuratedHistory( + comprehensiveHistory: readonly HistoryTurn[], +): HistoryTurn[] { if (comprehensiveHistory === undefined || comprehensiveHistory.length === 0) { return []; } - const curatedHistory: Content[] = []; + const curatedHistory: HistoryTurn[] = []; const length = comprehensiveHistory.length; let i = 0; while (i < length) { - if (comprehensiveHistory[i].role === 'user') { + if (comprehensiveHistory[i].content.role === 'user') { curatedHistory.push(comprehensiveHistory[i]); i++; } else { - const modelOutput: Content[] = []; + const modelOutput: HistoryTurn[] = []; let isValid = true; - while (i < length && comprehensiveHistory[i].role === 'model') { + while (i < length && comprehensiveHistory[i].content.role === 'model') { modelOutput.push(comprehensiveHistory[i]); - if (isValid && !isValidContent(comprehensiveHistory[i])) { + if (isValid && !isValidContent(comprehensiveHistory[i].content)) { isValid = false; } i++; @@ -272,15 +278,43 @@ export class GeminiChat { readonly context: AgentLoopContext, private systemInstruction: string = '', private tools: Tool[] = [], - history: Content[] = [], + history: Array = [], resumedSessionData?: ResumedSessionData, private readonly onModelChanged?: (modelId: string) => Promise, ) { validateHistory(history); - this.agentHistory = new AgentChatHistory(history); + + let initialHistory: HistoryTurn[]; + // If history is passed, it is the most up-to-date in-memory state and takes precedence. + // This is critical for hot-restarts after operations like context compression. + if (history.length > 0) { + initialHistory = history.map((item) => + 'id' in item && 'content' in item + ? item + : { id: randomUUID(), content: item }, + ); + } else if (resumedSessionData) { + // Otherwise, if resuming from disk, build from the persisted record. 
+ initialHistory = resumedSessionData.conversation.messages + .filter((m) => m.type === 'user' || m.type === 'gemini') + .map((m) => ({ + id: m.id, + content: { + role: m.type === 'user' ? 'user' : 'model', + parts: Array.isArray(m.content) + ? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + (m.content as Part[]) + : [{ text: String(m.content) }], + }, + })); + } else { + initialHistory = []; + } + + this.agentHistory = new AgentChatHistory(initialHistory); this.chatRecordingService = new ChatRecordingService(context); this.lastPromptTokenCount = estimateTokenCountSync( - this.agentHistory.flatMap((c) => c.parts || []), + this.agentHistory.flatMap((c) => c.content.parts || []), ); } @@ -293,12 +327,21 @@ export class GeminiChat { kind: 'main' | 'subagent' = 'main', ) { await this.chatRecordingService.initialize(resumedSessionData, kind); + // Sync initial history with the recorder to ensure all turns (even bootstrapped ones) + // are durable and coordinated. + this.chatRecordingService.updateMessagesFromHistory( + this.agentHistory.get(), + ); } setSystemInstruction(sysInstr: string) { this.systemInstruction = sysInstr; } + getSystemInstruction(): string { + return this.systemInstruction; + } + /** * Sends a message to the model and returns the response in chunks. * @@ -362,41 +405,67 @@ export class GeminiChat { } } - this.chatRecordingService.recordMessage({ + const id = this.chatRecordingService.recordMessage({ model, type: 'user', content: userMessageParts, displayContent: finalDisplayContent, }); - } + this.agentHistory.push({ id, content: userContent }); + } else { + // Record tool response as a message to ensure durable ID and linear history for resume. + const id = this.chatRecordingService.recordSyntheticMessage( + 'user', + userContent.parts || [], + ); - // Add user content to history ONCE before any attempts. 
- const binaryInjections = this.extractBinaryInjections(userContent.parts); - if (binaryInjections) { - // Turn 1: The original tool response (now cleaned) - this.agentHistory.push(userContent); + // Binary injections: If the tool output contains binary data, we expand the history. + const binaryParts = this.extractBinaryInjections(userContent.parts); + if (binaryParts) { + // Turn 1: The original tool response (now cleaned) + this.agentHistory.push({ id, content: userContent }); - // Turn 2: Synthetic Model Acknowledgment - this.agentHistory.push({ - role: 'model', - parts: [ - { - text: 'Binary content received. Proceeding with analysis.', - thought: true, - thoughtSignature: SYNTHETIC_THOUGHT_SIGNATURE, + // Turn 2: Synthetic Model Acknowledgment + const modelId = this.chatRecordingService.recordSyntheticMessage( + 'gemini', + [ + { + text: 'Binary content received. Proceeding with analysis.', + thought: true, + thoughtSignature: SYNTHETIC_THOUGHT_SIGNATURE, + }, + ], + ); + this.agentHistory.push({ + id: modelId, + content: { + role: 'model', + parts: [ + { + text: 'Binary content received. 
Proceeding with analysis.', + thought: true, + thoughtSignature: SYNTHETIC_THOUGHT_SIGNATURE, + }, + ], }, - ], - }); + }); - // Turn 3: The actual binary data (becomes the current request message) - userContent = { - role: 'user', - parts: binaryInjections, - }; + // Turn 3: The actual binary data (becomes the current request message) + const binaryId = this.chatRecordingService.recordSyntheticMessage( + 'info', + binaryParts, + ); + userContent = { + role: 'user', + parts: binaryParts, + }; + this.agentHistory.push({ id: binaryId, content: userContent }); + } else { + this.agentHistory.push({ id, content: userContent }); + } } - this.agentHistory.push(userContent); - const requestContents = this.getHistory(true); + const requestHistory = this.getHistoryTurns(true); const streamWithRetries = async function* ( this: GeminiChat, @@ -420,7 +489,7 @@ export class GeminiChat { isConnectionPhase = true; const stream = await this.makeApiCallAndProcessStream( currentConfigKey, - requestContents, + requestHistory, prompt_id, signal, role, @@ -542,45 +611,44 @@ export class GeminiChat { private extractBinaryInjections( parts: Part[] | undefined, ): Part[] | undefined { - if (!parts) { - return undefined; - } - - const binaryInjections: Part[] = []; - - for (const part of parts) { - const response = part.functionResponse?.response; - - if (response && BINARY_INJECTION_KEY in response) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const binaryParts = response[BINARY_INJECTION_KEY] as Part[]; - delete response[BINARY_INJECTION_KEY]; - - if (Array.isArray(binaryParts)) { - binaryInjections.push(...binaryParts); + const binaryParts: Part[] = []; + if (parts) { + for (const part of parts) { + const response = part.functionResponse?.response; + if (response && BINARY_INJECTION_KEY in response) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const injected = response[BINARY_INJECTION_KEY] as Part[]; + delete 
response[BINARY_INJECTION_KEY]; + if (Array.isArray(injected)) { + binaryParts.push(...injected); + } } } } - return binaryInjections.length > 0 ? binaryInjections : undefined; + return binaryParts.length > 0 ? binaryParts : undefined; } private async makeApiCallAndProcessStream( modelConfigKey: ModelConfigKey, - requestContents: readonly Content[], + requestHistory: readonly HistoryTurn[], prompt_id: string, abortSignal: AbortSignal, role: LlmRole, ): Promise> { // Last mile scrubbing to remove internal tracking properties (e.g. callIndex) // before sending to the Gemini API. This whitelists only standard Gemini fields. - const scrubbedContents = this.context.config.isContextManagementEnabled() - ? scrubHistory([...requestContents]) - : [...requestContents]; + const scrubbedHistory = this.context.config.isContextManagementEnabled() + ? scrubHistory([...requestHistory]) + : [...requestHistory]; + + const scrubbedContents = scrubbedHistory.map((h) => h.content); const contentsForPreviewModel = this.ensureActiveLoopHasThoughtSignatures(scrubbedContents); + const requestContents = scrubbedContents; + // Track final request parameters for AfterModel hooks const { model: availabilityFinalModel, @@ -829,14 +897,21 @@ export class GeminiChat { * @return History contents alternating between user and model for the entire * chat session. */ - getHistory(curated: boolean = false): readonly Content[] { + getHistory(curated: boolean = false): Content[] { + return this.getHistoryTurns(curated).map((h) => h.content); + } + + /** + * Returns the chat history as HistoryTurns. + */ + getHistoryTurns(curated: boolean = false): HistoryTurn[] { const history = curated - ? extractCuratedHistory([...this.agentHistory.get()]) - : this.agentHistory.get(); + ? extractCuratedHistory(this.agentHistory.get()) + : [...this.agentHistory.get()]; return this.context.config.isContextManagementEnabled() - ? scrubHistory([...history]) - : [...history]; + ? 
scrubHistory(history) + : history; } /** @@ -849,24 +924,44 @@ export class GeminiChat { /** * Adds a new entry to the chat history. */ - addHistory(content: Content): void { - this.agentHistory.push(content); + addHistory(content: Content | HistoryTurn): void { + if ('id' in content && 'content' in content) { + this.agentHistory.push(content); + } else { + const id = this.chatRecordingService.recordSyntheticMessage( + content.role === 'user' ? 'user' : 'gemini', + content.parts || [], + ); + this.agentHistory.push({ id, content }); + } } setHistory( - history: readonly Content[], + history: ReadonlyArray, options: { silent?: boolean } = {}, ): void { - this.agentHistory.set(history, options); + const wrappedHistory: HistoryTurn[] = history.map((item) => { + if ('id' in item && 'content' in item) { + return item; + } + const id = this.chatRecordingService.recordSyntheticMessage( + item.role === 'user' ? 'user' : 'gemini', + item.parts || [], + ); + return { id, content: item }; + }); + this.agentHistory.set(wrappedHistory, options); this.lastPromptTokenCount = estimateTokenCountSync( - this.agentHistory.flatMap((c) => c.parts || []), + this.agentHistory.flatMap((c) => c.content.parts || []), + ); + this.chatRecordingService.updateMessagesFromHistory( + this.agentHistory.get(), ); - this.chatRecordingService.updateMessagesFromHistory(history); } stripThoughtsFromHistory(): void { - this.agentHistory.map((content) => { - const newContent = { ...content }; + const newHistory = this.agentHistory.map((turn) => { + const newContent = { ...turn.content }; if (newContent.parts) { newContent.parts = newContent.parts.map((part) => { if (part && typeof part === 'object' && 'thoughtSignature' in part) { @@ -877,8 +972,9 @@ export class GeminiChat { return part; }); } - return newContent; + return { id: turn.id, content: newContent }; }); + this.agentHistory.set(newHistory); } // To ensure our requests validate, the first function call in every model @@ -936,6 +1032,10 @@ 
export class GeminiChat { this.tools = tools; } + getTools(): Tool[] { + return this.tools; + } + async maybeIncludeSchemaDepthContext(error: StructuredError): Promise { // Check for potentially problematic cyclic tools with cyclic schemas // and include a recommendation to remove potentially problematic tools. @@ -1162,15 +1262,22 @@ export class GeminiChat { .join('') .trim(); + let id: string; // Record model response text from the collected parts. // Also flush when there are thoughts or a tool call (even with no text) // so that BeforeTool hooks always see the latest transcript state. if (responseText || hasThoughts || hasToolCall) { - this.chatRecordingService.recordMessage({ + id = this.chatRecordingService.recordMessage({ model, type: 'gemini', content: responseText, }); + } else { + // Still need a durable ID when the response has no text, thoughts, or tool calls + id = this.chatRecordingService.recordSyntheticMessage( + 'gemini', + consolidatedParts, + ); } // Stream validation logic: A stream is considered successful if: @@ -1208,7 +1315,10 @@ export class GeminiChat { } } - this.agentHistory.push({ role: 'model', parts: consolidatedParts }); + this.agentHistory.push({ + id, + content: { role: 'model', parts: consolidatedParts }, + }); } getLastPromptTokenCount(): number { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 7fc1892139..091e59d619 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -36,6 +36,7 @@ export * from './commands/types.js'; export * from './core/baseLlmClient.js'; export * from './core/client.js'; export * from './core/contentGenerator.js'; +export * from './core/fakeContentGenerator.js'; export * from './core/loggingContentGenerator.js'; export * from './core/geminiChat.js'; export * from './core/logger.js'; diff --git a/packages/core/src/services/chatRecordingService.test.ts b/packages/core/src/services/chatRecordingService.test.ts index 7af8380a5a..cc3e174cf0 100644 ---
a/packages/core/src/services/chatRecordingService.test.ts +++ b/packages/core/src/services/chatRecordingService.test.ts @@ -47,9 +47,10 @@ import { } from './chatRecordingService.js'; import type { WorkspaceContext } from '../utils/workspaceContext.js'; import { CoreToolCallStatus } from '../scheduler/types.js'; -import type { Content, Part } from '@google/genai'; +import type { Part } from '@google/genai'; import type { Config } from '../config/config.js'; import { getProjectHash } from '../utils/paths.js'; +import type { HistoryTurn } from '../core/agentChatHistory.js'; vi.mock('../utils/paths.js'); vi.mock('node:crypto', async (importOriginal) => { @@ -1065,7 +1066,7 @@ describe('ChatRecordingService', () => { it('should update tool results from API history (masking sync)', async () => { // 1. Record an initial message and tool call - chatRecordingService.recordMessage({ + const modelMsgId = chatRecordingService.recordMessage({ type: 'gemini', content: 'I will list the files.', model: 'gemini-pro', @@ -1087,24 +1088,30 @@ describe('ChatRecordingService', () => { // 2. Prepare mock history with masked content const maskedSnippet = 'short preview'; - const history: Content[] = [ + const history: HistoryTurn[] = [ { - role: 'model', - parts: [ - { functionCall: { name: 'list_files', args: { path: '.' } } }, - ], + id: modelMsgId, + content: { + role: 'model', + parts: [ + { functionCall: { name: 'list_files', args: { path: '.' 
} } }, + ], + }, }, { - role: 'user', - parts: [ - { - functionResponse: { - name: 'list_files', - id: callId, - response: { output: maskedSnippet }, + id: 'user-id', + content: { + role: 'user', + parts: [ + { + functionResponse: { + name: 'list_files', + id: callId, + response: { output: maskedSnippet }, + }, }, - }, - ], + ], + }, }, ]; @@ -1132,8 +1139,15 @@ describe('ChatRecordingService', () => { output: maskedSnippet, }); }); + it('should preserve multi-modal sibling parts during sync', async () => { await chatRecordingService.initialize(); + const modelMsgId = chatRecordingService.recordMessage({ + type: 'gemini', + content: '', + model: 'gemini-pro', + }); + const callId = 'multi-modal-call'; const originalResult: Part[] = [ { @@ -1146,12 +1160,6 @@ describe('ChatRecordingService', () => { { inlineData: { mimeType: 'image/png', data: 'base64...' } }, ]; - chatRecordingService.recordMessage({ - type: 'gemini', - content: '', - model: 'gemini-pro', - }); - chatRecordingService.recordToolCalls('gemini-pro', [ { id: callId, @@ -1164,19 +1172,26 @@ describe('ChatRecordingService', () => { ]); const maskedSnippet = ''; - const history: Content[] = [ + const history: HistoryTurn[] = [ { - role: 'user', - parts: [ - { - functionResponse: { - name: 'read_file', - id: callId, - response: { output: maskedSnippet }, + id: modelMsgId, + content: { role: 'model', parts: [] }, + }, + { + id: 'user-id', + content: { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read_file', + id: callId, + response: { output: maskedSnippet }, + }, }, - }, - { inlineData: { mimeType: 'image/png', data: 'base64...' } }, - ], + { inlineData: { mimeType: 'image/png', data: 'base64...' 
} }, + ], + }, }, ]; @@ -1201,14 +1216,14 @@ describe('ChatRecordingService', () => { it('should handle parts appearing BEFORE the functionResponse in a content block', async () => { await chatRecordingService.initialize(); - const callId = 'prefix-part-call'; - - chatRecordingService.recordMessage({ + const modelMsgId = chatRecordingService.recordMessage({ type: 'gemini', content: '', model: 'gemini-pro', }); + const callId = 'prefix-part-call'; + chatRecordingService.recordToolCalls('gemini-pro', [ { id: callId, @@ -1220,19 +1235,26 @@ describe('ChatRecordingService', () => { }, ]); - const history: Content[] = [ + const history: HistoryTurn[] = [ { - role: 'user', - parts: [ - { text: 'Prefix metadata or text' }, - { - functionResponse: { - name: 'read_file', - id: callId, - response: { output: 'file content' }, + id: modelMsgId, + content: { role: 'model', parts: [] }, + }, + { + id: 'user-id', + content: { + role: 'user', + parts: [ + { text: 'Prefix metadata or text' }, + { + functionResponse: { + name: 'read_file', + id: callId, + response: { output: 'file content' }, + }, }, - }, - ], + ], + }, }, ]; @@ -1263,25 +1285,30 @@ describe('ChatRecordingService', () => { appendFileSyncSpy.mockClear(); // History with a tool call ID that doesn't exist in the conversation - const history: Content[] = [ + const history: HistoryTurn[] = [ { - role: 'user', - parts: [ - { - functionResponse: { - name: 'read_file', - id: 'nonexistent-call-id', - response: { output: 'some content' }, + id: 'user-id', + content: { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read_file', + id: 'nonexistent-call-id', + response: { output: 'some content' }, + }, }, - }, - ], + ], + }, }, ]; chatRecordingService.updateMessagesFromHistory(history); - // No tool calls matched, so writeFileSync should NOT have been called - expect(appendFileSyncSpy).not.toHaveBeenCalled(); + // In the new 'Strong Owner' architecture, updateMessagesFromHistory ensures that + // all turns in 
history (including new/synthetic ones) are recorded. + // Since 'user-id' was not in the original conversation, it is added. + expect(appendFileSyncSpy).toHaveBeenCalled(); }); }); @@ -1315,4 +1342,69 @@ describe('ChatRecordingService', () => { mkdirSyncSpy.mockRestore(); }); }); + + describe('recordSyntheticMessage and history sync', () => { + it('should correctly record synthetic messages with durable IDs', async () => { + await chatRecordingService.initialize(undefined, 'main'); + const parts = [{ text: 'Synthetic Turn' }]; + + // Implicit ID generation + const id1 = chatRecordingService.recordSyntheticMessage('user', parts); + expect(id1).toBeDefined(); + expect(id1).toMatch(/test-uuid-/); + + // Explicit ID registration (e.g. from context processor) + const customId = 'stable-hash-123'; + const id2 = chatRecordingService.recordSyntheticMessage( + 'gemini', + parts, + customId, + ); + expect(id2).toBe(customId); + + const record = await loadConversationRecord( + chatRecordingService.getConversationFilePath()!, + ); + expect(record!.messages).toHaveLength(2); + expect(record!.messages[0].id).toBe(id1); + expect(record!.messages[0].type).toBe('user'); + expect(record!.messages[1].id).toBe(customId); + expect(record!.messages[1].type).toBe('gemini'); + }); + + it('should synchronize history turns and maintain their durable identity', async () => { + await chatRecordingService.initialize(undefined, 'main'); + const history: HistoryTurn[] = [ + { id: 'h1', content: { role: 'user', parts: [{ text: 'msg1' }] } }, + { id: 'h2', content: { role: 'model', parts: [{ text: 'msg2' }] } }, + ]; + + chatRecordingService.updateMessagesFromHistory(history); + + const record = await loadConversationRecord( + chatRecordingService.getConversationFilePath()!, + ); + expect(record!.messages).toHaveLength(2); + expect(record!.messages[0].id).toBe('h1'); + expect(record!.messages[1].id).toBe('h2'); + + // Update with a summary + const summaryId = 'summary-123'; + const updatedHistory: 
HistoryTurn[] = [ + { + id: summaryId, + content: { role: 'user', parts: [{ text: 'summary' }] }, + }, + ...history.slice(1), + ]; + + chatRecordingService.updateMessagesFromHistory(updatedHistory); + const record2 = await loadConversationRecord( + chatRecordingService.getConversationFilePath()!, + ); + expect(record2!.messages).toHaveLength(2); + expect(record2!.messages[0].id).toBe(summaryId); + expect(record2!.messages[1].id).toBe('h2'); + }); + }); }); diff --git a/packages/core/src/services/chatRecordingService.ts b/packages/core/src/services/chatRecordingService.ts index e070a1c542..ca76a0e499 100644 --- a/packages/core/src/services/chatRecordingService.ts +++ b/packages/core/src/services/chatRecordingService.ts @@ -17,13 +17,12 @@ import { import readline from 'node:readline'; import { randomUUID } from 'node:crypto'; import type { - Content, - Part, PartListUnion, GenerateContentResponseUsageMetadata, } from '@google/genai'; import { debugLogger } from '../utils/debugLogger.js'; import type { AgentLoopContext } from '../config/agent-loop-context.js'; +import type { HistoryTurn } from '../core/agentChatHistory.js'; import { SESSION_FILE_PREFIX, type TokensSummary, @@ -497,9 +496,10 @@ export class ChatRecordingService { type: ConversationRecordExtra['type'], content: PartListUnion, displayContent?: PartListUnion, + id?: string, ): MessageRecord { return { - id: randomUUID(), + id: id || randomUUID(), timestamp: new Date().toISOString(), type, content, @@ -512,14 +512,17 @@ export class ChatRecordingService { type: ConversationRecordExtra['type']; content: PartListUnion; displayContent?: PartListUnion; - }): void { - if (!this.conversationFile || !this.cachedConversation) return; + id?: string; + }): string { + if (!this.conversationFile || !this.cachedConversation) + return message.id || randomUUID(); try { const msg = this.newMessage( message.type, message.content, message.displayContent, + message.id, ); if (msg.type === 'gemini') { msg.thoughts = 
this.queuedThoughts; @@ -530,12 +533,30 @@ export class ChatRecordingService { } this.pushMessage(msg); this.updateMetadata({ lastUpdated: new Date().toISOString() }); + return msg.id; } catch (error) { debugLogger.error('Error saving message to chat history.', error); throw error; } } + /** + * Records a synthetic message (e.g. Binary Received, Snapshot/Summary) + * and returns its durable ID. + */ + recordSyntheticMessage( + type: ConversationRecordExtra['type'], + content: PartListUnion, + id?: string, + ): string { + return this.recordMessage({ + model: undefined, + type, + content, + id, + }); + } + recordThought(thought: ThoughtSummary): void { if (!this.conversationFile) return; this.queuedThoughts.push({ @@ -869,48 +890,83 @@ export class ChatRecordingService { return this.cachedConversation; } - updateMessagesFromHistory(history: readonly Content[]): void { + updateMessagesFromHistory(history: readonly HistoryTurn[]): void { if (!this.conversationFile || !this.cachedConversation) return; try { - const partsMap = new Map(); - for (const content of history) { - if (content.role === 'user' && content.parts) { - const callIds = content.parts - .map((p) => p.functionResponse?.id) - .filter((id): id is string => !!id); + let updated = false; - if (callIds.length === 0) continue; + // 1. Sync content and IDs + const newMessages: MessageRecord[] = history.map((turn) => { + const existing = this.cachedConversation?.messages.find( + (m) => m.id === turn.id, + ); - let currentCallId = callIds[0]; - for (const part of content.parts) { - if (part.functionResponse?.id) { - currentCallId = part.functionResponse.id; + if (existing) { + // If content parts have changed (e.g. 
masking), update them + if ( + JSON.stringify(existing.content) !== + JSON.stringify(turn.content.parts) + ) { + updated = true; + } + return { + ...existing, + content: turn.content.parts || [], + }; + } + + // It's a new (possibly synthetic) turn like a summary + updated = true; + return this.newMessage( + turn.content.role === 'user' ? 'user' : 'gemini', + turn.content.parts || [], + undefined, + turn.id, + ); + }); + + // 2. Specialized 'Masking Sync' for tool call results + // If a user turn in history contains a functionResponse, we update the + // corresponding ToolCallRecord in the preceding gemini message. + for (const turn of history) { + if (turn.content.role !== 'user') continue; + for (const part of turn.content.parts || []) { + if (part.functionResponse) { + const callId = part.functionResponse.id; + // Find the gemini message that contains this tool call + const geminiMsg = newMessages.find( + (m) => + m.type === 'gemini' && + m.toolCalls?.some((tc) => tc.id === callId), + ); + if (geminiMsg && geminiMsg.type === 'gemini') { + const tc = geminiMsg.toolCalls!.find((tc) => tc.id === callId); + if (tc) { + // If the history version is different (e.g. 
masked), sync it into the record + // We sync the entire parts array of the user turn to ensure sibling parts are preserved + if ( + JSON.stringify(tc.result) !== + JSON.stringify(turn.content.parts) + ) { + tc.result = turn.content.parts || []; + updated = true; + } + } } - - if (!partsMap.has(currentCallId)) { - partsMap.set(currentCallId, []); - } - partsMap.get(currentCallId)!.push(part); } } } - for (const message of this.cachedConversation.messages) { - let msgChanged = false; - if (message.type === 'gemini' && message.toolCalls) { - for (const toolCall of message.toolCalls) { - const newParts = partsMap.get(toolCall.id); - if (newParts !== undefined) { - toolCall.result = newParts; - msgChanged = true; - } - } - } - if (msgChanged) { - // Push updated message to log - this.pushMessage(message); - } + if ( + updated || + newMessages.length !== this.cachedConversation.messages.length + ) { + this.cachedConversation.messages = newMessages; + this.updateMetadata({ + messages: newMessages, + lastUpdated: new Date().toISOString(), + }); } } catch (error) { debugLogger.error( diff --git a/packages/core/src/utils/cryptoUtils.test.ts b/packages/core/src/utils/cryptoUtils.test.ts new file mode 100644 index 0000000000..72ba3eda06 --- /dev/null +++ b/packages/core/src/utils/cryptoUtils.test.ts @@ -0,0 +1,35 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { deriveStableId } from './cryptoUtils.js'; + +describe('cryptoUtils', () => { + describe('deriveStableId', () => { + it('should be deterministic regardless of input order', () => { + const id1 = deriveStableId(['a', 'b', 'c']); + const id2 = deriveStableId(['c', 'b', 'a']); + expect(id1).toBe(id2); + expect(id1).toMatch(/^[0-9a-f]{32}$/); + }); + + it('should produce different IDs for different inputs', () => { + const id1 = deriveStableId(['a', 'b', 'c']); + const id2 = deriveStableId(['a', 'b', 'd']); + 
expect(id1).not.toBe(id2); + }); + + it('should handle single inputs', () => { + const id = deriveStableId(['only-one']); + expect(id).toMatch(/^[0-9a-f]{32}$/); + }); + + it('should be consistent across calls with same data', () => { + const input = ['id-123', 'id-456']; + expect(deriveStableId(input)).toBe(deriveStableId(input)); + }); + }); +}); diff --git a/packages/core/src/utils/cryptoUtils.ts b/packages/core/src/utils/cryptoUtils.ts new file mode 100644 index 0000000000..6c9eff9cde --- /dev/null +++ b/packages/core/src/utils/cryptoUtils.ts @@ -0,0 +1,20 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createHash } from 'node:crypto'; + +/** + * Derives a stable, deterministic ID from a list of source IDs. + * Used for synthetic turns like summaries to ensure that re-summarizing the same + * content produces a consistent identity. + */ +export function deriveStableId(sourceIds: string[]): string { + const sortedIds = [...sourceIds].sort(); + return createHash('sha256') + .update(sortedIds.join('|')) + .digest('hex') + .slice(0, 32); +} diff --git a/packages/core/src/utils/environmentContext.ts b/packages/core/src/utils/environmentContext.ts index 947062eb27..6344a08569 100644 --- a/packages/core/src/utils/environmentContext.ts +++ b/packages/core/src/utils/environmentContext.ts @@ -7,6 +7,7 @@ import type { Part, Content } from '@google/genai'; import type { Config } from '../config/config.js'; import { getFolderStructure } from './getFolderStructure.js'; +import type { HistoryTurn } from '../core/agentChatHistory.js'; export const INITIAL_HISTORY_LENGTH = 1; @@ -81,8 +82,8 @@ ${environmentMemory} export async function getInitialChatHistory( config: Config, - extraHistory?: Content[], -): Promise { + extraHistory?: ReadonlyArray, +): Promise> { const envParts = await getEnvironmentContext(config); const envContextString = envParts.map((part) => part.text || '').join('\n\n'); diff --git 
a/packages/core/src/utils/historyHardening.test.ts b/packages/core/src/utils/historyHardening.test.ts new file mode 100644 index 0000000000..face667132 --- /dev/null +++ b/packages/core/src/utils/historyHardening.test.ts @@ -0,0 +1,378 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { + hardenHistory, + SYNTHETIC_THOUGHT_SIGNATURE, +} from './historyHardening.js'; +import type { HistoryTurn } from '../core/agentChatHistory.js'; +import { deriveStableId } from './cryptoUtils.js'; +import type { Part } from '@google/genai'; + +describe('hardenHistory', () => { + it('should return an empty array if input is empty', () => { + expect(hardenHistory([])).toEqual([]); + }); + + it('should coalesce adjacent turns of the same role', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'hello' }] } }, + { id: '2', content: { role: 'user', parts: [{ text: 'world' }] } }, + ]; + const hardened = hardenHistory(history); + expect(hardened.length).toBe(1); + expect(hardened[0].content.parts).toEqual([ + { text: 'hello' }, + { text: 'world' }, + ]); + expect(hardened[0].id).toBe('1'); // Inherits ID of the first turn in the sequence + }); + + it('should inject thoughtSignature into the first functionCall of a model turn if missing', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'do it' }] } }, + { + id: '2', + content: { + role: 'model', + parts: [{ functionCall: { name: 'myTool', args: {} } }], + }, + }, + { + id: '3', + content: { + role: 'user', + parts: [ + { + functionResponse: { + name: 'myTool', + response: { ok: true }, + }, + }, + ], + }, + }, + ]; + + const hardened = hardenHistory(history); + const modelPart = hardened[1].content.parts![0]; + expect(modelPart).toHaveProperty( + 'thoughtSignature', + SYNTHETIC_THOUGHT_SIGNATURE, + ); + }); + + it('should inject a sentinel 
user turn if history ends with a model turn', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'hello' }] } }, + { id: '2', content: { role: 'model', parts: [{ text: 'hi' }] } }, + ]; + + const hardened = hardenHistory(history); + expect(hardened.length).toBe(3); + expect(hardened[2].content.role).toBe('user'); + expect(hardened[2].content.parts![0]).toEqual({ text: 'Please continue.' }); + expect(hardened[2].id).toBe(deriveStableId(['2', 'sentinel_end'])); + }); + + it('should inject a sentinel user turn if history starts with a model turn', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'model', parts: [{ text: 'hi' }] } }, + { id: '2', content: { role: 'user', parts: [{ text: 'hello' }] } }, + ]; + + const hardened = hardenHistory(history, { + sentinels: { continuation: 'Custom start' }, + }); + expect(hardened.length).toBe(3); + expect(hardened[0].content.role).toBe('user'); + expect(hardened[0].content.parts![0]).toEqual({ text: 'Custom start' }); + expect(hardened[0].id).toBe(deriveStableId(['1', 'sentinel_start'])); + }); + + it('should inject sentinel responses for missing functionResponses', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'do it' }] } }, + { + id: '2', + content: { + role: 'model', + parts: [ + { + functionCall: { id: 'call_1', name: 'toolA', args: {} }, + thoughtSignature: 'sig', + }, + { functionCall: { id: 'call_2', name: 'toolB', args: {} } }, + ], + }, + }, + // Note: Turn 3 is missing, so toolA and toolB have no responses + ]; + + const hardened = hardenHistory(history, { + sentinels: { lostToolResponse: 'Lost.' }, + }); + + // The history should now be: User -> Model -> User (sentinel responses) -> User (sentinel end) + // Wait, the sentinel responses turn will satisfy the "ends with user" rule. 
+ expect(hardened.length).toBe(3); + expect(hardened[2].content.role).toBe('user'); + expect(hardened[2].content.parts).toHaveLength(2); + + const resp1 = hardened[2].content.parts![0].functionResponse; + expect(resp1?.id).toBe('call_1'); + expect(resp1?.response).toEqual({ error: 'Lost.' }); + + const resp2 = hardened[2].content.parts![1].functionResponse; + expect(resp2?.id).toBe('call_2'); + expect(resp2?.response).toEqual({ error: 'Lost.' }); + + expect(hardened[2].id).toBe(deriveStableId(['2', 'sentinel_resp'])); + }); + + it('should successfully match parallel tool calls and responses even if responses are originally split across separate user turns', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'do it' }] } }, + { + id: '2', + content: { + role: 'model', + parts: [ + { + functionCall: { id: 'call_1', name: 'toolA', args: {} }, + thoughtSignature: 'sig', + }, + { functionCall: { id: 'call_2', name: 'toolB', args: {} } }, + ], + }, + }, + // Responses arrive as separate user turns + { + id: '3', + content: { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_1', + name: 'toolA', + response: { ok: true }, + }, + }, + ], + }, + }, + { + id: '4', + content: { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_2', + name: 'toolB', + response: { ok: true }, + }, + }, + ], + }, + }, + ]; + + // The hardener should coalesce Turn 3 and Turn 4 *before* it tries to pair them with Turn 2. + // Otherwise, it would look at Turn 3, see 'call_2' is missing, inject a sentinel for 'call_2', + // and then look at Turn 4 and consider 'call_2' to be orphaned. 
+ const hardened = hardenHistory(history); + + // Total turns: User(1), Model(2), User(3+4 merged) + expect(hardened.length).toBe(3); + + const userResponseTurn = hardened[2]; + expect(userResponseTurn.content.role).toBe('user'); + expect(userResponseTurn.content.parts).toHaveLength(2); + + // Verify no sentinels were injected and original responses were preserved + expect(userResponseTurn.content.parts![0].functionResponse?.id).toBe( + 'call_1', + ); + expect(userResponseTurn.content.parts![1].functionResponse?.id).toBe( + 'call_2', + ); + + // Ensure no error properties exist + expect( + userResponseTurn.content.parts![0].functionResponse?.response, + ).toEqual({ ok: true }); + expect( + userResponseTurn.content.parts![1].functionResponse?.response, + ).toEqual({ ok: true }); + }); + + it('should synthesize a functionCall for a singleton orphaned functionResponse', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'hello' }] } }, + { id: '2', content: { role: 'model', parts: [{ text: 'hi' }] } }, + { + id: '3', + content: { + role: 'user', + parts: [ + { text: 'text is kept' }, + { + functionResponse: { id: 'orphan_1', name: 'toolA', response: {} }, + }, + ], + }, + }, + ]; + + const hardened = hardenHistory(history); + // Turn 1: user, Turn 2: model (with synthetic call), Turn 3: user + expect(hardened.length).toBe(3); + + const modelTurn = hardened[1]; + expect(modelTurn.content.role).toBe('model'); + expect(modelTurn.content.parts).toHaveLength(2); // text + synthetic call + expect(modelTurn.content.parts![1].functionCall).toBeDefined(); + expect(modelTurn.content.parts![1].functionCall?.id).toBe('orphan_1'); + expect( + (modelTurn.content.parts![1] as unknown as { thoughtSignature: string }) + .thoughtSignature, + ).toBe(SYNTHETIC_THOUGHT_SIGNATURE); + + const userTurn = hardened[2]; + expect(userTurn.content.parts).toHaveLength(2); // hoisted response + text + 
expect(userTurn.content.parts![0].functionResponse?.id).toBe('orphan_1'); + expect(userTurn.content.parts![1]).toEqual({ text: 'text is kept' }); + }); + + it('should synthesize functionCalls for multiple orphaned functionResponses in parallel', () => { + const history: HistoryTurn[] = [ + { + id: '1', + content: { role: 'user', parts: [{ text: 'Parallel action' }] }, + }, + // Previous model turn exists but has NO tool calls + { + id: '2', + content: { role: 'model', parts: [{ text: 'I will do nothing' }] }, + }, + { + id: '3', + content: { + role: 'user', + parts: [ + { + functionResponse: { id: 'orphan_A', name: 'toolA', response: {} }, + }, + { + functionResponse: { id: 'orphan_B', name: 'toolB', response: {} }, + }, + { + functionResponse: { id: 'orphan_C', name: 'toolC', response: {} }, + }, + ], + }, + }, + ]; + + const hardened = hardenHistory(history); + expect(hardened.length).toBe(3); + + const modelTurn = hardened[1]; + expect(modelTurn.content.role).toBe('model'); + expect(modelTurn.content.parts).toHaveLength(4); // original text + 3 synthetic calls + + // Only the FIRST function call should get the synthetic signature + const callA = modelTurn.content.parts![1]; + expect(callA.functionCall?.id).toBe('orphan_A'); + expect( + (callA as unknown as { thoughtSignature?: string }).thoughtSignature, + ).toBe(SYNTHETIC_THOUGHT_SIGNATURE); + + const callB = modelTurn.content.parts![2]; + expect(callB.functionCall?.id).toBe('orphan_B'); + expect( + (callB as unknown as { thoughtSignature?: string }).thoughtSignature, + ).toBeUndefined(); + + const callC = modelTurn.content.parts![3]; + expect(callC.functionCall?.id).toBe('orphan_C'); + expect( + (callC as unknown as { thoughtSignature?: string }).thoughtSignature, + ).toBeUndefined(); + + const userTurn = hardened[2]; + expect(userTurn.content.parts).toHaveLength(3); + expect(userTurn.content.parts![0].functionResponse?.id).toBe('orphan_A'); + 
expect(userTurn.content.parts![1].functionResponse?.id).toBe('orphan_B'); + expect(userTurn.content.parts![2].functionResponse?.id).toBe('orphan_C'); + }); + + it('should hoist and re-order tool responses to match functionCall order', () => { + const history: HistoryTurn[] = [ + { id: '1', content: { role: 'user', parts: [{ text: 'do it' }] } }, + { + id: '2', + content: { + role: 'model', + parts: [ + { + functionCall: { id: 'c1', name: 'toolA', args: {} }, + thoughtSignature: 'sig', + }, + { functionCall: { id: 'c2', name: 'toolB', args: {} } }, + ], + }, + }, + { + id: '3', + content: { + role: 'user', + parts: [ + { text: 'some text' }, + { functionResponse: { id: 'c2', name: 'toolB', response: {} } }, + { functionResponse: { id: 'c1', name: 'toolA', response: {} } }, + ], + }, + }, + ]; + + const hardened = hardenHistory(history); + expect(hardened[2].content.parts).toHaveLength(3); + + // Order should be: resp(c1) -> resp(c2) -> text + const p0 = hardened[2].content.parts![0]; + const p1 = hardened[2].content.parts![1]; + const p2 = hardened[2].content.parts![2]; + + expect(p0.functionResponse?.id).toBe('c1'); + expect(p1.functionResponse?.id).toBe('c2'); + expect(p2.text).toBe('some text'); + }); + + it('should scrub non-standard properties from parts', () => { + const history: HistoryTurn[] = [ + { + id: '1', + content: { + role: 'user', + parts: [ + { + text: 'hello', + extraProp: 'should be removed', + } as unknown as Part, + ], + }, + }, + ]; + + const hardened = hardenHistory(history); + expect(hardened[0].content.parts![0]).not.toHaveProperty('extraProp'); + expect(hardened[0].content.parts![0]).toHaveProperty('text', 'hello'); + }); +}); diff --git a/packages/core/src/utils/historyHardening.ts b/packages/core/src/utils/historyHardening.ts index 5ff071acd8..8a2dc547b1 100644 --- a/packages/core/src/utils/historyHardening.ts +++ b/packages/core/src/utils/historyHardening.ts @@ -4,8 +4,10 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { 
Content, Part } from '@google/genai'; +import { type Part } from '@google/genai'; import { debugLogger } from './debugLogger.js'; +import { type HistoryTurn } from '../core/agentChatHistory.js'; +import { deriveStableId } from './cryptoUtils.js'; export const SYNTHETIC_THOUGHT_SIGNATURE = 'skip_thought_signature_validator'; @@ -35,9 +37,9 @@ const DEFAULT_SENTINELS = { * 5. Signatures: The first functionCall in a model turn must have a thoughtSignature. */ export function hardenHistory( - history: Content[], + history: HistoryTurn[], options: HardeningOptions = {}, -): Content[] { +): HistoryTurn[] { if (history.length === 0) return history; const sentinels = { ...DEFAULT_SENTINELS, ...options.sentinels }; @@ -63,17 +65,20 @@ export function hardenHistory( /** * Combines adjacent turns with the same role and removes empty turns. */ -function coalesce(history: Content[]): Content[] { - const result: Content[] = []; +function coalesce(history: HistoryTurn[]): HistoryTurn[] { + const result: HistoryTurn[] = []; for (const turn of history) { - if (!turn.parts || turn.parts.length === 0) continue; + if (!turn.content.parts || turn.content.parts.length === 0) continue; const last = result[result.length - 1]; - if (last && last.role === turn.role) { - last.parts = [...(last.parts || []), ...(turn.parts || [])]; + if (last && last.content.role === turn.content.role) { + last.content.parts = [ + ...(last.content.parts || []), + ...(turn.content.parts || []), + ]; } else { - // Shallow clone the turn so we don't mutate the original history array structure - result.push({ ...turn }); + // Shallow clone the turn and content so we don't mutate the original history array structure + result.push({ id: turn.id, content: { ...turn.content } }); } } return result; @@ -83,10 +88,10 @@ function coalesce(history: Content[]): Content[] { * Ensures tool calls have matching responses and model turns have required signatures. 
*/ function pairToolsAndEnforceSignatures( - history: Content[], + history: HistoryTurn[], sentinels: Required>, -): Content[] { - const result: Content[] = []; +): HistoryTurn[] { + const result: HistoryTurn[] = []; // We work on a copy to allow splicing in sentinel turns const work = [...history]; @@ -94,8 +99,8 @@ function pairToolsAndEnforceSignatures( for (let i = 0; i < work.length; i++) { const turn = work[i]; - if (turn.role === 'model') { - const parts = turn.parts || []; + if (turn.content.role === 'model') { + const parts = turn.content.parts || []; // A. Signatures let foundCall = false; @@ -123,8 +128,8 @@ function pairToolsAndEnforceSignatures( const name = call.functionCall!.name || 'unknown'; const hasResponse = - nextTurn?.role === 'user' && - nextTurn.parts?.some( + nextTurn?.content.role === 'user' && + nextTurn.content.parts?.some( (p) => p.functionResponse?.id === id && p.functionResponse?.name === name, @@ -143,17 +148,20 @@ function pairToolsAndEnforceSignatures( `[HistoryHardener] Detected ${missing.length} tool calls without responses. Injecting sentinel responses.`, ); - let targetUserTurn: Content; - if (nextTurn?.role === 'user') { + let targetUserTurn: HistoryTurn; + if (nextTurn?.content.role === 'user') { targetUserTurn = nextTurn; } else { - targetUserTurn = { role: 'user', parts: [] }; + targetUserTurn = { + id: deriveStableId([turn.id, 'sentinel_resp']), + content: { role: 'user', parts: [] }, + }; work.splice(i + 1, 0, targetUserTurn); } for (const m of missing) { - targetUserTurn.parts = targetUserTurn.parts || []; - targetUserTurn.parts.push({ + targetUserTurn.content.parts = targetUserTurn.content.parts || []; + targetUserTurn.content.parts.push({ functionResponse: { name: m.name, id: m.id, @@ -165,20 +173,21 @@ function pairToolsAndEnforceSignatures( } } } - } else if (turn.role === 'user') { + } else if (turn.content.role === 'user') { // C. Orphaned Responses // A user response MUST follow a model call. 
const prevTurn = result[result.length - 1]; - const parts = turn.parts || []; + const parts = turn.content.parts || []; const validParts: Part[] = []; + const orphanedResponses: Part[] = []; for (const p of parts) { if (p.functionResponse) { const id = p.functionResponse.id; const name = p.functionResponse.name; const hasCall = - prevTurn?.role === 'model' && - prevTurn.parts?.some( + prevTurn?.content.role === 'model' && + prevTurn.content.parts?.some( (cp) => cp.functionCall?.id === id && cp.functionCall?.name === name, ); @@ -187,17 +196,51 @@ function pairToolsAndEnforceSignatures( validParts.push(p); } else { debugLogger.log( - `[HistoryHardener] Dropping orphaned functionResponse id='${id}' (name='${name}')`, + `[HistoryHardener] Orphaned functionResponse id='${id}' (name='${name}'). Injecting synthetic functionCall.`, ); + orphanedResponses.push(p); + validParts.push(p); } } else { validParts.push(p); } } - turn.parts = validParts; + + if (orphanedResponses.length > 0) { + let targetModelTurn: HistoryTurn; + if (prevTurn?.content.role === 'model') { + targetModelTurn = prevTurn; + } else { + targetModelTurn = { + id: deriveStableId([turn.id, 'sentinel_call']), + content: { role: 'model', parts: [] }, + }; + result.push(targetModelTurn); + } + + for (const orph of orphanedResponses) { + targetModelTurn.content.parts = targetModelTurn.content.parts || []; + const hasExistingCall = targetModelTurn.content.parts.some( + (p) => !!p.functionCall, + ); + const callPart: Part = { + functionCall: { + name: orph.functionResponse!.name, + id: orph.functionResponse!.id, + args: {}, + }, + }; + if (!hasExistingCall) { + callPart.thoughtSignature = SYNTHETIC_THOUGHT_SIGNATURE; + } + targetModelTurn.content.parts.push(callPart); + } + } + + turn.content.parts = validParts; } - if (turn.parts && turn.parts.length > 0) { + if (turn.content.parts && turn.content.parts.length > 0) { result.push(turn); } } @@ -208,21 +251,22 @@ function pairToolsAndEnforceSignatures( /** * 
Hoists and re-orders tool responses within user turns to match preceding model turns. */ -function refineToolResponses(history: Content[]): Content[] { +function refineToolResponses(history: HistoryTurn[]): HistoryTurn[] { for (let i = 1; i < history.length; i++) { const turn = history[i]; const prev = history[i - 1]; - if (turn.role === 'user' && prev.role === 'model') { + if (turn.content.role === 'user' && prev.content.role === 'model') { const callOrder = - prev.parts + prev.content.parts ?.filter((p) => !!p.functionCall) .map((p) => p.functionCall!.id) || []; if (callOrder.length > 0) { const responseParts = - turn.parts?.filter((p) => !!p.functionResponse) || []; - const otherParts = turn.parts?.filter((p) => !p.functionResponse) || []; + turn.content.parts?.filter((p) => !!p.functionResponse) || []; + const otherParts = + turn.content.parts?.filter((p) => !p.functionResponse) || []; if (responseParts.length > 0) { // 1. Re-order: Sort responses to match the model's call order @@ -240,7 +284,7 @@ function refineToolResponses(history: Content[]): Content[] { }); // 2. Hoisting: Place all sorted responses BEFORE text or other parts - turn.parts = [...responseParts, ...otherParts]; + turn.content.parts = [...responseParts, ...otherParts]; } } } @@ -252,36 +296,42 @@ function refineToolResponses(history: Content[]): Content[] { * Final pass to ensure start/end roles and alternation are correct. */ function enforceRoleConstraints( - history: Content[], + history: HistoryTurn[], sentinels: Required>, -): Content[] { +): HistoryTurn[] { if (history.length === 0) return []; // Re-coalesce first to catch any empty turns or adjacent roles introduced by pairing const base = coalesce(history); if (base.length === 0) return []; - const result: Content[] = [...base]; + const result: HistoryTurn[] = [...base]; // 1. 
Ensure starts with user - if (result[0].role === 'model') { + if (result[0].content.role === 'model') { debugLogger.log( '[HistoryHardener] Final history starts with model role. Prepending sentinel user turn.', ); result.unshift({ - role: 'user', - parts: [{ text: sentinels.continuation }], + id: deriveStableId([result[0].id, 'sentinel_start']), + content: { + role: 'user', + parts: [{ text: sentinels.continuation }], + }, }); } // 2. Ensure ends with user - if (result[result.length - 1].role === 'model') { + if (result[result.length - 1].content.role === 'model') { debugLogger.log( '[HistoryHardener] Final history ends with model role. Appending sentinel user turn.', ); result.push({ - role: 'user', - parts: [{ text: 'Please continue.' }], + id: deriveStableId([result[result.length - 1].id, 'sentinel_end']), + content: { + role: 'user', + parts: [{ text: 'Please continue.' }], + }, }); } @@ -293,10 +343,13 @@ function enforceRoleConstraints( * Deep-scrubs the history to remove any non-standard properties from Content and Part objects. * This ensures compatibility with strict APIs (like Vertex AI) that reject unknown fields. 
*/ -export function scrubHistory(history: Content[]): Content[] { - return history.map((content) => ({ - role: content.role, - parts: (content.parts || []).map(scrubPart), +export function scrubHistory(history: HistoryTurn[]): HistoryTurn[] { + return history.map((turn) => ({ + id: turn.id, + content: { + role: turn.content.role, + parts: (turn.content.parts || []).map((p) => scrubPart(p)), + }, })); } diff --git a/packages/core/src/utils/sessionUtils.test.ts b/packages/core/src/utils/sessionUtils.test.ts index d132087ee8..adcb2e76e0 100644 --- a/packages/core/src/utils/sessionUtils.test.ts +++ b/packages/core/src/utils/sessionUtils.test.ts @@ -27,7 +27,7 @@ describe('convertSessionToClientHistory', () => { const history = convertSessionToClientHistory(messages); - expect(history).toEqual([ + expect(history.map((h) => h.content)).toEqual([ { role: 'user', parts: [{ text: 'Hello' }] }, { role: 'model', parts: [{ text: 'Hi there' }] }, ]); @@ -58,7 +58,7 @@ describe('convertSessionToClientHistory', () => { const history = convertSessionToClientHistory(messages); - expect(history).toEqual([ + expect(history.map((h) => h.content)).toEqual([ { role: 'user', parts: [{ text: 'Hello' }] }, { role: 'model', @@ -100,7 +100,7 @@ describe('convertSessionToClientHistory', () => { const history = convertSessionToClientHistory(messages); - expect(history).toEqual([ + expect(history.map((h) => h.content)).toEqual([ { role: 'user', parts: [{ text: 'Actual query' }] }, ]); }); @@ -133,7 +133,7 @@ describe('convertSessionToClientHistory', () => { const history = convertSessionToClientHistory(messages); - expect(history).toEqual([ + expect(history.map((h) => h.content)).toEqual([ { role: 'user', parts: [{ text: 'List files' }] }, { role: 'model', @@ -172,7 +172,7 @@ describe('convertSessionToClientHistory', () => { const history = convertSessionToClientHistory(messages); - expect(history).toEqual([ + expect(history.map((h) => h.content)).toEqual([ { role: 'user', parts: [ diff --git 
a/packages/core/src/utils/sessionUtils.ts b/packages/core/src/utils/sessionUtils.ts index 4803dd4f07..822612cb28 100644 --- a/packages/core/src/utils/sessionUtils.ts +++ b/packages/core/src/utils/sessionUtils.ts @@ -7,6 +7,7 @@ import { type Part, type PartListUnion } from '@google/genai'; import { type ConversationRecord } from '../services/chatRecordingService.js'; import { partListUnionToString } from '../core/geminiRequest.js'; +import { type HistoryTurn } from '../core/agentChatHistory.js'; /** * Converts a PartListUnion into a normalized array of Part objects. @@ -29,8 +30,8 @@ function ensurePartArray(content: PartListUnion): Part[] { */ export function convertSessionToClientHistory( messages: ConversationRecord['messages'], -): Array<{ role: 'user' | 'model'; parts: Part[] }> { - const clientHistory: Array<{ role: 'user' | 'model'; parts: Part[] }> = []; +): HistoryTurn[] { + const clientHistory: HistoryTurn[] = []; for (const msg of messages) { if (msg.type === 'info' || msg.type === 'error' || msg.type === 'warning') { @@ -47,8 +48,11 @@ export function convertSessionToClientHistory( } clientHistory.push({ - role: 'user', - parts: ensurePartArray(msg.content), + id: msg.id, + content: { + role: 'user', + parts: ensurePartArray(msg.content), + }, }); } else if (msg.type === 'gemini') { const modelParts: Part[] = []; @@ -85,8 +89,11 @@ export function convertSessionToClientHistory( } clientHistory.push({ - role: 'model', - parts: modelParts, + id: msg.id, + content: { + role: 'model', + parts: modelParts, + }, }); const functionResponseParts: Part[] = []; @@ -117,8 +124,11 @@ export function convertSessionToClientHistory( if (functionResponseParts.length > 0) { clientHistory.push({ - role: 'user', - parts: functionResponseParts, + id: `${msg.id}_response`, + content: { + role: 'user', + parts: functionResponseParts, + }, }); } } else { @@ -128,8 +138,11 @@ export function convertSessionToClientHistory( if (modelParts.length > 0) { clientHistory.push({ - 
role: 'model', - parts: modelParts, + id: msg.id, + content: { + role: 'model', + parts: modelParts, + }, }); } }