From fbcfa40f1d54866fa885c72e59e2e1f7b3a385da Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 6 Apr 2026 22:47:43 +0000 Subject: [PATCH] next steps --- .../blobDegradationProcessor.test.ts | 93 ++++-------------- .../emergencyTruncationProcessor.test.ts | 96 +++++++++++++++++++ .../historySquashingProcessor.test.ts | 66 ++++--------- .../semanticCompressionProcessor.test.ts | 68 +++++-------- .../processors/stateSnapshotProcessor.test.ts | 91 ++++++++++++++++++ packages/core/src/context/testing.md | 40 ++++++++ .../src/context/testing/contextTestUtils.ts | 51 ++++++++++ 7 files changed, 342 insertions(+), 163 deletions(-) create mode 100644 packages/core/src/context/processors/emergencyTruncationProcessor.test.ts create mode 100644 packages/core/src/context/processors/stateSnapshotProcessor.test.ts create mode 100644 packages/core/src/context/testing.md diff --git a/packages/core/src/context/processors/blobDegradationProcessor.test.ts b/packages/core/src/context/processors/blobDegradationProcessor.test.ts index 0a5b31187b..d6cedacec1 100644 --- a/packages/core/src/context/processors/blobDegradationProcessor.test.ts +++ b/packages/core/src/context/processors/blobDegradationProcessor.test.ts @@ -3,12 +3,10 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { createMockEnvironment } from '../testing/contextTestUtils.js'; +import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js'; import { describe, it, expect, beforeEach, vi } from 'vitest'; import { BlobDegradationProcessor } from './blobDegradationProcessor.js'; -import type { Episode, UserPrompt } from '../ir/types.js'; -import type { ContextAccountingState } from '../pipeline.js'; -import { randomUUID } from 'node:crypto'; +import type { UserPrompt } from '../ir/types.js'; import type { ContextEnvironment } from '../sidecar/environment.js'; import { InMemoryFileSystem } from '../system/InMemoryFileSystem.js'; @@ -24,46 +22,19 @@ describe('BlobDegradationProcessor', () => { processor = new BlobDegradationProcessor(env); }); - const getDummyState = ( - isSatisfied = false, - deficit = 0, - protectedIds = new Set(), - ): ContextAccountingState => ({ - currentTokens: 5000, - maxTokens: 10000, - retainedTokens: 4000, - deficitTokens: deficit, - protectedEpisodeIds: protectedIds, - isBudgetSatisfied: isSatisfied, - }); - it('degrades inline_data into a text reference and saves to disk', async () => { const dummyImageBase64 = Buffer.from('fake-image-data').toString('base64'); - const ep: Episode = { - id: 'ep-1', - timestamp: Date.now(), - trigger: { - id: randomUUID(), - type: 'USER_PROMPT', - semanticParts: [ - { type: 'text', text: 'Look at this image:' }, - { - type: 'inline_data', - mimeType: 'image/png', - data: dummyImageBase64, - }, - ], - metadata: { - originalTokens: 300, - currentTokens: 300, - transformations: [], - }, + const ep = createDummyEpisode('ep-1', 'USER_PROMPT', [ + { type: 'text', text: 'Look at this image:' }, + { + type: 'inline_data', + mimeType: 'image/png', + data: dummyImageBase64, }, - steps: [], - }; + ]); - const state = getDummyState(false, 500, new Set()); + const state = createDummyState(false, 500); const result = await processor.process([ep], state); const parts = (result[0].trigger as UserPrompt).semanticParts; @@ -73,12 +44,8 @@ describe('BlobDegradationProcessor', () => { // Inline data should be degraded expect(parts[1].presentation).toBeDefined(); - expect(parts[1].presentation!.text).toContain( - '[Multi-Modal Blob (image/png', - ); - expect(parts[1].presentation!.text).toContain( - 'degraded to text to preserve context window', - ); + expect(parts[1].presentation!.text).toContain('[Multi-Modal Blob (image/png'); + expect(parts[1].presentation!.text).toContain('degraded to text to preserve context window'); // Verify it was written to fake FS expect(fileSystem.getFiles().size).toBeGreaterThan(0); @@ -89,39 +56,21 @@ describe('BlobDegradationProcessor', () => { }); it('degrades file_data into a text reference without disk write', async () => { - const ep: Episode = { - id: 'ep-2', - timestamp: Date.now(), - trigger: { - id: randomUUID(), - type: 'USER_PROMPT', - semanticParts: [ - { - type: 'file_data', - mimeType: 'application/pdf', - fileUri: 'gs://fake-bucket/doc.pdf', - }, - ], - metadata: { - originalTokens: 300, - currentTokens: 300, - transformations: [], - }, + const ep = createDummyEpisode('ep-2', 'USER_PROMPT', [ + { + type: 'file_data', + mimeType: 'application/pdf', + fileUri: 'gs://fake-bucket/doc.pdf', }, - steps: [], - }; + ]); - const state = getDummyState(false, 500, new Set()); + const state = createDummyState(false, 500); const result = await processor.process([ep], state); const parts = (result[0].trigger as UserPrompt).semanticParts; expect(parts[0].presentation).toBeDefined(); - expect(parts[0].presentation!.text).toContain( - '[File Reference (application/pdf)', - ); - expect(parts[0].presentation!.text).toContain( - 'Original URI: gs://fake-bucket/doc.pdf', - ); + expect(parts[0].presentation!.text).toContain('[File Reference (application/pdf)'); + expect(parts[0].presentation!.text).toContain('Original URI: gs://fake-bucket/doc.pdf'); expect(fileSystem.getFiles().size).toBe(0); }); diff --git a/packages/core/src/context/processors/emergencyTruncationProcessor.test.ts b/packages/core/src/context/processors/emergencyTruncationProcessor.test.ts new file mode 100644 index 0000000000..03d5a08983 --- /dev/null +++ b/packages/core/src/context/processors/emergencyTruncationProcessor.test.ts @@ -0,0 +1,96 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js'; +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { EmergencyTruncationProcessor } from './emergencyTruncationProcessor.js'; +import type { ContextEnvironment } from '../sidecar/environment.js'; + +describe('EmergencyTruncationProcessor', () => { + let processor: EmergencyTruncationProcessor; + let env: ContextEnvironment; + + beforeEach(() => { + vi.resetAllMocks(); + env = createMockEnvironment(); + // Force token calculator to return exactly what we tell it for deterministic testing + vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((episodes) => { + // Just sum up the metadata originalTokens for our dummy episodes + return episodes.reduce((acc, ep) => acc + (ep.trigger.metadata.originalTokens || 100), 0); + }); + + processor = new EmergencyTruncationProcessor(env, {}); + }); + + it('bypasses processing if currentTokens <= maxTokens', async () => { + const episodes = [ + createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'short' }]) + ]; + // State says we are under budget (5000 < 10000) + const state = createDummyState(true, 0, new Set(), 5000, 10000); + + const result = await processor.process(episodes, state); + expect(result).toStrictEqual(episodes); + expect(result.length).toBe(1); + }); + + it('truncates episodes from the front (oldest) until targetTokens is met', async () => { + const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'oldest' }]); + const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'middle' }]); + const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'newest' }]); + + // Each is worth 100 tokens according to our mock + const episodes = [ep1, ep2, ep3]; + + // We have 300 tokens, but max is 200. We need to drop 100 tokens. + const state = createDummyState(false, 100, new Set(), 300, 200); + + const result = await processor.process(episodes, state); + + // It should drop the FIRST episode (ep-1) and keep the rest. + expect(result.length).toBe(2); + expect(result[0].id).toBe('ep-2'); + expect(result[1].id).toBe('ep-3'); + }); + + it('never drops protected episodes (e.g. system instructions)', async () => { + const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'protected system prompt' }]); + const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'middle' }]); + const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'newest' }]); + + const episodes = [ep1, ep2, ep3]; + + // We have 300 tokens, max is 200. We need to drop 100 tokens. + // However, ep-1 is protected! + const state = createDummyState(false, 100, new Set(['ep-1']), 300, 200); + + const result = await processor.process(episodes, state); + + // It should SKIP dropping ep-1 (protected) and drop ep-2 instead. + expect(result.length).toBe(2); + expect(result[0].id).toBe('ep-1'); // Protected, survived + expect(result[1].id).toBe('ep-3'); // Survivor + }); + + it('can drop multiple episodes if deficit is huge', async () => { + const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', []); + const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', []); + const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', []); + + const episodes = [ep1, ep2, ep3]; + + // We have 300 tokens, max is 50. We need to drop 250 tokens! + const state = createDummyState(false, 250, new Set(), 300, 50); + + const result = await processor.process(episodes, state); + + // It must drop ep1 (100t) and ep2 (100t). + // Remaining is ep3 (100t). + // Wait, if it drops ep1 (remaining=200) and ep2 (remaining=100), + // when it looks at ep3, remaining (100) > max (50), so it will drop ep3 too! + expect(result.length).toBe(0); + }); +}); diff --git a/packages/core/src/context/processors/historySquashingProcessor.test.ts b/packages/core/src/context/processors/historySquashingProcessor.test.ts index 06cb313872..8a41d0f741 100644 --- a/packages/core/src/context/processors/historySquashingProcessor.test.ts +++ b/packages/core/src/context/processors/historySquashingProcessor.test.ts @@ -3,16 +3,14 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { createMockEnvironment } from '../testing/contextTestUtils.js'; +import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js'; import { describe, it, expect, beforeEach } from 'vitest'; import { HistorySquashingProcessor } from './historySquashingProcessor.js'; import type { - Episode, UserPrompt, AgentThought, AgentYield, } from '../ir/types.js'; -import type { ContextAccountingState } from '../pipeline.js'; import { randomUUID } from 'node:crypto'; describe('HistorySquashingProcessor', () => { @@ -24,37 +22,10 @@ describe('HistorySquashingProcessor', () => { }); }); - const getDummyState = ( - isSatisfied = false, - deficit = 0, - protectedIds = new Set(), - ): ContextAccountingState => ({ - currentTokens: 5000, - maxTokens: 10000, - retainedTokens: 4000, - deficitTokens: deficit, - protectedEpisodeIds: protectedIds, - isBudgetSatisfied: isSatisfied, - }); - - const createDummyEpisode = ( - id: string, - userText: string, - modelThought: string, - ): Episode => ({ - id, - timestamp: Date.now(), - trigger: { - id: randomUUID(), - type: 'USER_PROMPT', - semanticParts: [{ type: 'text', text: userText }], - metadata: { - originalTokens: 1000, - currentTokens: 1000, - transformations: [], - }, - }, - steps: [ + const createThoughtEpisode = (id: string, userText: string, modelThought: string) => { + const ep = createDummyEpisode(id, 'USER_PROMPT', [{ type: 'text', text: userText }]); + // Replace the tool steps with a thought step for this test + ep.steps = [ { id: randomUUID(), type: 'AGENT_THOUGHT', @@ -65,12 +36,13 @@ describe('HistorySquashingProcessor', () => { transformations: [], }, }, - ], - }); + ]; + return ep; + }; it('bypasses processing if budget is satisfied', async () => { - const episodes = [createDummyEpisode('1', 'short text', 'short thought')]; - const state = getDummyState(true); + const episodes = [createThoughtEpisode('1', 'short text', 'short thought')]; + const state = createDummyState(true); const result = await processor.process(episodes, state); @@ -83,8 +55,8 @@ describe('HistorySquashingProcessor', () => { it('skips protected episodes', async () => { // 500 chars = ~125 tokens. Limit is 100 tokens, so it WOULD truncate if not protected. const longText = 'A'.repeat(500); - const episodes = [createDummyEpisode('ep-1', longText, 'short thought')]; - const state = getDummyState(false, 100, new Set(['ep-1'])); + const episodes = [createThoughtEpisode('ep-1', longText, 'short thought')]; + const state = createDummyState(false, 100, new Set(['ep-1'])); const result = await processor.process(episodes, state); @@ -96,8 +68,8 @@ describe('HistorySquashingProcessor', () => { it('truncates both UserPrompts and AgentThoughts', async () => { const longUser = 'U'.repeat(1000); // ~250 tokens const longModel = 'M'.repeat(1000); // ~250 tokens - const episodes = [createDummyEpisode('ep-2', longUser, longModel)]; - const state = getDummyState(false, 500, new Set()); // High deficit, force truncation + const episodes = [createThoughtEpisode('ep-2', longUser, longModel)]; + const state = createDummyState(false, 500); // High deficit, force truncation const result = await processor.process(episodes, state); @@ -123,13 +95,13 @@ describe('HistorySquashingProcessor', () => { const longUser1 = 'A'.repeat(1000); const longUser2 = 'B'.repeat(1000); const episodes = [ - createDummyEpisode('ep-3', longUser1, 'short'), - createDummyEpisode('ep-4', longUser2, 'short'), + createThoughtEpisode('ep-3', longUser1, 'short'), + createThoughtEpisode('ep-4', longUser2, 'short'), ]; // Set deficit to exactly what ONE truncation will save // Original = ~250 tokens. Limit = 100. Truncation saves ~150 tokens. - const state = getDummyState(false, 150, new Set()); + const state = createDummyState(false, 150); const result = await processor.process(episodes, state); @@ -144,7 +116,7 @@ describe('HistorySquashingProcessor', () => { it('truncates IrNodes', async () => { const longYield = 'Y'.repeat(1000); // ~250 tokens - const ep = createDummyEpisode('ep-5', 'short', 'short'); + const ep = createThoughtEpisode('ep-5', 'short', 'short'); ep.yield = { id: randomUUID(), type: 'AGENT_YIELD', @@ -156,7 +128,7 @@ describe('HistorySquashingProcessor', () => { }, }; - const state = getDummyState(false, 500, new Set()); + const state = createDummyState(false, 500); const result = await processor.process([ep], state); const yieldPart = result[0].yield as AgentYield; diff --git a/packages/core/src/context/processors/semanticCompressionProcessor.test.ts b/packages/core/src/context/processors/semanticCompressionProcessor.test.ts index 9692189292..18adc2c976 100644 --- a/packages/core/src/context/processors/semanticCompressionProcessor.test.ts +++ b/packages/core/src/context/processors/semanticCompressionProcessor.test.ts @@ -4,16 +4,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { createMockEnvironment } from '../testing/contextTestUtils.js'; +import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js'; import { describe, it, expect, beforeEach, vi } from 'vitest'; import { SemanticCompressionProcessor } from './semanticCompressionProcessor.js'; import type { - Episode, UserPrompt, ToolExecution, AgentThought, } from '../ir/types.js'; -import type { ContextAccountingState } from '../pipeline.js'; import { randomUUID } from 'node:crypto'; import type { BaseLlmClient } from 'src/core/baseLlmClient.js'; @@ -27,51 +25,32 @@ describe('SemanticCompressionProcessor', () => { }); const env = createMockEnvironment(); + // Re-mock llmClient properly vi.spyOn(env, 'llmClient', 'get').mockReturnValue({ generateContent: generateContentMock } as unknown as BaseLlmClient); + processor = new SemanticCompressionProcessor(env, { nodeThresholdTokens: 2000, }); }); - const getDummyState = ( - isSatisfied = false, - deficit = 0, - protectedIds = new Set(), - ): ContextAccountingState => ({ - currentTokens: 5000, - maxTokens: 10000, - retainedTokens: 4000, - deficitTokens: deficit, - protectedEpisodeIds: protectedIds, - isBudgetSatisfied: isSatisfied, - }); - - const createDummyEpisode = ( + const createEpisodeWithThoughtsAndTools = ( id: string, userText: string, thoughtText: string, toolObs: string, - ): Episode => ({ - id, - timestamp: Date.now(), - trigger: { - id: randomUUID(), - type: 'USER_PROMPT', - semanticParts: [{ type: 'text', text: userText }], - metadata: { - originalTokens: 3800, - currentTokens: 3800, - transformations: [], - }, - }, - steps: [ + ) => { + const ep = createDummyEpisode(id, 'USER_PROMPT', [{ type: 'text', text: userText }]); + // We override metadata for threshold triggering + ep.trigger.metadata.currentTokens = 3800; + + ep.steps = [ { id: randomUUID(), type: 'AGENT_THOUGHT', text: thoughtText, metadata: { - originalTokens: 100, - currentTokens: 100, + originalTokens: 3800, + currentTokens: 3800, transformations: [], }, }, @@ -88,23 +67,24 @@ describe('SemanticCompressionProcessor', () => { transformations: [], }, }, - ], - }); + ]; + return ep; + }; it('bypasses processing if budget is satisfied', async () => { - const episodes = [createDummyEpisode('1', 'short', 'short', 'short')]; - const state = getDummyState(true); + const episodes = [createEpisodeWithThoughtsAndTools('1', 'short', 'short', 'short')]; + const state = createDummyState(true); await processor.process(episodes, state); expect(generateContentMock).not.toHaveBeenCalled(); }); it('skips protected episodes even if over budget', async () => { - const massiveStr = 'M'.repeat(15000); // Exceeds threshold (10 * 4 = 40) + const massiveStr = 'M'.repeat(15000); const episodes = [ - createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr), + createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr), ]; - const state = getDummyState(false, 1000, new Set(['ep-1'])); + const state = createDummyState(false, 1000, new Set(['ep-1'])); await processor.process(episodes, state); expect(generateContentMock).not.toHaveBeenCalled(); @@ -113,9 +93,9 @@ describe('SemanticCompressionProcessor', () => { it('summarizes unprotected UserPrompts, Thoughts, and Tool observations until deficit is met', async () => { const massiveStr = 'M'.repeat(15000); const episodes = [ - createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr), + createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr), ]; - const state = getDummyState(false, 50000, new Set()); // Massive deficit, forces all 3 to summarize + const state = createDummyState(false, 50000); // Massive deficit, forces all 3 to summarize const result = await processor.process(episodes, state); expect(generateContentMock).toHaveBeenCalledTimes(3); @@ -140,11 +120,11 @@ describe('SemanticCompressionProcessor', () => { it('stops calling LLM when deficit hits zero', async () => { const massiveStr = 'M'.repeat(15000); const episodes = [ - createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr), + createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr), ]; // Set deficit low enough that ONE summary solves the problem - const state = getDummyState(false, 5, new Set()); + const state = createDummyState(false, 5); await processor.process(episodes, state); // It should only compress the UserPrompt and then stop diff --git a/packages/core/src/context/processors/stateSnapshotProcessor.test.ts b/packages/core/src/context/processors/stateSnapshotProcessor.test.ts new file mode 100644 index 0000000000..752e971854 --- /dev/null +++ b/packages/core/src/context/processors/stateSnapshotProcessor.test.ts @@ -0,0 +1,91 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js'; +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { StateSnapshotProcessor } from './stateSnapshotProcessor.js'; +import type { ContextEnvironment } from '../sidecar/environment.js'; +import type { BaseLlmClient } from '../../core/baseLlmClient.js'; + +describe('StateSnapshotProcessor', () => { + let processor: StateSnapshotProcessor; + let env: ContextEnvironment; + let generateContentMock: ReturnType; + + beforeEach(() => { + vi.resetAllMocks(); + env = createMockEnvironment(); + + generateContentMock = vi.fn().mockResolvedValue({ + text: 'Mocked Compressed State Snapshot!', + }); + vi.spyOn(env, 'llmClient', 'get').mockReturnValue({ generateContent: generateContentMock } as unknown as BaseLlmClient); + + // Override token calc for testing + vi.spyOn(env.tokenCalculator, 'estimateTokensForParts').mockReturnValue(100); + + processor = new StateSnapshotProcessor(env, {}, env.eventBus); + }); + + it('bypasses processing if deficit is <= 0', async () => { + const episodes = [ + createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'hello' }]) + ]; + // current: 100, max: 1000, retained: 200 (deficit 0) + const state = createDummyState(false, 0, new Set(), 100, 1000, 200); + + const result = await processor.process(episodes, state); + expect(result).toStrictEqual(episodes); + expect(generateContentMock).not.toHaveBeenCalled(); + }); + + it('bypasses processing if not enough episodes to summarize (needs at least 2 inner episodes)', async () => { + const episodes = [ + createDummyEpisode('ep-sys', 'SYSTEM_EVENT', []), + createDummyEpisode('ep-active', 'USER_PROMPT', [{ text: 'help' }]), + ]; + + // current: 1000, max: 10000, retained: 500. Target deficit = 500 + const state = createDummyState(false, 500, new Set(), 1000, 10000, 500); + + const result = await processor.process(episodes, state); + expect(result).toStrictEqual(episodes); + expect(generateContentMock).not.toHaveBeenCalled(); + }); + + it('summarizes intermediate episodes into a single snapshot episode', async () => { + const episodes = [ + createDummyEpisode('ep-0', 'SYSTEM_EVENT', []), + createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'old 1' }]), + createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'old 2' }]), + createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'current' }]), + ]; + + // Target deficit = 200 + const state = createDummyState(false, 200, new Set(), 1000, 10000, 800); + + const result = await processor.process(episodes, state); + + // We started with 4 episodes. + // Episodes [1, 2] were synthesized into a single new Snapshot episode. + // Final array should be: [0, SNAPSHOT, 3] = length 3. + expect(result.length).toBe(3); + expect(result[0].id).toBe('ep-0'); + + const snapshotEp = result[1]; + expect(snapshotEp.yield).toBeDefined(); + expect(snapshotEp.yield!.text).toContain(''); + expect(snapshotEp.yield!.text).toContain('Mocked Compressed State Snapshot!'); + + expect(result[2].id).toBe('ep-3'); + + expect(generateContentMock).toHaveBeenCalledTimes(1); + + const llmArgs = generateContentMock.mock.calls[0][0]; + expect(llmArgs.contents[0].parts[0].text).toContain('old 1'); + expect(llmArgs.contents[0].parts[0].text).toContain('old 2'); + }); +}); diff --git a/packages/core/src/context/testing.md b/packages/core/src/context/testing.md new file mode 100644 index 0000000000..1d333d9f94 --- /dev/null +++ b/packages/core/src/context/testing.md @@ -0,0 +1,40 @@ +# Context Pipeline Testing Strategy & Audit + +## Philosophy: Defense in Depth +Our testing strategy avoids the "endless tax" of brittle tests by strictly separating concerns: +1. **Unit Tests (Processors, System Fakes, Mappers):** Exhaustively test logical boundaries, token math, and state transformations. Driven by shared, DRY test factories (no repetitive boilerplate). +2. **Component Tests (ContextManager, Orchestrator):** Test the *wiring* and *triggers*. Verify that barriers block, background pipelines execute, and events fire correctly. +3. **Golden / E2E Tests:** Test emergent behavior. Pass in complex, raw chat histories and assert the exact final projected `Content[]` output against committed JSON snapshots. + +--- + +## Audit Checklist & Coverage Tracker + +### 1. The Tooling Library (`contextTestUtils.ts`) +- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`). +- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern. + +### 2. Unit Tests (The Processors) +Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR. +- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers) +- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers) +- [ ] `HistorySquashingProcessor` (Audit coverage) +- [ ] `SemanticCompressionProcessor` (Audit coverage) +- [ ] `ContextTracer` (Complete) +- [ ] `SidecarLoader` (Complete) +- [ ] `IrMapper` / `IrProjector` (Audit coverage) + +### 3. Component Tests (The Orchestration) +Goal: Prove the sidecar configuration accurately drives runtime behavior without testing the processor logic itself. +- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup. +- [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers). +- [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers). + +### 4. Golden / E2E Tests +- [ ] `contextManager.golden.test.ts`: Ensure we have a scenario representing a "Day in the Life" of the CLI (some images, some huge tool outputs, deep history) mapping to a snapshot. + +--- + +## Next Actions +1. Migrate processor tests to shared factories to DRY up the suite. +2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations. diff --git a/packages/core/src/context/testing/contextTestUtils.ts b/packages/core/src/context/testing/contextTestUtils.ts index aa47419b19..b7168efeeb 100644 --- a/packages/core/src/context/testing/contextTestUtils.ts +++ b/packages/core/src/context/testing/contextTestUtils.ts @@ -13,6 +13,57 @@ import { ContextManager } from '../contextManager.js'; import { InMemoryFileSystem } from '../system/InMemoryFileSystem.js'; import { DeterministicIdGenerator } from '../system/DeterministicIdGenerator.js'; +import type { Episode } from '../ir/types.js'; +import type { ContextAccountingState } from '../pipeline.js'; +import { randomUUID } from 'node:crypto'; + +export function createDummyState( + isSatisfied = false, + deficit = 0, + protectedIds = new Set(), + currentTokens = 5000, + maxTokens = 10000, + retainedTokens = 4000, +): ContextAccountingState { + return { + currentTokens, + maxTokens, + retainedTokens, + deficitTokens: deficit, + protectedEpisodeIds: protectedIds, + isBudgetSatisfied: isSatisfied, + }; +} + +export function createDummyEpisode( + id: string, + type: 'USER_PROMPT' | 'SYSTEM_EVENT', + parts: unknown[] = [], + toolSteps: { intent: Record; observation: Record; toolName?: string; tokens?: { intent: number; observation: number } }[] = [] +): Episode { + return { + id, + timestamp: Date.now(), + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + trigger: { + id: randomUUID(), + type, + name: type === 'SYSTEM_EVENT' ? 'dummy_event' : undefined, + payload: type === 'SYSTEM_EVENT' ? {} : undefined, + semanticParts: type === 'USER_PROMPT' ? parts as any : undefined, + metadata: { originalTokens: 100, currentTokens: 100, transformations: [] }, + } as any, + steps: toolSteps.map(step => ({ + id: randomUUID(), + type: 'TOOL_EXECUTION', + toolName: step.toolName || 'test_tool', + intent: step.intent, + observation: step.observation, + tokens: step.tokens || { intent: 50, observation: 50 }, + metadata: { originalTokens: 100, currentTokens: 100, transformations: [] }, + })), + }; +} export function createMockEnvironment(): ContextEnvironment { return {