next steps
@@ -3,12 +3,10 @@
  * Copyright 2026 Google LLC
  * SPDX-License-Identifier: Apache-2.0
  */
-import { createMockEnvironment } from '../testing/contextTestUtils.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 import { BlobDegradationProcessor } from './blobDegradationProcessor.js';
-import type { Episode, UserPrompt } from '../ir/types.js';
-import type { ContextAccountingState } from '../pipeline.js';
-import { randomUUID } from 'node:crypto';
+import type { UserPrompt } from '../ir/types.js';
 import type { ContextEnvironment } from '../sidecar/environment.js';
 import { InMemoryFileSystem } from '../system/InMemoryFileSystem.js';

@@ -24,46 +22,19 @@ describe('BlobDegradationProcessor', () => {
     processor = new BlobDegradationProcessor(env);
   });

-  const getDummyState = (
-    isSatisfied = false,
-    deficit = 0,
-    protectedIds = new Set<string>(),
-  ): ContextAccountingState => ({
-    currentTokens: 5000,
-    maxTokens: 10000,
-    retainedTokens: 4000,
-    deficitTokens: deficit,
-    protectedEpisodeIds: protectedIds,
-    isBudgetSatisfied: isSatisfied,
-  });
-
   it('degrades inline_data into a text reference and saves to disk', async () => {
     const dummyImageBase64 = Buffer.from('fake-image-data').toString('base64');

-    const ep: Episode = {
-      id: 'ep-1',
-      timestamp: Date.now(),
-      trigger: {
-        id: randomUUID(),
-        type: 'USER_PROMPT',
-        semanticParts: [
-          { type: 'text', text: 'Look at this image:' },
-          {
-            type: 'inline_data',
-            mimeType: 'image/png',
-            data: dummyImageBase64,
-          },
-        ],
-        metadata: {
-          originalTokens: 300,
-          currentTokens: 300,
-          transformations: [],
-        },
-      },
-      steps: [],
-    };
+    const ep = createDummyEpisode('ep-1', 'USER_PROMPT', [
+      { type: 'text', text: 'Look at this image:' },
+      {
+        type: 'inline_data',
+        mimeType: 'image/png',
+        data: dummyImageBase64,
+      },
+    ]);

-    const state = getDummyState(false, 500, new Set());
+    const state = createDummyState(false, 500);
     const result = await processor.process([ep], state);

     const parts = (result[0].trigger as UserPrompt).semanticParts;
@@ -73,12 +44,8 @@ describe('BlobDegradationProcessor', () => {

     // Inline data should be degraded
     expect(parts[1].presentation).toBeDefined();
-    expect(parts[1].presentation!.text).toContain(
-      '[Multi-Modal Blob (image/png',
-    );
-    expect(parts[1].presentation!.text).toContain(
-      'degraded to text to preserve context window',
-    );
+    expect(parts[1].presentation!.text).toContain('[Multi-Modal Blob (image/png');
+    expect(parts[1].presentation!.text).toContain('degraded to text to preserve context window');

     // Verify it was written to fake FS
     expect(fileSystem.getFiles().size).toBeGreaterThan(0);
@@ -89,39 +56,21 @@ describe('BlobDegradationProcessor', () => {
   });

   it('degrades file_data into a text reference without disk write', async () => {
-    const ep: Episode = {
-      id: 'ep-2',
-      timestamp: Date.now(),
-      trigger: {
-        id: randomUUID(),
-        type: 'USER_PROMPT',
-        semanticParts: [
-          {
-            type: 'file_data',
-            mimeType: 'application/pdf',
-            fileUri: 'gs://fake-bucket/doc.pdf',
-          },
-        ],
-        metadata: {
-          originalTokens: 300,
-          currentTokens: 300,
-          transformations: [],
-        },
-      },
-      steps: [],
-    };
+    const ep = createDummyEpisode('ep-2', 'USER_PROMPT', [
+      {
+        type: 'file_data',
+        mimeType: 'application/pdf',
+        fileUri: 'gs://fake-bucket/doc.pdf',
+      },
+    ]);

-    const state = getDummyState(false, 500, new Set());
+    const state = createDummyState(false, 500);
     const result = await processor.process([ep], state);

     const parts = (result[0].trigger as UserPrompt).semanticParts;
     expect(parts[0].presentation).toBeDefined();
-    expect(parts[0].presentation!.text).toContain(
-      '[File Reference (application/pdf)',
-    );
-    expect(parts[0].presentation!.text).toContain(
-      'Original URI: gs://fake-bucket/doc.pdf',
-    );
+    expect(parts[0].presentation!.text).toContain('[File Reference (application/pdf)');
+    expect(parts[0].presentation!.text).toContain('Original URI: gs://fake-bucket/doc.pdf');

     expect(fileSystem.getFiles().size).toBe(0);
   });
@@ -0,0 +1,96 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { EmergencyTruncationProcessor } from './emergencyTruncationProcessor.js';
import type { ContextEnvironment } from '../sidecar/environment.js';

describe('EmergencyTruncationProcessor', () => {
  let processor: EmergencyTruncationProcessor;
  let env: ContextEnvironment;

  beforeEach(() => {
    vi.resetAllMocks();
    env = createMockEnvironment();
    // Force token calculator to return exactly what we tell it for deterministic testing
    vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((episodes) => {
      // Just sum up the metadata originalTokens for our dummy episodes
      return episodes.reduce((acc, ep) => acc + (ep.trigger.metadata.originalTokens || 100), 0);
    });

    processor = new EmergencyTruncationProcessor(env, {});
  });

  it('bypasses processing if currentTokens <= maxTokens', async () => {
    const episodes = [
      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'short' }]),
    ];
    // State says we are under budget (5000 < 10000)
    const state = createDummyState(true, 0, new Set(), 5000, 10000);

    const result = await processor.process(episodes, state);
    expect(result).toStrictEqual(episodes);
    expect(result.length).toBe(1);
  });

  it('truncates episodes from the front (oldest) until targetTokens is met', async () => {
    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'oldest' }]);
    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'middle' }]);
    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'newest' }]);

    // Each is worth 100 tokens according to our mock
    const episodes = [ep1, ep2, ep3];

    // We have 300 tokens, but max is 200. We need to drop 100 tokens.
    const state = createDummyState(false, 100, new Set(), 300, 200);

    const result = await processor.process(episodes, state);

    // It should drop the FIRST episode (ep-1) and keep the rest.
    expect(result.length).toBe(2);
    expect(result[0].id).toBe('ep-2');
    expect(result[1].id).toBe('ep-3');
  });

  it('never drops protected episodes (e.g. system instructions)', async () => {
    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'protected system prompt' }]);
    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'middle' }]);
    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'newest' }]);

    const episodes = [ep1, ep2, ep3];

    // We have 300 tokens, max is 200. We need to drop 100 tokens.
    // However, ep-1 is protected!
    const state = createDummyState(false, 100, new Set(['ep-1']), 300, 200);

    const result = await processor.process(episodes, state);

    // It should SKIP dropping ep-1 (protected) and drop ep-2 instead.
    expect(result.length).toBe(2);
    expect(result[0].id).toBe('ep-1'); // Protected, survived
    expect(result[1].id).toBe('ep-3'); // Survivor
  });

  it('can drop multiple episodes if the deficit is huge', async () => {
    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', []);
    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', []);
    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', []);

    const episodes = [ep1, ep2, ep3];

    // We have 300 tokens, max is 50. We need to drop 250 tokens!
    const state = createDummyState(false, 250, new Set(), 300, 50);

    const result = await processor.process(episodes, state);

    // Dropping ep1 leaves 200 tokens and dropping ep2 leaves 100 tokens;
    // 100 still exceeds the max of 50, so ep3 is dropped as well.
    expect(result.length).toBe(0);
  });
});
@@ -3,16 +3,14 @@
  * Copyright 2026 Google LLC
  * SPDX-License-Identifier: Apache-2.0
  */
-import { createMockEnvironment } from '../testing/contextTestUtils.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
 import { describe, it, expect, beforeEach } from 'vitest';
 import { HistorySquashingProcessor } from './historySquashingProcessor.js';
 import type {
-  Episode,
   UserPrompt,
   AgentThought,
   AgentYield,
 } from '../ir/types.js';
-import type { ContextAccountingState } from '../pipeline.js';
 import { randomUUID } from 'node:crypto';

 describe('HistorySquashingProcessor', () => {

@@ -24,37 +22,10 @@ describe('HistorySquashingProcessor', () => {
     });
   });

-  const getDummyState = (
-    isSatisfied = false,
-    deficit = 0,
-    protectedIds = new Set<string>(),
-  ): ContextAccountingState => ({
-    currentTokens: 5000,
-    maxTokens: 10000,
-    retainedTokens: 4000,
-    deficitTokens: deficit,
-    protectedEpisodeIds: protectedIds,
-    isBudgetSatisfied: isSatisfied,
-  });
-
-  const createDummyEpisode = (
-    id: string,
-    userText: string,
-    modelThought: string,
-  ): Episode => ({
-    id,
-    timestamp: Date.now(),
-    trigger: {
-      id: randomUUID(),
-      type: 'USER_PROMPT',
-      semanticParts: [{ type: 'text', text: userText }],
-      metadata: {
-        originalTokens: 1000,
-        currentTokens: 1000,
-        transformations: [],
-      },
-    },
-    steps: [
+  const createThoughtEpisode = (id: string, userText: string, modelThought: string) => {
+    const ep = createDummyEpisode(id, 'USER_PROMPT', [{ type: 'text', text: userText }]);
+    // Replace the tool steps with a thought step for this test
+    ep.steps = [
       {
         id: randomUUID(),
         type: 'AGENT_THOUGHT',

@@ -65,12 +36,13 @@ describe('HistorySquashingProcessor', () => {
         transformations: [],
       },
     },
-    ],
-  });
+    ];
+    return ep;
+  };

   it('bypasses processing if budget is satisfied', async () => {
-    const episodes = [createDummyEpisode('1', 'short text', 'short thought')];
-    const state = getDummyState(true);
+    const episodes = [createThoughtEpisode('1', 'short text', 'short thought')];
+    const state = createDummyState(true);

     const result = await processor.process(episodes, state);

@@ -83,8 +55,8 @@ describe('HistorySquashingProcessor', () => {
   it('skips protected episodes', async () => {
     // 500 chars = ~125 tokens. Limit is 100 tokens, so it WOULD truncate if not protected.
     const longText = 'A'.repeat(500);
-    const episodes = [createDummyEpisode('ep-1', longText, 'short thought')];
-    const state = getDummyState(false, 100, new Set(['ep-1']));
+    const episodes = [createThoughtEpisode('ep-1', longText, 'short thought')];
+    const state = createDummyState(false, 100, new Set(['ep-1']));

     const result = await processor.process(episodes, state);

@@ -96,8 +68,8 @@ describe('HistorySquashingProcessor', () => {
   it('truncates both UserPrompts and AgentThoughts', async () => {
     const longUser = 'U'.repeat(1000); // ~250 tokens
     const longModel = 'M'.repeat(1000); // ~250 tokens
-    const episodes = [createDummyEpisode('ep-2', longUser, longModel)];
-    const state = getDummyState(false, 500, new Set()); // High deficit, force truncation
+    const episodes = [createThoughtEpisode('ep-2', longUser, longModel)];
+    const state = createDummyState(false, 500); // High deficit, force truncation

     const result = await processor.process(episodes, state);

@@ -123,13 +95,13 @@ describe('HistorySquashingProcessor', () => {
     const longUser1 = 'A'.repeat(1000);
     const longUser2 = 'B'.repeat(1000);
     const episodes = [
-      createDummyEpisode('ep-3', longUser1, 'short'),
-      createDummyEpisode('ep-4', longUser2, 'short'),
+      createThoughtEpisode('ep-3', longUser1, 'short'),
+      createThoughtEpisode('ep-4', longUser2, 'short'),
     ];

     // Set deficit to exactly what ONE truncation will save
     // Original = ~250 tokens. Limit = 100. Truncation saves ~150 tokens.
-    const state = getDummyState(false, 150, new Set());
+    const state = createDummyState(false, 150);

     const result = await processor.process(episodes, state);

@@ -144,7 +116,7 @@ describe('HistorySquashingProcessor', () => {

   it('truncates IrNodes', async () => {
     const longYield = 'Y'.repeat(1000); // ~250 tokens
-    const ep = createDummyEpisode('ep-5', 'short', 'short');
+    const ep = createThoughtEpisode('ep-5', 'short', 'short');
     ep.yield = {
       id: randomUUID(),
       type: 'AGENT_YIELD',

@@ -156,7 +128,7 @@ describe('HistorySquashingProcessor', () => {
       },
     };

-    const state = getDummyState(false, 500, new Set());
+    const state = createDummyState(false, 500);
     const result = await processor.process([ep], state);

     const yieldPart = result[0].yield as AgentYield;

@@ -4,16 +4,14 @@
  * SPDX-License-Identifier: Apache-2.0
  */

-import { createMockEnvironment } from '../testing/contextTestUtils.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 import { SemanticCompressionProcessor } from './semanticCompressionProcessor.js';
 import type {
-  Episode,
   UserPrompt,
   ToolExecution,
   AgentThought,
 } from '../ir/types.js';
-import type { ContextAccountingState } from '../pipeline.js';
 import { randomUUID } from 'node:crypto';
 import type { BaseLlmClient } from 'src/core/baseLlmClient.js';

@@ -27,51 +25,32 @@ describe('SemanticCompressionProcessor', () => {
     });

     const env = createMockEnvironment();
     // Re-mock llmClient properly
     vi.spyOn(env, 'llmClient', 'get').mockReturnValue({ generateContent: generateContentMock } as unknown as BaseLlmClient);

     processor = new SemanticCompressionProcessor(env, {
       nodeThresholdTokens: 2000,
     });
   });

-  const getDummyState = (
-    isSatisfied = false,
-    deficit = 0,
-    protectedIds = new Set<string>(),
-  ): ContextAccountingState => ({
-    currentTokens: 5000,
-    maxTokens: 10000,
-    retainedTokens: 4000,
-    deficitTokens: deficit,
-    protectedEpisodeIds: protectedIds,
-    isBudgetSatisfied: isSatisfied,
-  });
-
-  const createDummyEpisode = (
+  const createEpisodeWithThoughtsAndTools = (
     id: string,
     userText: string,
     thoughtText: string,
     toolObs: string,
-  ): Episode => ({
-    id,
-    timestamp: Date.now(),
-    trigger: {
-      id: randomUUID(),
-      type: 'USER_PROMPT',
-      semanticParts: [{ type: 'text', text: userText }],
-      metadata: {
-        originalTokens: 3800,
-        currentTokens: 3800,
-        transformations: [],
-      },
-    },
-    steps: [
+  ) => {
+    const ep = createDummyEpisode(id, 'USER_PROMPT', [{ type: 'text', text: userText }]);
+    // We override metadata for threshold triggering
+    ep.trigger.metadata.currentTokens = 3800;
+
+    ep.steps = [
       {
         id: randomUUID(),
         type: 'AGENT_THOUGHT',
         text: thoughtText,
         metadata: {
-          originalTokens: 100,
-          currentTokens: 100,
+          originalTokens: 3800,
+          currentTokens: 3800,
           transformations: [],
         },
       },
@@ -88,23 +67,24 @@ describe('SemanticCompressionProcessor', () => {
         transformations: [],
       },
     },
-    ],
-  });
+    ];
+    return ep;
+  };

   it('bypasses processing if budget is satisfied', async () => {
-    const episodes = [createDummyEpisode('1', 'short', 'short', 'short')];
-    const state = getDummyState(true);
+    const episodes = [createEpisodeWithThoughtsAndTools('1', 'short', 'short', 'short')];
+    const state = createDummyState(true);

     await processor.process(episodes, state);
     expect(generateContentMock).not.toHaveBeenCalled();
   });

   it('skips protected episodes even if over budget', async () => {
-    const massiveStr = 'M'.repeat(15000); // Exceeds threshold (10 * 4 = 40)
+    const massiveStr = 'M'.repeat(15000);
     const episodes = [
-      createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr),
+      createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr),
     ];
-    const state = getDummyState(false, 1000, new Set(['ep-1']));
+    const state = createDummyState(false, 1000, new Set(['ep-1']));

     await processor.process(episodes, state);
     expect(generateContentMock).not.toHaveBeenCalled();
@@ -113,9 +93,9 @@ describe('SemanticCompressionProcessor', () => {
   it('summarizes unprotected UserPrompts, Thoughts, and Tool observations until deficit is met', async () => {
     const massiveStr = 'M'.repeat(15000);
     const episodes = [
-      createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr),
+      createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr),
     ];
-    const state = getDummyState(false, 50000, new Set()); // Massive deficit, forces all 3 to summarize
+    const state = createDummyState(false, 50000); // Massive deficit, forces all 3 to summarize

     const result = await processor.process(episodes, state);
     expect(generateContentMock).toHaveBeenCalledTimes(3);
@@ -140,11 +120,11 @@ describe('SemanticCompressionProcessor', () => {
   it('stops calling LLM when deficit hits zero', async () => {
     const massiveStr = 'M'.repeat(15000);
     const episodes = [
-      createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr),
+      createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr),
     ];

     // Set deficit low enough that ONE summary solves the problem
-    const state = getDummyState(false, 5, new Set());
+    const state = createDummyState(false, 5);

     await processor.process(episodes, state);
     // It should only compress the UserPrompt and then stop

@@ -0,0 +1,91 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { StateSnapshotProcessor } from './stateSnapshotProcessor.js';
import type { ContextEnvironment } from '../sidecar/environment.js';
import type { BaseLlmClient } from '../../core/baseLlmClient.js';

describe('StateSnapshotProcessor', () => {
  let processor: StateSnapshotProcessor;
  let env: ContextEnvironment;
  let generateContentMock: ReturnType<typeof vi.fn>;

  beforeEach(() => {
    vi.resetAllMocks();
    env = createMockEnvironment();

    generateContentMock = vi.fn().mockResolvedValue({
      text: 'Mocked Compressed State Snapshot!',
    });
    vi.spyOn(env, 'llmClient', 'get').mockReturnValue({ generateContent: generateContentMock } as unknown as BaseLlmClient);

    // Override token calc for testing
    vi.spyOn(env.tokenCalculator, 'estimateTokensForParts').mockReturnValue(100);

    processor = new StateSnapshotProcessor(env, {}, env.eventBus);
  });

  it('bypasses processing if deficit is <= 0', async () => {
    const episodes = [
      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'hello' }]),
    ];
    // current: 100, max: 1000, retained: 200 (deficit 0)
    const state = createDummyState(false, 0, new Set(), 100, 1000, 200);

    const result = await processor.process(episodes, state);
    expect(result).toStrictEqual(episodes);
    expect(generateContentMock).not.toHaveBeenCalled();
  });

  it('bypasses processing if not enough episodes to summarize (needs at least 2 inner episodes)', async () => {
    const episodes = [
      createDummyEpisode('ep-sys', 'SYSTEM_EVENT', []),
      createDummyEpisode('ep-active', 'USER_PROMPT', [{ text: 'help' }]),
    ];

    // current: 1000, max: 10000, retained: 500. Target deficit = 500
    const state = createDummyState(false, 500, new Set(), 1000, 10000, 500);

    const result = await processor.process(episodes, state);
    expect(result).toStrictEqual(episodes);
    expect(generateContentMock).not.toHaveBeenCalled();
  });

  it('summarizes intermediate episodes into a single snapshot episode', async () => {
    const episodes = [
      createDummyEpisode('ep-0', 'SYSTEM_EVENT', []),
      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'old 1' }]),
      createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'old 2' }]),
      createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'current' }]),
    ];

    // Target deficit = 200
    const state = createDummyState(false, 200, new Set(), 1000, 10000, 800);

    const result = await processor.process(episodes, state);

    // We started with 4 episodes.
    // Episodes [1, 2] were synthesized into a single new Snapshot episode.
    // Final array should be: [0, SNAPSHOT, 3] = length 3.
    expect(result.length).toBe(3);
    expect(result[0].id).toBe('ep-0');

    const snapshotEp = result[1];
    expect(snapshotEp.yield).toBeDefined();
    expect(snapshotEp.yield!.text).toContain('<CONTEXT_SNAPSHOT>');
    expect(snapshotEp.yield!.text).toContain('Mocked Compressed State Snapshot!');

    expect(result[2].id).toBe('ep-3');

    expect(generateContentMock).toHaveBeenCalledTimes(1);

    const llmArgs = generateContentMock.mock.calls[0][0];
    expect(llmArgs.contents[0].parts[0].text).toContain('old 1');
    expect(llmArgs.contents[0].parts[0].text).toContain('old 2');
  });
});
@@ -0,0 +1,40 @@
# Context Pipeline Testing Strategy & Audit

## Philosophy: Defense in Depth
Our testing strategy avoids the "endless tax" of brittle tests by strictly separating concerns:
1. **Unit Tests (Processors, System Fakes, Mappers):** Exhaustively test logical boundaries, token math, and state transformations. Driven by shared, DRY test factories (no repetitive boilerplate).
2. **Component Tests (ContextManager, Orchestrator):** Test the *wiring* and *triggers*. Verify that barriers block, background pipelines execute, and events fire correctly.
3. **Golden / E2E Tests:** Test emergent behavior. Pass in complex, raw chat histories and assert the exact final projected `Content[]` output against committed JSON snapshots.

---

## Audit Checklist & Coverage Tracker

### 1. The Tooling Library (`contextTestUtils.ts`)
- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`); a usage sketch follows this list.
- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern.
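
A minimal sketch of the intended factory usage, with signatures taken from the `contextTestUtils.ts` helpers added in this commit (the surrounding test setup is assumed):

```ts
import { createDummyEpisode, createDummyState } from '../testing/contextTestUtils.js';

// Two episodes: one we will protect, one eligible for transformation.
const episodes = [
  createDummyEpisode('ep-sys', 'USER_PROMPT', [{ type: 'text', text: 'system prompt' }]),
  createDummyEpisode('ep-old', 'USER_PROMPT', [{ type: 'text', text: 'an older turn' }]),
];
// Budget not satisfied, 100-token deficit, ep-sys protected; token counts use the defaults.
const state = createDummyState(false, 100, new Set(['ep-sys']));
```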

### 2. Unit Tests (The Processors)
Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR; the recurring shape is sketched after this checklist.
- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `HistorySquashingProcessor` (Audit coverage)
- [ ] `SemanticCompressionProcessor` (Audit coverage)
- [ ] `ContextTracer` (Complete)
- [ ] `SidecarLoader` (Complete)
- [ ] `IrMapper` / `IrProjector` (Audit coverage)
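
The boundary cases above reduce to one recurring vitest shape. A sketch of the bypass case, assuming it runs inside a `describe` block that has already constructed `processor` (the processor under test is illustrative):

```ts
it('bypasses processing when the budget is satisfied', async () => {
  const episodes = [
    createDummyEpisode('ep-1', 'USER_PROMPT', [{ type: 'text', text: 'hi' }]),
  ];
  // isBudgetSatisfied = true: the processor must return its input untouched.
  const result = await processor.process(episodes, createDummyState(true));
  expect(result).toStrictEqual(episodes);
});
```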

### 3. Component Tests (The Orchestration)
Goal: Prove the sidecar configuration accurately drives runtime behavior without re-testing the processor logic itself; a hypothetical barrier test is sketched after this checklist.
- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup.
- [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers).
- [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers).
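
A hypothetical shape for the `project()` barrier test; the injectable orchestrator and its `runSync` method are assumptions for illustration, not the real API:

```ts
it('project() runs the synchronous barrier pipeline before returning', async () => {
  // Hypothetical: assumes ContextManager exposes its orchestrator for spying.
  const runSync = vi.spyOn(orchestrator, 'runSync');

  await contextManager.project();

  // The barrier must have executed exactly once before project() resolved.
  expect(runSync).toHaveBeenCalledTimes(1);
});
```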

### 4. Golden / E2E Tests
- [ ] `contextManager.golden.test.ts`: Ensure we have a scenario representing a "Day in the Life" of the CLI (some images, some huge tool outputs, deep history) mapping to a snapshot; a minimal shape is sketched below.
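
A golden test would then be little more than the following; the fixture loader and the `project()` call shape are assumptions about the eventual API, while the snapshot assertion is standard vitest:

```ts
it('projects a "day in the life" history to a stable snapshot', async () => {
  // Hypothetical fixture: raw chat history with images, huge tool outputs, deep history.
  const history = loadFixture('day-in-the-life.json');
  const contents = await contextManager.project(history);
  // Vitest records the snapshot on first run and diffs against the committed copy after.
  expect(contents).toMatchSnapshot();
});
```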

---

## Next Actions
1. Migrate processor tests to shared factories to DRY up the suite.
2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations.
@@ -13,6 +13,57 @@ import { ContextManager } from '../contextManager.js';

 import { InMemoryFileSystem } from '../system/InMemoryFileSystem.js';
 import { DeterministicIdGenerator } from '../system/DeterministicIdGenerator.js';
+import type { Episode } from '../ir/types.js';
+import type { ContextAccountingState } from '../pipeline.js';
+import { randomUUID } from 'node:crypto';
+
+// Shared factory: builds an accounting state with sensible defaults so tests
+// only spell out the fields they care about.
+export function createDummyState(
+  isSatisfied = false,
+  deficit = 0,
+  protectedIds = new Set<string>(),
+  currentTokens = 5000,
+  maxTokens = 10000,
+  retainedTokens = 4000,
+): ContextAccountingState {
+  return {
+    currentTokens,
+    maxTokens,
+    retainedTokens,
+    deficitTokens: deficit,
+    protectedEpisodeIds: protectedIds,
+    isBudgetSatisfied: isSatisfied,
+  };
+}
+
+// Shared factory: builds a minimal Episode with a USER_PROMPT or SYSTEM_EVENT
+// trigger and optional tool-execution steps.
+export function createDummyEpisode(
+  id: string,
+  type: 'USER_PROMPT' | 'SYSTEM_EVENT',
+  parts: unknown[] = [],
+  toolSteps: { intent: Record<string, unknown>; observation: Record<string, unknown>; toolName?: string; tokens?: { intent: number; observation: number } }[] = [],
+): Episode {
+  return {
+    id,
+    timestamp: Date.now(),
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+    trigger: {
+      id: randomUUID(),
+      type,
+      name: type === 'SYSTEM_EVENT' ? 'dummy_event' : undefined,
+      payload: type === 'SYSTEM_EVENT' ? {} : undefined,
+      semanticParts: type === 'USER_PROMPT' ? (parts as any) : undefined,
+      metadata: { originalTokens: 100, currentTokens: 100, transformations: [] },
+    } as any,
+    steps: toolSteps.map((step) => ({
+      id: randomUUID(),
+      type: 'TOOL_EXECUTION',
+      toolName: step.toolName || 'test_tool',
+      intent: step.intent,
+      observation: step.observation,
+      tokens: step.tokens || { intent: 50, observation: 50 },
+      metadata: { originalTokens: 100, currentTokens: 100, transformations: [] },
+    })),
+  };
+}
+
 export function createMockEnvironment(): ContextEnvironment {
   return {