next steps

2026-05-20 00:32:31 -07:00 · 2026-04-06 23:35:36 +00:00
parent fbcfa40f1d
commit d3d6b9403d
5 changed files with 434 additions and 15 deletions
@@ -0,0 +1,121 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { generateWorkingBufferView } from './graphUtils.js';
+import { createMockEnvironment, createDummyEpisode } from '../testing/contextTestUtils.js';
+import type { ContextEnvironment } from '../sidecar/environment.js';
+
+describe('graphUtils (View Generator)', () => {
+  let env: ContextEnvironment;
+
+  beforeEach(() => {
+    vi.resetAllMocks();
+    env = createMockEnvironment();
+    // Token mock: each episode costs its trigger's metadata.originalTokens, or a flat 100 when that value is falsy (missing or 0).
+    vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((eps) => {
+        return eps.reduce((acc, ep) => acc + (ep.trigger.metadata.originalTokens || 100), 0);
+    });
+  });
+
+  it('returns pristine episodes untouched if under budget', () => {
+    const episodes = [
+      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]),
+      createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]),
+    ];
+    
+    // We retain 5000 tokens. Total mock tokens = 200.
+    const view = generateWorkingBufferView(episodes, 5000, env.tracer, env);
+    
+    expect(view).toHaveLength(2);
+    // Only array identity is asserted: the view must be a new array, not the input reference (per-episode cloning is not verified here).
+    expect(view).not.toBe(episodes);
+    expect(view[0].id).toBe('ep-1');
+    expect(view[1].id).toBe('ep-2');
+  });
+
+  it('swaps to Masked variant when over budget (rolling backwards)', () => {
+    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1', type: 'text' }]);
+    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2', type: 'text' }]);
+    
+    ep1.variants = {
+        'masked': { type: 'masked', status: 'ready', text: '<MASKED>', recoveredTokens: 10 }
+    };
+    
+    // We only retain 10 tokens.
+    // ep-2 (newest) takes 100 mock tokens (the mock's fallback cost, assuming the dummy episode sets no originalTokens).
+    // Now rolling = 100, which is over the 10-token budget!
+    // ep-1 is evaluated, and swapped for Masked.
+    const view = generateWorkingBufferView([ep1, ep2], 10, env.tracer, env);
+    
+    expect(view).toHaveLength(2);
+    expect(view[1].id).toBe('ep-2'); // Unchanged (newest)
+    
+    expect(view[0].id).toBe('ep-1');
+    expect((view[0].trigger as any).semanticParts[0].presentation.text).toBe('<MASKED>');
+  });
+
+  it('swaps to Summary variant when over budget', () => {
+    const olderEpisode = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
+    const newerEpisode = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
+
+    // Only the older episode carries a ready summary variant.
+    olderEpisode.variants = {
+      summary: { type: 'summary', status: 'ready', text: '<SUMMARY>', recoveredTokens: 50 },
+    };
+
+    const view = generateWorkingBufferView([olderEpisode, newerEpisode], 10, env.tracer, env);
+    expect(view).toHaveLength(2);
+
+    // The summarized episode is expected to hold a single AGENT_THOUGHT step
+    // carrying the summary text, and to have no yield.
+    const summarized = view[0];
+    expect(summarized.steps).toHaveLength(1);
+    expect(summarized.steps[0].type).toBe('AGENT_THOUGHT');
+    expect((summarized.steps[0] as any).text).toBe('<SUMMARY>');
+    expect(summarized.yield).toBeUndefined();
+  });
+
+  it('handles complex N-to-1 Snapshot skipping gracefully', () => {
+    // Four ordinary episodes, oldest first: ep-1 .. ep-4.
+    const [first, second, third, fourth] = ['1', '2', '3', '4'].map((n) =>
+      createDummyEpisode(`ep-${n}`, 'USER_PROMPT', [{ text: n }]),
+    );
+
+    // A ready snapshot hangs off ep-3 and declares that it replaces [ep-1, ep-2, ep-3].
+    const snapshotEpisode = createDummyEpisode('snap-1', 'SYSTEM_EVENT', []);
+    third.variants = {
+      snapshot: {
+        type: 'snapshot',
+        status: 'ready',
+        episode: snapshotEpisode,
+        replacedEpisodeIds: ['ep-1', 'ep-2', 'ep-3'],
+      },
+    };
+
+    // A 5-token budget forces the sweep to use variants for EVERYTHING except ep-4.
+    const view = generateWorkingBufferView([first, second, third, fourth], 5, env.tracer, env);
+
+    // Result should be exactly: [snapshot, ep-4]
+    expect(view).toHaveLength(2);
+    expect(view[0].id).toBe('snap-1');
+    expect(view[1].id).toBe('ep-4');
+  });
+
+  it('ignores variants that are not yet "ready"', () => {
+    const olderEpisode = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
+    const newerEpisode = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
+
+    // The masked variant exists but is still being computed.
+    olderEpisode.variants = {
+      masked: { type: 'masked', status: 'computing', text: '<MASKED>', recoveredTokens: 10 },
+    };
+
+    const view = generateWorkingBufferView([olderEpisode, newerEpisode], 10, env.tracer, env);
+    expect(view).toHaveLength(2);
+
+    // A computing variant must not be applied, so no presentation override is expected on the raw part.
+    const firstPart = (view[0].trigger as any).semanticParts[0];
+    expect(firstPart.presentation).toBeUndefined();
+  });
+});
@@ -130,6 +130,108 @@ describe('IrMapper', () => {
    // in one Content block, but the flat representation is semantically identical.
  });

+  it('should correctly handle multi-tool-calls grouped within a single turn without dropping observations', () => {
+    // Turn layout: user prompt -> model (text + two calls) -> user (two responses) -> model (final text).
+    const modelCallTurn: Content = {
+      role: 'model',
+      parts: [
+        { text: 'I will call them concurrently.' },
+        { functionCall: { id: 'c1', name: 'tool_one', args: { p: 1 } } },
+        { functionCall: { id: 'c2', name: 'tool_two', args: { p: 2 } } },
+      ],
+    };
+    // Gemini forces the user turn to contain ALL function responses for that model turn
+    const toolResponseTurn: Content = {
+      role: 'user',
+      parts: [
+        { functionResponse: { id: 'c1', name: 'tool_one', response: { r: 1 } } },
+        { functionResponse: { id: 'c2', name: 'tool_two', response: { r: 2 } } },
+      ],
+    };
+    const rawHistory: Content[] = [
+      { role: 'user', parts: [{ text: 'Examine both of these tools please.' }] },
+      modelCallTurn,
+      toolResponseTurn,
+      { role: 'model', parts: [{ text: 'Both complete.' }] },
+    ];
+
+    const tokenCalculator = new ContextTokenCalculator(4);
+    const episodes = IrMapper.toIr(rawHistory, tokenCalculator);
+
+    // It should collapse into a single episode
+    expect(episodes).toHaveLength(1);
+    const episode = episodes[0];
+    expect(episode.trigger.type).toBe('USER_PROMPT');
+
+    // Expected steps, in order: the agent's thought, then one tool execution per call.
+    expect(episode.steps).toHaveLength(3);
+    const thought = episode.steps[0];
+    expect(thought.type).toBe('AGENT_THOUGHT');
+    expect((thought as any).text).toBe('I will call them concurrently.');
+
+    const expectedTools = [
+      { toolName: 'tool_one', intent: { p: 1 }, observation: { r: 1 } },
+      { toolName: 'tool_two', intent: { p: 2 }, observation: { r: 2 } },
+    ];
+    expectedTools.forEach((expected, offset) => {
+      const step = episode.steps[offset + 1];
+      expect(step.type).toBe('TOOL_EXECUTION');
+      const execution = step as ToolExecution;
+      expect(execution.toolName).toBe(expected.toolName);
+      expect(execution.intent).toEqual(expected.intent);
+      expect(execution.observation).toEqual(expected.observation);
+    });
+
+    // The final model turn should become the yield
+    expect(episode.yield).toBeDefined();
+    expect(episode.yield?.type).toBe('AGENT_YIELD');
+    expect(episode.yield?.text).toBe('Both complete.');
+
+    // Round-trip: the reconstituted history should have exactly 4 turns, same as the original.
+    const reconstituted = IrMapper.fromIr(episodes);
+    expect(reconstituted).toHaveLength(4);
+
+    // The model turn must still carry both function calls.
+    const modelTurn = reconstituted[1];
+    expect(modelTurn.role).toBe('model');
+    expect(modelTurn.parts).toHaveLength(3); // text + call1 + call2
+    expect(modelTurn.parts![1].functionCall?.name).toBe('tool_one');
+    expect(modelTurn.parts![2].functionCall?.name).toBe('tool_two');
+
+    // The user turn must still carry both function responses.
+    const userTurn = reconstituted[2];
+    expect(userTurn.role).toBe('user');
+    expect(userTurn.parts).toHaveLength(2); // response1 + response2
+    expect(userTurn.parts![0].functionResponse?.name).toBe('tool_one');
+    expect(userTurn.parts![1].functionResponse?.name).toBe('tool_two');
+  });
+
  it('should guarantee WeakMap ID stability across continuous mapping', () => {
    // 1. Initial history
    const history: Content[] = [
@@ -0,0 +1,194 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
+import { PipelineOrchestrator } from './orchestrator.js';
+import { ProcessorRegistry } from './registry.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
+import type { ContextEnvironment } from './environment.js';
+import type { ContextProcessor } from '../pipeline.js';
+import type { SidecarConfig } from './types.js';
+import { ContextEventBus } from '../eventBus.js';
+
+import type { Episode } from '../ir/types.js';
+
+// Create a Dummy Processor for testing Orchestration routing
+/** Test double: resolves with a copy of the input whose first entry is tagged `dummyModified: true`. */
+class DummySyncProcessor implements ContextProcessor {
+  readonly name = 'DummySync';
+
+  static create() {
+    return new DummySyncProcessor();
+  }
+
+  async process(episodes: any[], _state: any) {
+    // The input array is never mutated; a fresh array with a fresh first element is returned.
+    const [first, ...rest] = episodes;
+    return [{ ...first, dummyModified: true }, ...rest];
+  }
+}
+
+/** Test double: waits 50ms, then resolves with a copy whose first entry is tagged `asyncModified: true`. */
+class DummyAsyncProcessor implements ContextProcessor {
+  readonly name = 'DummyAsync';
+
+  static create() {
+    return new DummyAsyncProcessor();
+  }
+
+  async process(episodes: any[], _state: any) {
+    // Simulated slow work, so callers can observe whether they were blocked on it.
+    await new Promise((done) => setTimeout(done, 50));
+    const [first, ...rest] = episodes;
+    return [{ ...first, asyncModified: true }, ...rest];
+  }
+}
+
+/** Test double whose `process` always rejects with a fixed error. */
+class ThrowingProcessor implements ContextProcessor {
+  readonly name = 'Thrower';
+
+  static create() {
+    return new ThrowingProcessor();
+  }
+
+  process(): Promise<Episode[]> {
+    return Promise.reject(new Error('Processor failed intentionally'));
+  }
+}
+
+describe('PipelineOrchestrator (Component)', () => {
+  let env: ContextEnvironment;
+  let eventBus: ContextEventBus;
+
+  beforeEach(() => {
+    vi.resetAllMocks();
+    env = createMockEnvironment();
+    eventBus = env.eventBus as ContextEventBus;
+
+    // Make the three test doubles resolvable by ID before each test runs.
+    const testProcessors = [
+      { id: 'DummySyncProcessor', create: () => new DummySyncProcessor() },
+      { id: 'DummyAsyncProcessor', create: () => new DummyAsyncProcessor() },
+      { id: 'ThrowingProcessor', create: () => new ThrowingProcessor() },
+    ];
+    for (const definition of testProcessors) {
+      ProcessorRegistry.register(definition);
+    }
+  });
+
+  afterEach(() => {
+    // Uninstall vi.spyOn patches (one test spies on PipelineOrchestrator.prototype)
+    // so a patched prototype method cannot leak into later tests.
+    vi.restoreAllMocks();
+
+    // Cleanup registry to not pollute other tests
+    const registered = (ProcessorRegistry as any).processors;
+    for (const id of ['DummySyncProcessor', 'DummyAsyncProcessor', 'ThrowingProcessor']) {
+      registered.delete(id);
+    }
+  });
+
+  /** Wraps the given pipeline definitions in a SidecarConfig with fixed budget and backstop settings. */
+  const createConfig = (pipelines: any[]): SidecarConfig => {
+    return {
+      budget: { maxTokens: 100, retainedTokens: 50 },
+      gcBackstop: { strategy: 'truncate', target: 'max' },
+      pipelines,
+    };
+  };
+
+  it('instantiates processors from the registry on initialization', () => {
+    const syncPipeline = {
+      name: 'Sync',
+      execution: 'blocking',
+      triggers: [],
+      processors: [{ processorId: 'DummySyncProcessor' }],
+    };
+    const config = createConfig([syncPipeline]);
+
+    const orchestrator = new PipelineOrchestrator(config, env, eventBus, env.tracer);
+
+    // Reach past the public typing to inspect which processor IDs were instantiated.
+    const instantiated = (orchestrator as any).instantiatedProcessors;
+    expect(instantiated.has('DummySyncProcessor')).toBe(true);
+  });
+
+  it('throws an error if a config requests an unknown processor', () => {
+    const badPipeline = {
+      name: 'Bad',
+      execution: 'blocking',
+      triggers: [],
+      processors: [{ processorId: 'DoesNotExist' }],
+    };
+    const config = createConfig([badPipeline]);
+
+    // Construction itself must fail, with a message naming the missing processor ID.
+    const construct = () => new PipelineOrchestrator(config, env, eventBus, env.tracer);
+    expect(construct).toThrow('Context Processor [DoesNotExist] is not registered.');
+  });
+
+  it('executes blocking pipelines synchronously and returns the modified array', async () => {
+    const blockingPipeline = {
+      name: 'SyncPipe',
+      execution: 'blocking',
+      triggers: [],
+      processors: [{ processorId: 'DummySyncProcessor' }],
+    };
+    const orchestrator = new PipelineOrchestrator(createConfig([blockingPipeline]), env, eventBus, env.tracer);
+
+    // Awaiting a blocking pipeline should yield the processor's output.
+    const result = await orchestrator.executePipeline(
+      'SyncPipe',
+      [createDummyEpisode('1', 'USER_PROMPT', [])],
+      createDummyState(false),
+    );
+
+    expect(result).toHaveLength(1);
+    expect((result[0] as any).dummyModified).toBe(true);
+  });
+
+  it('executes background pipelines asynchronously without blocking the return', async () => {
+    const backgroundPipeline = {
+      name: 'AsyncPipe',
+      execution: 'background',
+      triggers: [],
+      processors: [{ processorId: 'DummyAsyncProcessor' }],
+    };
+    const orchestrator = new PipelineOrchestrator(createConfig([backgroundPipeline]), env, eventBus, env.tracer);
+    const input = [createDummyEpisode('1', 'USER_PROMPT', [])];
+
+    // Background execution: the awaited result should still be the UNMODIFIED array.
+    const result = await orchestrator.executePipeline('AsyncPipe', input, createDummyState(false));
+    expect(result).toHaveLength(1);
+    expect((result[0] as any).asyncModified).toBeUndefined(); // Not modified yet!
+
+    // Outlast DummyAsyncProcessor's 50ms delay so its timer fires before the test ends.
+    await new Promise((resolve) => setTimeout(resolve, 60));
+  });
+
+  it('gracefully handles and swallows processor errors in synchronous pipelines', async () => {
+    const throwingPipeline = {
+      name: 'ThrowingPipe',
+      execution: 'blocking',
+      triggers: [],
+      processors: [{ processorId: 'ThrowingProcessor' }],
+    };
+    const orchestrator = new PipelineOrchestrator(createConfig([throwingPipeline]), env, eventBus, env.tracer);
+    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
+
+    // The call must resolve rather than reject, handing back content equal to the input.
+    const result = await orchestrator.executePipeline('ThrowingPipe', episodes, createDummyState(false));
+
+    expect(result).toHaveLength(1);
+    expect(result).toStrictEqual(episodes);
+  });
+
+  it('automatically binds to budget_exceeded trigger via EventBus', () => {
+    const pressureRelief = {
+      name: 'PressureRelief',
+      execution: 'background',
+      triggers: ['budget_exceeded'],
+      processors: [{ processorId: 'DummyAsyncProcessor' }],
+    };
+    const config = createConfig([pressureRelief]);
+
+    // Spy on the non-public method (on the prototype, before construction) to see if the trigger fires it.
+    const executeSpy = vi.spyOn(PipelineOrchestrator.prototype as any, 'executePipelineAsync');
+    new PipelineOrchestrator(config, env, eventBus, env.tracer);
+
+    // Emit the trigger
+    eventBus.emitConsolidationNeeded({
+      episodes: [createDummyEpisode('1', 'USER_PROMPT', [])],
+      targetDeficit: 100,
+    });
+
+    expect(executeSpy).toHaveBeenCalled();
+  });
+});
@@ -34,7 +34,7 @@ export class PipelineOrchestrator {
        if (!this.instantiatedProcessors.has(procDef.processorId)) {
          const processorClass = ProcessorRegistry.get(procDef.processorId);
          if (!processorClass) {
-            throw new Error(`Unknown processor ID: ${procDef.processorId}`);
+            throw new Error(`Context Processor [${procDef.processorId}] is not registered.`);
          }
          // The Orchestrator injects standard dependencies required by processors
          // If a processor needs the eventBus (like Snapshot), it expects it via constructor.
@@ -11,22 +11,24 @@ Our testing strategy avoids the "endless tax" of brittle tests by strictly separ
 ## Audit Checklist & Coverage Tracker

 ### 1. The Tooling Library (`contextTestUtils.ts`)
- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern.
+- [x] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
+- [x] Ensure all existing tests are migrated to use these helpers to establish the pattern.

-### 2. Unit Tests (The Processors)
-Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `HistorySquashingProcessor` (Audit coverage)
- [ ] `SemanticCompressionProcessor` (Audit coverage)
- [ ] `ContextTracer` (Complete)
- [ ] `SidecarLoader` (Complete)
- [ ] `IrMapper` / `IrProjector` (Audit coverage)
+### 2. Unit Tests (The Processors & Map/Reduce)
+Goal: Ensure every component gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
+- [x] `BlobDegradationProcessor` (Completed)
+- [x] `ToolMaskingProcessor` (Completed)
+- [x] `HistorySquashingProcessor` (Completed)
+- [x] `SemanticCompressionProcessor` (Completed)
+- [x] `StateSnapshotProcessor` (Completed)
+- [x] `EmergencyTruncationProcessor` (Completed)
+- [x] `ContextTracer` (Completed)
+- [x] `SidecarLoader` (Completed)
+- [x] `IrMapper` / `graphUtils` (Completed - Handles Multi-Tool Concurrency and Backwards Graph Traversal)

 ### 3. Component Tests (The Orchestration)
 Goal: Prove the sidecar configuration accurately drives runtime behavior without testing the processor logic itself.
- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup.
+- [x] `PipelineOrchestrator`: Test sync vs. async routing, error swallowing, and trigger setup.
 - [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers).
 - [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers).

@@ -36,5 +38,5 @@ Goal: Prove the sidecar configuration accurately drives runtime behavior without
 ---

 ## Next Actions
-1. Migrate processor tests to shared factories to DRY up the suite.
-2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations.
+1. Audit the ContextManager component tests (opportunistic updates & sync barrier).
+2. Finalize the End-to-End "Day in the Life" Golden Snapshot test.