From d3d6b9403df5dae62f1e39c61ff33c73b7d1e178 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Mon, 6 Apr 2026 23:35:36 +0000
Subject: [PATCH] next steps

---
 .../core/src/context/ir/graphUtils.test.ts    | 121 +++++++++++
 packages/core/src/context/ir/mapper.test.ts   | 102 +++++++++
 .../src/context/sidecar/orchestrator.test.ts  | 194 ++++++++++++++++++
 .../core/src/context/sidecar/orchestrator.ts  |   2 +-
 packages/core/src/context/testing.md          |  30 +--
 5 files changed, 434 insertions(+), 15 deletions(-)
 create mode 100644 packages/core/src/context/ir/graphUtils.test.ts
 create mode 100644 packages/core/src/context/sidecar/orchestrator.test.ts

diff --git a/packages/core/src/context/ir/graphUtils.test.ts b/packages/core/src/context/ir/graphUtils.test.ts
new file mode 100644
index 0000000000..8d22962b81
--- /dev/null
+++ b/packages/core/src/context/ir/graphUtils.test.ts
@@ -0,0 +1,121 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { generateWorkingBufferView } from './graphUtils.js';
+import { createMockEnvironment, createDummyEpisode } from '../testing/contextTestUtils.js';
+import type { ContextEnvironment } from '../sidecar/environment.js';
+
+describe('graphUtils (View Generator)', () => {
+  let env: ContextEnvironment;
+
+  beforeEach(() => {
+    vi.resetAllMocks();
+    env = createMockEnvironment();
+    // Token mock: each episode costs trigger.metadata.originalTokens, or 100 when that is unset (or 0)
+    vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((eps) => {
+      return eps.reduce((acc, ep) => acc + (ep.trigger.metadata.originalTokens || 100), 0);
+    });
+  });
+
+  it('returns pristine episodes untouched if under budget', () => {
+    const episodes = [
+      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]),
+      createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]),
+    ];
+
+    // We retain 5000 tokens. Total mock tokens = 200.
+    const view = generateWorkingBufferView(episodes, 5000, env.tracer, env);
+
+    expect(view).toHaveLength(2);
+    // Must be a new array instance. This is an identity check only; per-episode cloning is not asserted here.
+    expect(view).not.toBe(episodes);
+    expect(view[0].id).toBe('ep-1');
+    expect(view[1].id).toBe('ep-2');
+  });
+
+  it('swaps to Masked variant when over budget (rolling backwards)', () => {
+    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1', type: 'text' }]);
+    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2', type: 'text' }]);
+
+    ep1.variants = {
+      'masked': { type: 'masked', status: 'ready', text: '', recoveredTokens: 10 }
+    };
+
+    // We only retain 10 tokens.
+    // ep-2 (newest) takes 100 tokens.
+    // Now rolling = 100. Over budget!
+    // ep-1 is evaluated, and swapped for Masked.
+    const view = generateWorkingBufferView([ep1, ep2], 10, env.tracer, env);
+
+    expect(view).toHaveLength(2);
+    expect(view[1].id).toBe('ep-2'); // Unchanged (newest)
+
+    expect(view[0].id).toBe('ep-1');
+    expect((view[0].trigger as any).semanticParts[0].presentation.text).toBe('');
+  });
+
+  it('swaps to Summary variant when over budget', () => {
+    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
+    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
+
+    ep1.variants = {
+      'summary': { type: 'summary', status: 'ready', text: '', recoveredTokens: 50 }
+    };
+
+    const view = generateWorkingBufferView([ep1, ep2], 10, env.tracer, env);
+
+    expect(view).toHaveLength(2);
+
+    // The summary completely replaces the internal steps and clears the yield.
+    expect(view[0].steps).toHaveLength(1);
+    expect(view[0].steps[0].type).toBe('AGENT_THOUGHT');
+    expect((view[0].steps[0] as any).text).toBe('');
+    expect(view[0].yield).toBeUndefined();
+  });
+
+  it('handles complex N-to-1 Snapshot skipping gracefully', () => {
+    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
+    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
+    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: '3' }]);
+    const ep4 = createDummyEpisode('ep-4', 'USER_PROMPT', [{ text: '4' }]);
+
+    // ep-3 has a snapshot that replaces [ep-1, ep-2, ep-3]
+    const snapshotEp = createDummyEpisode('snap-1', 'SYSTEM_EVENT', []);
+
+    ep3.variants = {
+      'snapshot': {
+        type: 'snapshot',
+        status: 'ready',
+        episode: snapshotEp,
+        replacedEpisodeIds: ['ep-1', 'ep-2', 'ep-3']
+      }
+    };
+
+    // We only retain 5 tokens, forcing the sweep to use variants for EVERYTHING except ep4.
+    const view = generateWorkingBufferView([ep1, ep2, ep3, ep4], 5, env.tracer, env);
+
+    // Result should be exactly: [snapshot, ep-4]
+    expect(view).toHaveLength(2);
+    expect(view[0].id).toBe('snap-1');
+    expect(view[1].id).toBe('ep-4');
+  });
+
+  it('ignores variants that are not yet "ready"', () => {
+    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
+    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
+
+    ep1.variants = {
+      'masked': { type: 'masked', status: 'computing', text: '', recoveredTokens: 10 }
+    };
+
+    const view = generateWorkingBufferView([ep1, ep2], 10, env.tracer, env);
+
+    // Because the variant was computing, it must fall back to the raw pristine text.
+    expect(view).toHaveLength(2);
+    expect((view[0].trigger as any).semanticParts[0].presentation).toBeUndefined();
+  });
+});
diff --git a/packages/core/src/context/ir/mapper.test.ts b/packages/core/src/context/ir/mapper.test.ts
index bb2365661c..f3eca63db0 100644
--- a/packages/core/src/context/ir/mapper.test.ts
+++ b/packages/core/src/context/ir/mapper.test.ts
@@ -130,6 +130,108 @@ describe('IrMapper', () => {
     // in one Content block, but the flat representation is semantically identical.
   });
 
+  it('should correctly handle multi-tool-calls grouped within a single turn without dropping observations', () => {
+    const rawHistory: Content[] = [
+      { role: 'user', parts: [{ text: 'Examine both of these tools please.' }] },
+      {
+        role: 'model',
+        parts: [
+          { text: 'I will call them concurrently.' },
+          {
+            functionCall: {
+              id: 'c1',
+              name: 'tool_one',
+              args: { p: 1 },
+            },
+          },
+          {
+            functionCall: {
+              id: 'c2',
+              name: 'tool_two',
+              args: { p: 2 },
+            },
+          },
+        ],
+      },
+      // Gemini forces the user turn to contain ALL function responses for that model turn
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              id: 'c1',
+              name: 'tool_one',
+              response: { r: 1 },
+            },
+          },
+          {
+            functionResponse: {
+              id: 'c2',
+              name: 'tool_two',
+              response: { r: 2 },
+            },
+          },
+        ],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Both complete.' }],
+      },
+    ];
+
+    const tokenCalculator = new ContextTokenCalculator(4);
+    const episodes = IrMapper.toIr(rawHistory, tokenCalculator);
+
+    // It should collapse into a single episode
+    expect(episodes).toHaveLength(1);
+    const ep = episodes[0];
+
+    expect(ep.trigger.type).toBe('USER_PROMPT');
+
+    // The steps array should contain:
+    // 0: AgentThought ("I will call them concurrently")
+    // 1: ToolExecution(tool_one)
+    // 2: ToolExecution(tool_two)
+
+    expect(ep.steps).toHaveLength(3);
+
+    expect(ep.steps[0].type).toBe('AGENT_THOUGHT');
+    expect((ep.steps[0] as any).text).toBe('I will call them concurrently.');
+
+    expect(ep.steps[1].type).toBe('TOOL_EXECUTION');
+    expect((ep.steps[1] as ToolExecution).toolName).toBe('tool_one');
+    expect((ep.steps[1] as ToolExecution).intent).toEqual({ p: 1 });
+    expect((ep.steps[1] as ToolExecution).observation).toEqual({ r: 1 });
+
+    expect(ep.steps[2].type).toBe('TOOL_EXECUTION');
+    expect((ep.steps[2] as ToolExecution).toolName).toBe('tool_two');
+    expect((ep.steps[2] as ToolExecution).intent).toEqual({ p: 2 });
+    expect((ep.steps[2] as ToolExecution).observation).toEqual({ r: 2 });
+
+    // The final model turn should become the yield
+    expect(ep.yield).toBeDefined();
+    expect(ep.yield?.type).toBe('AGENT_YIELD');
+    expect(ep.yield?.text).toBe('Both complete.');
+
+    // Now verify we can reconstitute it without dropping the multiple calls
+    const reconstituted = IrMapper.fromIr(episodes);
+
+    // The reconstituted history should have exactly 4 turns, same as original
+    expect(reconstituted).toHaveLength(4);
+
+    // Check that the Model turn has both function calls
+    expect(reconstituted[1].role).toBe('model');
+    expect(reconstituted[1].parts).toHaveLength(3); // text + call1 + call2
+    expect(reconstituted[1].parts![1].functionCall?.name).toBe('tool_one');
+    expect(reconstituted[1].parts![2].functionCall?.name).toBe('tool_two');
+
+    // Check that the User turn has both function responses
+    expect(reconstituted[2].role).toBe('user');
+    expect(reconstituted[2].parts).toHaveLength(2); // response1 + response2
+    expect(reconstituted[2].parts![0].functionResponse?.name).toBe('tool_one');
+    expect(reconstituted[2].parts![1].functionResponse?.name).toBe('tool_two');
+  });
+
   it('should guarantee WeakMap ID stability across continuous mapping', () => {
     // 1. Initial history
     const history: Content[] = [
diff --git a/packages/core/src/context/sidecar/orchestrator.test.ts b/packages/core/src/context/sidecar/orchestrator.test.ts
new file mode 100644
index 0000000000..84ca081279
--- /dev/null
+++ b/packages/core/src/context/sidecar/orchestrator.test.ts
@@ -0,0 +1,194 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
+import { PipelineOrchestrator } from './orchestrator.js';
+import { ProcessorRegistry } from './registry.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
+import type { ContextEnvironment } from './environment.js';
+import type { ContextProcessor } from '../pipeline.js';
+import type { SidecarConfig } from './types.js';
+import { ContextEventBus } from '../eventBus.js';
+
+import type { Episode } from '../ir/types.js';
+
+// Dummy processors for testing orchestration routing; the first two tag episodes[0] so a test can tell they ran
+class DummySyncProcessor implements ContextProcessor {
+  static create() { return new DummySyncProcessor(); }
+  constructor() {}
+  readonly name = 'DummySync';
+  async process(episodes: any[], _state: any) {
+    const copy = [...episodes];
+    copy[0] = { ...copy[0], dummyModified: true };
+    return copy;
+  }
+}
+
+class DummyAsyncProcessor implements ContextProcessor {
+  static create() { return new DummyAsyncProcessor(); }
+  constructor() {}
+  readonly name = 'DummyAsync';
+  async process(episodes: any[], _state: any) {
+    await new Promise(resolve => setTimeout(resolve, 50));
+    const copy = [...episodes];
+    copy[0] = { ...copy[0], asyncModified: true };
+    return copy;
+  }
+}
+
+class ThrowingProcessor implements ContextProcessor {
+  static create() { return new ThrowingProcessor(); }
+  constructor() {}
+  readonly name = 'Thrower';
+  async process(): Promise<Episode[]> {
+    throw new Error('Processor failed intentionally');
+  }
+}
+
+describe('PipelineOrchestrator (Component)', () => {
+  let env: ContextEnvironment;
+  let eventBus: ContextEventBus;
+
+  beforeEach(() => {
+    vi.resetAllMocks();
+    env = createMockEnvironment();
+    eventBus = env.eventBus as ContextEventBus;
+
+    // Register our test processors
+    ProcessorRegistry.register({ id: 'DummySyncProcessor', create: () => new DummySyncProcessor() });
+    ProcessorRegistry.register({ id: 'DummyAsyncProcessor', create: () => new DummyAsyncProcessor() });
+    ProcessorRegistry.register({ id: 'ThrowingProcessor', create: () => new ThrowingProcessor() });
+  });
+
+  afterEach(() => {
+    // Cleanup registry to not pollute other tests
+    (ProcessorRegistry as any).processors.delete('DummySyncProcessor');
+    (ProcessorRegistry as any).processors.delete('DummyAsyncProcessor');
+    (ProcessorRegistry as any).processors.delete('ThrowingProcessor');
+  });
+
+  const createConfig = (pipelines: any[]): SidecarConfig => ({
+    budget: { maxTokens: 100, retainedTokens: 50 },
+    gcBackstop: { strategy: 'truncate', target: 'max' },
+    pipelines
+  });
+
+  it('instantiates processors from the registry on initialization', () => {
+    const config = createConfig([
+      {
+        name: 'Sync',
+        execution: 'blocking',
+        triggers: [],
+        processors: [{ processorId: 'DummySyncProcessor' }]
+      }
+    ]);
+
+    const orchestrator = new PipelineOrchestrator(config, env, eventBus, env.tracer);
+    expect((orchestrator as any).instantiatedProcessors.has('DummySyncProcessor')).toBe(true);
+  });
+
+  it('throws an error if a config requests an unknown processor', () => {
+    const config = createConfig([
+      {
+        name: 'Bad',
+        execution: 'blocking',
+        triggers: [],
+        processors: [{ processorId: 'DoesNotExist' }]
+      }
+    ]);
+
+    expect(() => new PipelineOrchestrator(config, env, eventBus, env.tracer))
+      .toThrow('Context Processor [DoesNotExist] is not registered.');
+  });
+
+  it('executes blocking pipelines synchronously and returns the modified array', async () => {
+    const config = createConfig([
+      {
+        name: 'SyncPipe',
+        execution: 'blocking',
+        triggers: [],
+        processors: [{ processorId: 'DummySyncProcessor' }]
+      }
+    ]);
+    const orchestrator = new PipelineOrchestrator(config, env, eventBus, env.tracer);
+
+    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
+    const state = createDummyState(false);
+
+    const result = await orchestrator.executePipeline('SyncPipe', episodes, state);
+
+    expect(result).toHaveLength(1);
+    expect((result[0] as any).dummyModified).toBe(true);
+  });
+
+  it('executes background pipelines asynchronously without blocking the return', async () => {
+    const config = createConfig([
+      {
+        name: 'AsyncPipe',
+        execution: 'background',
+        triggers: [],
+        processors: [{ processorId: 'DummyAsyncProcessor' }]
+      }
+    ]);
+    const orchestrator = new PipelineOrchestrator(config, env, eventBus, env.tracer);
+
+    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
+    const state = createDummyState(false);
+
+    // This should resolve immediately with the UNMODIFIED array because execution is background
+    const result = await orchestrator.executePipeline('AsyncPipe', episodes, state);
+
+    expect(result).toHaveLength(1);
+    expect((result[0] as any).asyncModified).toBeUndefined(); // Not modified yet!
+
+    // Wait for the background task to complete (50ms delay in DummyAsyncProcessor)
+    await new Promise(resolve => setTimeout(resolve, 60));
+  });
+
+  it('gracefully handles and swallows processor errors in synchronous pipelines', async () => {
+    const config = createConfig([
+      {
+        name: 'ThrowingPipe',
+        execution: 'blocking',
+        triggers: [],
+        processors: [{ processorId: 'ThrowingProcessor' }]
+      }
+    ]);
+    const orchestrator = new PipelineOrchestrator(config, env, eventBus, env.tracer);
+
+    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
+    const state = createDummyState(false);
+
+    // It should not throw! It should swallow the error and return the unmodified array.
+    const result = await orchestrator.executePipeline('ThrowingPipe', episodes, state);
+
+    expect(result).toHaveLength(1);
+    expect(result).toStrictEqual(episodes);
+  });
+
+  it('automatically binds to budget_exceeded trigger via EventBus', () => {
+    const config = createConfig([
+      {
+        name: 'PressureRelief',
+        execution: 'background',
+        triggers: ['budget_exceeded'],
+        processors: [{ processorId: 'DummyAsyncProcessor' }]
+      }
+    ]);
+
+    // Spy on the private method to see if the trigger fires it
+    const executeSpy = vi.spyOn(PipelineOrchestrator.prototype as any, 'executePipelineAsync');
+
+    new PipelineOrchestrator(config, env, eventBus, env.tracer);
+
+    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
+
+    // Emit the trigger
+    eventBus.emitConsolidationNeeded({ episodes, targetDeficit: 100 });
+
+    expect(executeSpy).toHaveBeenCalled();
+  });
+});
diff --git a/packages/core/src/context/sidecar/orchestrator.ts b/packages/core/src/context/sidecar/orchestrator.ts
index b4778503b3..5f0d5d3c5e 100644
--- a/packages/core/src/context/sidecar/orchestrator.ts
+++ b/packages/core/src/context/sidecar/orchestrator.ts
@@ -34,7 +34,7 @@ export class PipelineOrchestrator {
         if (!this.instantiatedProcessors.has(procDef.processorId)) {
           const processorClass = ProcessorRegistry.get(procDef.processorId);
           if (!processorClass) {
-            throw new Error(`Unknown processor ID: ${procDef.processorId}`);
+            throw new Error(`Context Processor [${procDef.processorId}] is not registered.`);
           }
           // The Orchestrator injects standard dependencies required by processors
           // If a processor needs the eventBus (like Snapshot), it expects it via constructor.
diff --git a/packages/core/src/context/testing.md b/packages/core/src/context/testing.md
index 1d333d9f94..f0cd794858 100644
--- a/packages/core/src/context/testing.md
+++ b/packages/core/src/context/testing.md
@@ -11,22 +11,24 @@ Our testing strategy avoids the "endless tax" of brittle tests by strictly separ
 ## Audit Checklist & Coverage Tracker
 
 ### 1. The Tooling Library (`contextTestUtils.ts`)
-- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
-- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern.
+- [x] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
+- [x] Ensure all existing tests are migrated to use these helpers to establish the pattern.
 
-### 2. Unit Tests (The Processors)
-Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
-- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers)
-- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers)
-- [ ] `HistorySquashingProcessor` (Audit coverage)
-- [ ] `SemanticCompressionProcessor` (Audit coverage)
-- [ ] `ContextTracer` (Complete)
-- [ ] `SidecarLoader` (Complete)
-- [ ] `IrMapper` / `IrProjector` (Audit coverage)
+### 2. Unit Tests (The Processors & Map/Reduce)
+Goal: Ensure every component gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
+- [x] `BlobDegradationProcessor` (Completed)
+- [x] `ToolMaskingProcessor` (Completed)
+- [x] `HistorySquashingProcessor` (Completed)
+- [x] `SemanticCompressionProcessor` (Completed)
+- [x] `StateSnapshotProcessor` (Completed)
+- [x] `EmergencyTruncationProcessor` (Completed)
+- [x] `ContextTracer` (Completed)
+- [x] `SidecarLoader` (Completed)
+- [x] `IrMapper` / `graphUtils` (Completed - Handles Multi-Tool Concurrency and Backwards Graph Traversal)
 
 ### 3. Component Tests (The Orchestration)
 Goal: Prove the sidecar configuration accurately drives runtime behavior without testing the processor logic itself.
-- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup.
+- [x] `PipelineOrchestrator`: Test sync vs. async routing, error swallowing, and trigger setup.
 - [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers).
 - [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers).
 
@@ -36,5 +38,5 @@ Goal: Prove the sidecar configuration accurately drives runtime behavior without
 ---
 
 ## Next Actions
-1. Migrate processor tests to shared factories to DRY up the suite.
-2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations.
+1. Audit the ContextManager component tests (opportunistic updates & sync barrier).
+2. Finalize the End-to-End "Day in the Life" Golden Snapshot test.