next steps

This commit is contained in:
Your Name
2026-04-06 23:35:36 +00:00
parent fbcfa40f1d
commit d3d6b9403d
5 changed files with 434 additions and 15 deletions
@@ -0,0 +1,121 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { generateWorkingBufferView } from './graphUtils.js';
import { createMockEnvironment, createDummyEpisode } from '../testing/contextTestUtils.js';
import type { ContextEnvironment } from '../sidecar/environment.js';
describe('graphUtils (View Generator)', () => {
let env: ContextEnvironment;
beforeEach(() => {
  vi.resetAllMocks();
  env = createMockEnvironment();
  // Stub the token calculator: each episode costs its recorded
  // `originalTokens`, falling back to a flat 100 tokens when that value is
  // missing or 0.
  vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((eps) => {
    let total = 0;
    for (const ep of eps) {
      total += ep.trigger.metadata.originalTokens || 100;
    }
    return total;
  });
});
it('returns pristine episodes untouched if under budget', () => {
  const first = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const second = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
  const episodes = [first, second];
  // Retained budget is 5000 tokens; the mocked calculator is expected to
  // report about 200 for these two episodes (100 each by default).
  const view = generateWorkingBufferView(episodes, 5000, env.tracer, env);
  expect(view).toHaveLength(2);
  // The generator must return a new array, not the input array itself.
  // NOTE(review): this only checks array identity, not a deep clone of each episode.
  expect(view).not.toBe(episodes);
  // Order and identity of the episodes are preserved.
  expect(view.map((ep) => ep.id)).toEqual(['ep-1', 'ep-2']);
});
it('swaps to Masked variant when over budget (rolling backwards)', () => {
  const olderEpisode = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1', type: 'text' }]);
  const newerEpisode = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2', type: 'text' }]);
  // Only the older episode carries a ready Masked variant.
  olderEpisode.variants = {
    masked: { type: 'masked', status: 'ready', text: '<MASKED>', recoveredTokens: 10 },
  };
  // Retained budget is only 10 tokens, well below the ~100 mocked tokens of
  // the newest episode alone. The older episode is therefore expected to be
  // served from its Masked variant.
  const view = generateWorkingBufferView([olderEpisode, newerEpisode], 10, env.tracer, env);
  expect(view).toHaveLength(2);
  const [maskedView, newestView] = view;
  expect(newestView.id).toBe('ep-2'); // Newest episode keeps its slot
  expect(maskedView.id).toBe('ep-1');
  // The masked text surfaces through the trigger's first semantic part.
  const maskedTrigger = maskedView.trigger as any;
  expect(maskedTrigger.semanticParts[0].presentation.text).toBe('<MASKED>');
});
it('swaps to Summary variant when over budget', () => {
  const olderEpisode = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const newerEpisode = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
  // Only the older episode carries a ready Summary variant.
  olderEpisode.variants = {
    summary: { type: 'summary', status: 'ready', text: '<SUMMARY>', recoveredTokens: 50 },
  };
  // A 10-token retained budget forces the older episode onto its variant.
  const view = generateWorkingBufferView([olderEpisode, newerEpisode], 10, env.tracer, env);
  expect(view).toHaveLength(2);
  // Expected shape of a summarized episode: one AGENT_THOUGHT step holding
  // the summary text, and no yield.
  const summarized = view[0];
  expect(summarized.steps).toHaveLength(1);
  const [onlyStep] = summarized.steps;
  expect(onlyStep.type).toBe('AGENT_THOUGHT');
  expect((onlyStep as any).text).toBe('<SUMMARY>');
  expect(summarized.yield).toBeUndefined();
});
it('handles complex N-to-1 Snapshot skipping gracefully', () => {
  // Small local factory: each prompt episode is `ep-<n>` with text `<n>`.
  const makePrompt = (n: string) =>
    createDummyEpisode(`ep-${n}`, 'USER_PROMPT', [{ text: n }]);
  const ep1 = makePrompt('1');
  const ep2 = makePrompt('2');
  const ep3 = makePrompt('3');
  const ep4 = makePrompt('4');
  // ep-3 owns a ready snapshot that stands in for [ep-1, ep-2, ep-3].
  const snapshotEp = createDummyEpisode('snap-1', 'SYSTEM_EVENT', []);
  ep3.variants = {
    snapshot: {
      type: 'snapshot',
      status: 'ready',
      episode: snapshotEp,
      replacedEpisodeIds: ['ep-1', 'ep-2', 'ep-3'],
    },
  };
  // A 5-token retained budget pushes every episode but the newest onto variants.
  const view = generateWorkingBufferView([ep1, ep2, ep3, ep4], 5, env.tracer, env);
  // Expected result: the snapshot followed by the untouched newest episode.
  expect(view).toHaveLength(2);
  const [head, tail] = view;
  expect(head.id).toBe('snap-1');
  expect(tail.id).toBe('ep-4');
});
it('ignores variants that are not yet "ready"', () => {
  const olderEpisode = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const newerEpisode = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
  // The Masked variant exists but is still being computed.
  olderEpisode.variants = {
    masked: { type: 'masked', status: 'computing', text: '<MASKED>', recoveredTokens: 10 },
  };
  const view = generateWorkingBufferView([olderEpisode, newerEpisode], 10, env.tracer, env);
  expect(view).toHaveLength(2);
  // A non-ready variant must not be applied: the trigger's first semantic
  // part carries no `presentation` override.
  const olderTrigger = view[0].trigger as any;
  expect(olderTrigger.semanticParts[0].presentation).toBeUndefined();
});
});
+102
View File
@@ -130,6 +130,108 @@ describe('IrMapper', () => {
// in one Content block, but the flat representation is semantically identical.
});
it('should correctly handle multi-tool-calls grouped within a single turn without dropping observations', () => {
  // Turn 1: the user's request.
  const userPrompt: Content = {
    role: 'user',
    parts: [{ text: 'Examine both of these tools please.' }],
  };
  // Turn 2: the model answers with text plus two function calls in one turn.
  const modelToolCalls: Content = {
    role: 'model',
    parts: [
      { text: 'I will call them concurrently.' },
      { functionCall: { id: 'c1', name: 'tool_one', args: { p: 1 } } },
      { functionCall: { id: 'c2', name: 'tool_two', args: { p: 2 } } },
    ],
  };
  // Turn 3: Gemini forces the user turn to contain ALL function responses
  // for that model turn.
  const toolResponses: Content = {
    role: 'user',
    parts: [
      { functionResponse: { id: 'c1', name: 'tool_one', response: { r: 1 } } },
      { functionResponse: { id: 'c2', name: 'tool_two', response: { r: 2 } } },
    ],
  };
  // Turn 4: the model's closing message.
  const modelWrapUp: Content = {
    role: 'model',
    parts: [{ text: 'Both complete.' }],
  };
  const rawHistory: Content[] = [userPrompt, modelToolCalls, toolResponses, modelWrapUp];

  const tokenCalculator = new ContextTokenCalculator(4);
  const episodes = IrMapper.toIr(rawHistory, tokenCalculator);

  // All four turns should collapse into a single episode.
  expect(episodes).toHaveLength(1);
  const [episode] = episodes;
  expect(episode.trigger.type).toBe('USER_PROMPT');

  // Expected steps, in order: the model's thought, then one tool execution
  // per function call.
  expect(episode.steps).toHaveLength(3);
  const [thought, firstTool, secondTool] = episode.steps;
  expect(thought.type).toBe('AGENT_THOUGHT');
  expect((thought as any).text).toBe('I will call them concurrently.');

  expect(firstTool.type).toBe('TOOL_EXECUTION');
  const firstExecution = firstTool as ToolExecution;
  expect(firstExecution.toolName).toBe('tool_one');
  expect(firstExecution.intent).toEqual({ p: 1 });
  expect(firstExecution.observation).toEqual({ r: 1 });

  expect(secondTool.type).toBe('TOOL_EXECUTION');
  const secondExecution = secondTool as ToolExecution;
  expect(secondExecution.toolName).toBe('tool_two');
  expect(secondExecution.intent).toEqual({ p: 2 });
  expect(secondExecution.observation).toEqual({ r: 2 });

  // The final model turn should become the yield.
  expect(episode.yield).toBeDefined();
  expect(episode.yield?.type).toBe('AGENT_YIELD');
  expect(episode.yield?.text).toBe('Both complete.');

  // Round-trip: mapping back must not drop any of the grouped calls.
  const reconstituted = IrMapper.fromIr(episodes);
  // Same number of turns as the original history.
  expect(reconstituted).toHaveLength(4);

  // The model turn keeps its text plus both function calls.
  const modelTurn = reconstituted[1];
  expect(modelTurn.role).toBe('model');
  expect(modelTurn.parts).toHaveLength(3); // text + call1 + call2
  expect(modelTurn.parts![1].functionCall?.name).toBe('tool_one');
  expect(modelTurn.parts![2].functionCall?.name).toBe('tool_two');

  // The user turn keeps both function responses.
  const responseTurn = reconstituted[2];
  expect(responseTurn.role).toBe('user');
  expect(responseTurn.parts).toHaveLength(2); // response1 + response2
  expect(responseTurn.parts![0].functionResponse?.name).toBe('tool_one');
  expect(responseTurn.parts![1].functionResponse?.name).toBe('tool_two');
});
it('should guarantee WeakMap ID stability across continuous mapping', () => {
// 1. Initial history
const history: Content[] = [
@@ -0,0 +1,194 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
import { PipelineOrchestrator } from './orchestrator.js';
import { ProcessorRegistry } from './registry.js';
import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
import type { ContextEnvironment } from './environment.js';
import type { ContextProcessor } from '../pipeline.js';
import type { SidecarConfig } from './types.js';
import { ContextEventBus } from '../eventBus.js';
import type { Episode } from '../ir/types.js';
// Create a Dummy Processor for testing Orchestration routing
/**
 * Test double for orchestration routing: resolves without any delay and tags
 * the first episode with `dummyModified: true`.
 */
class DummySyncProcessor implements ContextProcessor {
  readonly name = 'DummySync';

  /** Factory mirroring the constructor; takes no dependencies. */
  static create() {
    return new DummySyncProcessor();
  }

  /**
   * Returns a new array whose first entry is a shallow copy of the original
   * first episode with `dummyModified: true` added. The input array and its
   * elements are left untouched.
   */
  async process(episodes: any[], _state: any) {
    const [head, ...tail] = episodes;
    return [{ ...head, dummyModified: true }, ...tail];
  }
}
/**
 * Test double that simulates slow work: waits 50ms, then tags the first
 * episode with `asyncModified: true`.
 */
class DummyAsyncProcessor implements ContextProcessor {
  readonly name = 'DummyAsync';

  /** Factory mirroring the constructor; takes no dependencies. */
  static create() {
    return new DummyAsyncProcessor();
  }

  /**
   * After a 50ms timer, returns a new array whose first entry is a shallow
   * copy of the original first episode with `asyncModified: true` added. The
   * input array and its elements are left untouched.
   */
  async process(episodes: any[], _state: any) {
    await new Promise((done) => {
      setTimeout(done, 50);
    });
    const [head, ...tail] = episodes;
    return [{ ...head, asyncModified: true }, ...tail];
  }
}
/**
 * Test double whose `process` always fails, used to exercise the
 * orchestrator's error handling.
 */
class ThrowingProcessor implements ContextProcessor {
  readonly name = 'Thrower';

  /** Factory mirroring the constructor; takes no dependencies. */
  static create() {
    return new ThrowingProcessor();
  }

  /** Always returns a promise rejected with a fixed error; never resolves. */
  process(): Promise<Episode[]> {
    return Promise.reject(new Error('Processor failed intentionally'));
  }
}
describe('PipelineOrchestrator (Component)', () => {
let env: ContextEnvironment;
let eventBus: ContextEventBus;
beforeEach(() => {
  vi.resetAllMocks();
  env = createMockEnvironment();
  eventBus = env.eventBus as ContextEventBus;
  // Register the test doubles under the ids the pipeline configs refer to.
  const testProcessors = [
    { id: 'DummySyncProcessor', create: () => new DummySyncProcessor() },
    { id: 'DummyAsyncProcessor', create: () => new DummyAsyncProcessor() },
    { id: 'ThrowingProcessor', create: () => new ThrowingProcessor() },
  ];
  for (const definition of testProcessors) {
    ProcessorRegistry.register(definition);
  }
});
afterEach(() => {
  // Remove the test-only registrations so they do not leak into other tests.
  // This reaches into the registry's non-public `processors` collection.
  const registered = (ProcessorRegistry as any).processors;
  for (const id of ['DummySyncProcessor', 'DummyAsyncProcessor', 'ThrowingProcessor']) {
    registered.delete(id);
  }
});
/**
 * Builds a sidecar config around the given pipeline definitions, using a
 * fixed budget (100 max / 50 retained tokens) and a truncate-to-max GC
 * backstop. The `pipelines` array is passed through by reference.
 */
const createConfig = (pipelines: any[]): SidecarConfig => {
  const config: SidecarConfig = {
    budget: { maxTokens: 100, retainedTokens: 50 },
    gcBackstop: { strategy: 'truncate', target: 'max' },
    pipelines,
  };
  return config;
};
it('instantiates processors from the registry on initialization', () => {
  // One blocking pipeline that references a single registered processor.
  const syncPipeline = {
    name: 'Sync',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'DummySyncProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([syncPipeline]),
    env,
    eventBus,
    env.tracer,
  );
  // Construction alone should have populated the orchestrator's internal
  // processor cache under the configured id.
  const instantiated = (orchestrator as any).instantiatedProcessors;
  expect(instantiated.has('DummySyncProcessor')).toBe(true);
});
it('throws an error if a config requests an unknown processor', () => {
  // The pipeline references an id that was never registered.
  const badPipeline = {
    name: 'Bad',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'DoesNotExist' }],
  };
  const config = createConfig([badPipeline]);
  const construct = () => new PipelineOrchestrator(config, env, eventBus, env.tracer);
  // Construction itself must fail, naming the missing processor id.
  expect(construct).toThrow('Context Processor [DoesNotExist] is not registered.');
});
it('executes blocking pipelines synchronously and returns the modified array', async () => {
  const blockingPipeline = {
    name: 'SyncPipe',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'DummySyncProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([blockingPipeline]),
    env,
    eventBus,
    env.tracer,
  );
  const input = [createDummyEpisode('1', 'USER_PROMPT', [])];
  const state = createDummyState(false);
  // Awaiting a blocking pipeline must yield the processor's output, which
  // DummySyncProcessor marks with `dummyModified` on the first episode.
  const output = await orchestrator.executePipeline('SyncPipe', input, state);
  expect(output).toHaveLength(1);
  const [firstEpisode] = output;
  expect((firstEpisode as any).dummyModified).toBe(true);
});
it('executes background pipelines asynchronously without blocking the return', async () => {
  const backgroundPipeline = {
    name: 'AsyncPipe',
    execution: 'background',
    triggers: [],
    processors: [{ processorId: 'DummyAsyncProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([backgroundPipeline]),
    env,
    eventBus,
    env.tracer,
  );
  const input = [createDummyEpisode('1', 'USER_PROMPT', [])];
  const state = createDummyState(false);
  // A background pipeline is expected to hand back the UNMODIFIED episodes
  // without waiting for DummyAsyncProcessor to finish.
  const output = await orchestrator.executePipeline('AsyncPipe', input, state);
  expect(output).toHaveLength(1);
  const [firstEpisode] = output;
  expect((firstEpisode as any).asyncModified).toBeUndefined(); // Not modified yet!
  // Outlast the processor's 50ms timer so the background work finishes
  // before the test ends.
  await new Promise((done) => {
    setTimeout(done, 60);
  });
});
it('gracefully handles and swallows processor errors in synchronous pipelines', async () => {
  const throwingPipeline = {
    name: 'ThrowingPipe',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'ThrowingProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([throwingPipeline]),
    env,
    eventBus,
    env.tracer,
  );
  const input = [createDummyEpisode('1', 'USER_PROMPT', [])];
  const state = createDummyState(false);
  // The rejection from ThrowingProcessor must not propagate: the call is
  // expected to resolve with episodes equal to the input.
  const output = await orchestrator.executePipeline('ThrowingPipe', input, state);
  expect(output).toHaveLength(1);
  expect(output).toStrictEqual(input);
});
it('automatically binds to budget_exceeded trigger via EventBus', () => {
  const config = createConfig([
    {
      name: 'PressureRelief',
      execution: 'background',
      triggers: ['budget_exceeded'],
      processors: [{ processorId: 'DummyAsyncProcessor' }],
    },
  ]);
  // Spy on the private method to see whether the trigger fires it. The spy
  // is installed on the shared prototype, so it must be removed again when
  // the test finishes, even if an assertion fails. Otherwise it stays
  // attached for any later test in this file.
  const executeSpy = vi.spyOn(PipelineOrchestrator.prototype as any, 'executePipelineAsync');
  try {
    // Constructing the orchestrator is what wires the trigger to the event bus.
    new PipelineOrchestrator(config, env, eventBus, env.tracer);
    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
    // Emit the trigger
    eventBus.emitConsolidationNeeded({ episodes, targetDeficit: 100 });
    expect(executeSpy).toHaveBeenCalled();
  } finally {
    // Put the original prototype method back.
    executeSpy.mockRestore();
  }
});
});
@@ -34,7 +34,7 @@ export class PipelineOrchestrator {
if (!this.instantiatedProcessors.has(procDef.processorId)) {
const processorClass = ProcessorRegistry.get(procDef.processorId);
if (!processorClass) {
throw new Error(`Unknown processor ID: ${procDef.processorId}`);
throw new Error(`Context Processor [${procDef.processorId}] is not registered.`);
}
// The Orchestrator injects standard dependencies required by processors
// If a processor needs the eventBus (like Snapshot), it expects it via constructor.
+16 -14
View File
@@ -11,22 +11,24 @@ Our testing strategy avoids the "endless tax" of brittle tests by strictly separ
## Audit Checklist & Coverage Tracker
### 1. The Tooling Library (`contextTestUtils.ts`)
- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern.
- [x] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
- [x] Ensure all existing tests are migrated to use these helpers to establish the pattern.
### 2. Unit Tests (The Processors)
Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `HistorySquashingProcessor` (Audit coverage)
- [ ] `SemanticCompressionProcessor` (Audit coverage)
- [ ] `ContextTracer` (Complete)
- [ ] `SidecarLoader` (Complete)
- [ ] `IrMapper` / `IrProjector` (Audit coverage)
### 2. Unit Tests (The Processors & Map/Reduce)
Goal: Ensure every component gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
- [x] `BlobDegradationProcessor` (Completed)
- [x] `ToolMaskingProcessor` (Completed)
- [x] `HistorySquashingProcessor` (Completed)
- [x] `SemanticCompressionProcessor` (Completed)
- [x] `StateSnapshotProcessor` (Completed)
- [x] `EmergencyTruncationProcessor` (Completed)
- [x] `ContextTracer` (Completed)
- [x] `SidecarLoader` (Completed)
- [x] `IrMapper` / `graphUtils` (Completed - Handles Multi-Tool Concurrency and Backwards Graph Traversal)
### 3. Component Tests (The Orchestration)
Goal: Prove the sidecar configuration accurately drives runtime behavior without testing the processor logic itself.
- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup.
- [x] `PipelineOrchestrator`: Test sync vs. async routing, error swallowing, and trigger setup.
- [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers).
- [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers).
@@ -36,5 +38,5 @@ Goal: Prove the sidecar configuration accurately drives runtime behavior without
---
## Next Actions
1. Migrate processor tests to shared factories to DRY up the suite.
2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations.
1. Audit the ContextManager component tests (opportunistic updates & sync barrier).
2. Finalize the End-to-End "Day in the Life" Golden Snapshot test.