mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-20 00:32:31 -07:00
next steps
This commit is contained in:
@@ -0,0 +1,121 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
import { generateWorkingBufferView } from './graphUtils.js';
|
||||
import { createMockEnvironment, createDummyEpisode } from '../testing/contextTestUtils.js';
|
||||
import type { ContextEnvironment } from '../sidecar/environment.js';
|
||||
|
||||
describe('graphUtils (View Generator)', () => {
|
||||
let env: ContextEnvironment;
|
||||
|
||||
beforeEach(() => {
  vi.resetAllMocks();
  env = createMockEnvironment();

  // Token mock: every episode is charged its trigger's `originalTokens`;
  // when that value is missing (or 0) the episode is charged 100 tokens.
  vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((list) => {
    let total = 0;
    for (const item of list) {
      total += item.trigger.metadata.originalTokens || 100;
    }
    return total;
  });
});
|
||||
|
||||
it('returns pristine episodes untouched if under budget', () => {
  const first = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const second = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
  const episodes = [first, second];

  // Retained budget is 5000 tokens; the mocked total for both episodes is 200.
  const view = generateWorkingBufferView(episodes, 5000, env.tracer, env);

  expect(view).toHaveLength(2);
  // The generator must hand back its own array, never the caller's array
  // instance (this assertion checks array identity only).
  expect(view).not.toBe(episodes);
  expect(view[0].id).toBe('ep-1');
  expect(view[1].id).toBe('ep-2');
});
|
||||
|
||||
it('swaps to Masked variant when over budget (rolling backwards)', () => {
  const older = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1', type: 'text' }]);
  const newer = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2', type: 'text' }]);

  older.variants = {
    masked: { type: 'masked', status: 'ready', text: '<MASKED>', recoveredTokens: 10 },
  };

  // Only 10 tokens are retained.
  // ep-2 (newest) is costed at 100 tokens by the mock, so the rolling total
  // is already over budget by the time ep-1 is evaluated, and ep-1 is
  // expected to be swapped for its ready Masked variant.
  const view = generateWorkingBufferView([older, newer], 10, env.tracer, env);

  expect(view).toHaveLength(2);

  const [maskedView, newestView] = view;
  expect(newestView.id).toBe('ep-2'); // Unchanged (newest)

  expect(maskedView.id).toBe('ep-1');
  expect((maskedView.trigger as any).semanticParts[0].presentation.text).toBe('<MASKED>');
});
|
||||
|
||||
it('swaps to Summary variant when over budget', () => {
  const older = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const newer = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);

  older.variants = {
    summary: { type: 'summary', status: 'ready', text: '<SUMMARY>', recoveredTokens: 50 },
  };

  const view = generateWorkingBufferView([older, newer], 10, env.tracer, env);

  expect(view).toHaveLength(2);

  // A summary is expected to stand in for all internal steps (as a single
  // AGENT_THOUGHT carrying the summary text) and to leave no yield behind.
  const summarized = view[0];
  expect(summarized.steps).toHaveLength(1);

  const [onlyStep] = summarized.steps;
  expect(onlyStep.type).toBe('AGENT_THOUGHT');
  expect((onlyStep as any).text).toBe('<SUMMARY>');
  expect(summarized.yield).toBeUndefined();
});
|
||||
|
||||
it('handles complex N-to-1 Snapshot skipping gracefully', () => {
  const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);
  const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: '3' }]);
  const ep4 = createDummyEpisode('ep-4', 'USER_PROMPT', [{ text: '4' }]);

  // The snapshot hangs off ep-3 and declares that it replaces ep-1, ep-2 and ep-3.
  const snapshotEp = createDummyEpisode('snap-1', 'SYSTEM_EVENT', []);
  ep3.variants = {
    snapshot: {
      type: 'snapshot',
      status: 'ready',
      episode: snapshotEp,
      replacedEpisodeIds: ['ep-1', 'ep-2', 'ep-3'],
    },
  };

  // A 5-token budget forces the sweep onto variants for everything except ep-4.
  const view = generateWorkingBufferView([ep1, ep2, ep3, ep4], 5, env.tracer, env);

  // Expected result is exactly [snapshot, ep-4].
  expect(view).toHaveLength(2);
  const [head, tail] = view;
  expect(head.id).toBe('snap-1');
  expect(tail.id).toBe('ep-4');
});
|
||||
|
||||
it('ignores variants that are not yet "ready"', () => {
  const older = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: '1' }]);
  const newer = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: '2' }]);

  // Same masked payload as a usable variant, but still flagged as computing.
  older.variants = {
    masked: { type: 'masked', status: 'computing', text: '<MASKED>', recoveredTokens: 10 },
  };

  const view = generateWorkingBufferView([older, newer], 10, env.tracer, env);

  // A variant that is still computing must not be applied: the trigger part
  // keeps its pristine form, i.e. no `presentation` override is attached.
  expect(view).toHaveLength(2);
  const firstTrigger = view[0].trigger as any;
  expect(firstTrigger.semanticParts[0].presentation).toBeUndefined();
});
|
||||
});
|
||||
@@ -130,6 +130,108 @@ describe('IrMapper', () => {
|
||||
// in one Content block, but the flat representation is semantically identical.
|
||||
});
|
||||
|
||||
it('should correctly handle multi-tool-calls grouped within a single turn without dropping observations', () => {
  const userAsk: Content = {
    role: 'user',
    parts: [{ text: 'Examine both of these tools please.' }],
  };
  const modelCalls: Content = {
    role: 'model',
    parts: [
      { text: 'I will call them concurrently.' },
      { functionCall: { id: 'c1', name: 'tool_one', args: { p: 1 } } },
      { functionCall: { id: 'c2', name: 'tool_two', args: { p: 2 } } },
    ],
  };
  // Gemini forces the user turn to contain ALL function responses for that model turn
  const toolResults: Content = {
    role: 'user',
    parts: [
      { functionResponse: { id: 'c1', name: 'tool_one', response: { r: 1 } } },
      { functionResponse: { id: 'c2', name: 'tool_two', response: { r: 2 } } },
    ],
  };
  const modelWrapUp: Content = {
    role: 'model',
    parts: [{ text: 'Both complete.' }],
  };
  const rawHistory: Content[] = [userAsk, modelCalls, toolResults, modelWrapUp];

  const tokenCalculator = new ContextTokenCalculator(4);
  const episodes = IrMapper.toIr(rawHistory, tokenCalculator);

  // All four turns should collapse into a single episode.
  expect(episodes).toHaveLength(1);
  const ep = episodes[0];

  expect(ep.trigger.type).toBe('USER_PROMPT');

  // Expected steps, in order:
  //   0: AgentThought ("I will call them concurrently")
  //   1: ToolExecution(tool_one)
  //   2: ToolExecution(tool_two)
  expect(ep.steps).toHaveLength(3);

  const thought = ep.steps[0];
  expect(thought.type).toBe('AGENT_THOUGHT');
  expect((thought as any).text).toBe('I will call them concurrently.');

  const expectedTools = [
    { toolName: 'tool_one', intent: { p: 1 }, observation: { r: 1 } },
    { toolName: 'tool_two', intent: { p: 2 }, observation: { r: 2 } },
  ];
  expectedTools.forEach((wanted, offset) => {
    const step = ep.steps[offset + 1];
    expect(step.type).toBe('TOOL_EXECUTION');

    const execution = step as ToolExecution;
    expect(execution.toolName).toBe(wanted.toolName);
    expect(execution.intent).toEqual(wanted.intent);
    expect(execution.observation).toEqual(wanted.observation);
  });

  // The final model turn should become the yield
  expect(ep.yield).toBeDefined();
  expect(ep.yield?.type).toBe('AGENT_YIELD');
  expect(ep.yield?.text).toBe('Both complete.');

  // Round-trip back to raw history; neither call may be dropped.
  const reconstituted = IrMapper.fromIr(episodes);

  // Same number of turns as the original history.
  expect(reconstituted).toHaveLength(4);

  // The model turn carries the text plus both function calls.
  const modelTurn = reconstituted[1];
  expect(modelTurn.role).toBe('model');
  expect(modelTurn.parts).toHaveLength(3); // text + call1 + call2
  expect(modelTurn.parts![1].functionCall?.name).toBe('tool_one');
  expect(modelTurn.parts![2].functionCall?.name).toBe('tool_two');

  // The user turn carries both function responses.
  const responseTurn = reconstituted[2];
  expect(responseTurn.role).toBe('user');
  expect(responseTurn.parts).toHaveLength(2); // response1 + response2
  expect(responseTurn.parts![0].functionResponse?.name).toBe('tool_one');
  expect(responseTurn.parts![1].functionResponse?.name).toBe('tool_two');
});
|
||||
|
||||
it('should guarantee WeakMap ID stability across continuous mapping', () => {
|
||||
// 1. Initial history
|
||||
const history: Content[] = [
|
||||
|
||||
@@ -0,0 +1,194 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
|
||||
import { PipelineOrchestrator } from './orchestrator.js';
|
||||
import { ProcessorRegistry } from './registry.js';
|
||||
import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
|
||||
import type { ContextEnvironment } from './environment.js';
|
||||
import type { ContextProcessor } from '../pipeline.js';
|
||||
import type { SidecarConfig } from './types.js';
|
||||
import { ContextEventBus } from '../eventBus.js';
|
||||
|
||||
import type { Episode } from '../ir/types.js';
|
||||
|
||||
// Create a Dummy Processor for testing Orchestration routing
|
||||
/**
 * Test double used to exercise orchestration routing.
 *
 * `process` resolves with a shallow copy of the input list in which the first
 * entry is replaced by a clone carrying `dummyModified: true`. The input array
 * and its entries are never mutated.
 */
class DummySyncProcessor implements ContextProcessor {
  readonly name = 'DummySync';

  /** Factory mirroring the constructor. */
  static create() { return new DummySyncProcessor(); }

  /**
   * @param episodes Episodes to copy; only the first one is tagged.
   * @param _state Ignored.
   * @returns A new array. For an empty input an empty array is returned, so no
   *   phantom `{ dummyModified: true }` entry is fabricated.
   */
  async process(episodes: any[], _state: any) {
    if (episodes.length === 0) {
      return [];
    }
    const [head, ...rest] = episodes;
    return [{ ...head, dummyModified: true }, ...rest];
  }
}
|
||||
|
||||
/**
 * Test double that simulates slow work.
 *
 * `process` waits 50ms, then resolves with a shallow copy of the input list in
 * which the first entry is replaced by a clone carrying `asyncModified: true`.
 * The input array and its entries are never mutated.
 */
class DummyAsyncProcessor implements ContextProcessor {
  readonly name = 'DummyAsync';

  /** Factory mirroring the constructor. */
  static create() { return new DummyAsyncProcessor(); }

  /**
   * @param episodes Episodes to copy; only the first one is tagged.
   * @param _state Ignored.
   * @returns A new array after the 50ms delay. For an empty input an empty
   *   array is returned, so no phantom `{ asyncModified: true }` entry is
   *   fabricated.
   */
  async process(episodes: any[], _state: any) {
    // The delay runs for every input, including an empty one, so timing is unchanged.
    await new Promise(resolve => setTimeout(resolve, 50));
    if (episodes.length === 0) {
      return [];
    }
    const [head, ...rest] = episodes;
    return [{ ...head, asyncModified: true }, ...rest];
  }
}
|
||||
|
||||
/**
 * Test double whose `process` always rejects, used to check how the
 * orchestrator reacts to a failing processor.
 */
class ThrowingProcessor implements ContextProcessor {
  readonly name = 'Thrower';

  /** Factory mirroring the constructor. */
  static create() { return new ThrowingProcessor(); }

  /** Always rejects with `Error('Processor failed intentionally')`. */
  async process(): Promise<Episode[]> {
    const failure = new Error('Processor failed intentionally');
    throw failure;
  }
}
|
||||
|
||||
describe('PipelineOrchestrator (Component)', () => {
|
||||
let env: ContextEnvironment;
|
||||
let eventBus: ContextEventBus;
|
||||
|
||||
beforeEach(() => {
  vi.resetAllMocks();
  env = createMockEnvironment();
  eventBus = env.eventBus as ContextEventBus;

  // Make the three test doubles resolvable by id for the configs built in each test.
  ProcessorRegistry.register({
    id: 'DummySyncProcessor',
    create: () => new DummySyncProcessor(),
  });
  ProcessorRegistry.register({
    id: 'DummyAsyncProcessor',
    create: () => new DummyAsyncProcessor(),
  });
  ProcessorRegistry.register({
    id: 'ThrowingProcessor',
    create: () => new ThrowingProcessor(),
  });
});
|
||||
|
||||
afterEach(() => {
  // Drop the test doubles again so they do not pollute other tests.
  // NOTE(review): this reaches into the registry's internal `processors`
  // collection via an `any` cast.
  const registered = (ProcessorRegistry as any).processors;
  for (const id of ['DummySyncProcessor', 'DummyAsyncProcessor', 'ThrowingProcessor']) {
    registered.delete(id);
  }
});
|
||||
|
||||
/**
 * Builds a sidecar config around the given pipeline definitions, using a fixed
 * budget (maxTokens 100, retainedTokens 50) and a truncate-to-max GC backstop.
 */
const createConfig = (pipelines: any[]): SidecarConfig => {
  const config: SidecarConfig = {
    budget: { maxTokens: 100, retainedTokens: 50 },
    gcBackstop: { strategy: 'truncate', target: 'max' },
    pipelines,
  };
  return config;
};
|
||||
|
||||
it('instantiates processors from the registry on initialization', () => {
  const syncPipeline = {
    name: 'Sync',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'DummySyncProcessor' }],
  };
  const config = createConfig([syncPipeline]);

  const orchestrator = new PipelineOrchestrator(config, env, eventBus, env.tracer);

  // Construction alone should have populated the orchestrator's processor cache.
  const instantiated = (orchestrator as any).instantiatedProcessors;
  expect(instantiated.has('DummySyncProcessor')).toBe(true);
});
|
||||
|
||||
it('throws an error if a config requests an unknown processor', () => {
  const badPipeline = {
    name: 'Bad',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'DoesNotExist' }],
  };
  const config = createConfig([badPipeline]);

  // Construction itself must fail, naming the unregistered processor id.
  const construct = () => new PipelineOrchestrator(config, env, eventBus, env.tracer);
  expect(construct).toThrow('Context Processor [DoesNotExist] is not registered.');
});
|
||||
|
||||
it('executes blocking pipelines synchronously and returns the modified array', async () => {
  const blockingPipeline = {
    name: 'SyncPipe',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'DummySyncProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([blockingPipeline]),
    env,
    eventBus,
    env.tracer,
  );

  const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
  const state = createDummyState(false);

  const result = await orchestrator.executePipeline('SyncPipe', episodes, state);

  // The awaited result must already carry the marker set by DummySyncProcessor.
  expect(result).toHaveLength(1);
  const [processed] = result;
  expect((processed as any).dummyModified).toBe(true);
});
|
||||
|
||||
it('executes background pipelines asynchronously without blocking the return', async () => {
  const backgroundPipeline = {
    name: 'AsyncPipe',
    execution: 'background',
    triggers: [],
    processors: [{ processorId: 'DummyAsyncProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([backgroundPipeline]),
    env,
    eventBus,
    env.tracer,
  );

  const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
  const state = createDummyState(false);

  // Background execution: the call should resolve right away with the
  // UNMODIFIED array instead of waiting for the processor.
  const result = await orchestrator.executePipeline('AsyncPipe', episodes, state);

  expect(result).toHaveLength(1);
  const [returned] = result;
  expect((returned as any).asyncModified).toBeUndefined(); // Not modified yet!

  // Let the background task finish (DummyAsyncProcessor sleeps 50ms) before
  // the test ends.
  await new Promise((resolve) => setTimeout(resolve, 60));
});
|
||||
|
||||
it('gracefully handles and swallows processor errors in synchronous pipelines', async () => {
  const throwingPipeline = {
    name: 'ThrowingPipe',
    execution: 'blocking',
    triggers: [],
    processors: [{ processorId: 'ThrowingProcessor' }],
  };
  const orchestrator = new PipelineOrchestrator(
    createConfig([throwingPipeline]),
    env,
    eventBus,
    env.tracer,
  );

  const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];
  const state = createDummyState(false);

  // The call must not reject: the processor error is expected to be swallowed
  // and the episodes handed back unmodified.
  const result = await orchestrator.executePipeline('ThrowingPipe', episodes, state);

  expect(result).toHaveLength(1);
  expect(result).toStrictEqual(episodes);
});
|
||||
|
||||
it('automatically binds to budget_exceeded trigger via EventBus', () => {
  const config = createConfig([
    {
      name: 'PressureRelief',
      execution: 'background',
      triggers: ['budget_exceeded'],
      processors: [{ processorId: 'DummyAsyncProcessor' }],
    },
  ]);

  // Spy on the private method to see whether the trigger fires it.
  // The spy is installed on the shared prototype, so it is removed again in
  // `finally`: `vi.resetAllMocks()` in beforeEach does not restore spied
  // methods, and a leftover spy would affect every orchestrator created later.
  const executeSpy = vi.spyOn(PipelineOrchestrator.prototype as any, 'executePipelineAsync');

  try {
    new PipelineOrchestrator(config, env, eventBus, env.tracer);

    const episodes = [createDummyEpisode('1', 'USER_PROMPT', [])];

    // Emit the trigger
    eventBus.emitConsolidationNeeded({ episodes, targetDeficit: 100 });

    expect(executeSpy).toHaveBeenCalled();
  } finally {
    executeSpy.mockRestore();
  }
});
|
||||
});
|
||||
@@ -34,7 +34,7 @@ export class PipelineOrchestrator {
|
||||
if (!this.instantiatedProcessors.has(procDef.processorId)) {
|
||||
const processorClass = ProcessorRegistry.get(procDef.processorId);
|
||||
if (!processorClass) {
|
||||
throw new Error(`Unknown processor ID: ${procDef.processorId}`);
|
||||
throw new Error(`Context Processor [${procDef.processorId}] is not registered.`);
|
||||
}
|
||||
// The Orchestrator injects standard dependencies required by processors
|
||||
// If a processor needs the eventBus (like Snapshot), it expects it via constructor.
|
||||
|
||||
@@ -11,22 +11,24 @@ Our testing strategy avoids the "endless tax" of brittle tests by strictly separ
|
||||
## Audit Checklist & Coverage Tracker
|
||||
|
||||
### 1. The Tooling Library (`contextTestUtils.ts`)
|
||||
- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
|
||||
- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern.
|
||||
- [x] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`).
|
||||
- [x] Ensure all existing tests are migrated to use these helpers to establish the pattern.
|
||||
|
||||
### 2. Unit Tests (The Processors)
|
||||
Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
|
||||
- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers)
|
||||
- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers)
|
||||
- [ ] `HistorySquashingProcessor` (Audit coverage)
|
||||
- [ ] `SemanticCompressionProcessor` (Audit coverage)
|
||||
- [ ] `ContextTracer` (Complete)
|
||||
- [ ] `SidecarLoader` (Complete)
|
||||
- [ ] `IrMapper` / `IrProjector` (Audit coverage)
|
||||
### 2. Unit Tests (The Processors & Map/Reduce)
|
||||
Goal: Ensure every component gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR.
|
||||
- [x] `BlobDegradationProcessor` (Completed)
|
||||
- [x] `ToolMaskingProcessor` (Completed)
|
||||
- [x] `HistorySquashingProcessor` (Completed)
|
||||
- [x] `SemanticCompressionProcessor` (Completed)
|
||||
- [x] `StateSnapshotProcessor` (Completed)
|
||||
- [x] `EmergencyTruncationProcessor` (Completed)
|
||||
- [x] `ContextTracer` (Completed)
|
||||
- [x] `SidecarLoader` (Completed)
|
||||
- [x] `IrMapper` / `graphUtils` (Completed - Handles Multi-Tool Concurrency and Backwards Graph Traversal)
|
||||
|
||||
### 3. Component Tests (The Orchestration)
|
||||
Goal: Prove the sidecar configuration accurately drives runtime behavior without testing the processor logic itself.
|
||||
- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup.
|
||||
- [x] `PipelineOrchestrator`: Test sync vs. async routing, error swallowing, and trigger setup.
|
||||
- [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers).
|
||||
- [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers).
|
||||
|
||||
@@ -36,5 +38,5 @@ Goal: Prove the sidecar configuration accurately drives runtime behavior without
|
||||
---
|
||||
|
||||
## Next Actions
|
||||
1. Migrate processor tests to shared factories to DRY up the suite.
|
||||
2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations.
|
||||
1. Audit the ContextManager component tests (opportunistic updates & sync barrier).
|
||||
2. Finalize the End-to-End "Day in the Life" Golden Snapshot test.
|
||||
|
||||
Reference in New Issue
Block a user