mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-16 23:02:51 -07:00
testing
This commit is contained in:
@@ -132,7 +132,6 @@ export class ContextManager {
|
||||
this.tracer,
|
||||
this.env.tokenCalculator,
|
||||
);
|
||||
|
||||
this.historyObserver.start();
|
||||
}
|
||||
|
||||
|
||||
@@ -134,7 +134,43 @@ export class PipelineOrchestrator {
|
||||
|
||||
try {
|
||||
this.tracer.logEvent('Orchestrator', `Executing processor: ${procDef.processorId} (async)`);
|
||||
|
||||
// Before running, capture the state so we know what changed
|
||||
const beforeMap = new Map(currentEpisodes.map(ep => [ep.id, ep]));
|
||||
|
||||
currentEpisodes = await processor.process(currentEpisodes, state);
|
||||
|
||||
// Synthesize VariantReady events for anything that changed or was newly created
|
||||
for (const ep of currentEpisodes) {
|
||||
const original = beforeMap.get(ep.id);
|
||||
|
||||
// If an episode was transformed, or if it's a completely new synthetic episode (like a Snapshot)
|
||||
// we need to broadcast it so the ContextManager can cache it as a variant.
|
||||
if (!original || original !== ep) {
|
||||
const variantId = `v-${procDef.processorId.toLowerCase()}`;
|
||||
|
||||
// Determine variant type. StateSnapshot generates full 'snapshot' replacement nodes.
|
||||
// Masking/Squashing generate 'masked' or 'summary' in-place variants.
|
||||
let vType: 'snapshot' | 'summary' | 'masked' = 'masked';
|
||||
if (procDef.processorId.includes('Snapshot')) vType = 'snapshot';
|
||||
else if (procDef.processorId.includes('Semantic')) vType = 'summary';
|
||||
|
||||
this.eventBus.emitVariantReady({
|
||||
targetId: ep.id, // The ID of the modified or new episode
|
||||
variantId,
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
variant: {
|
||||
status: 'ready',
|
||||
type: vType,
|
||||
episode: vType === 'snapshot' ? ep : undefined,
|
||||
text: vType !== 'snapshot' ? (ep.yield?.text || (ep.trigger as any)?.semanticParts?.[0]?.presentation?.text || '') : undefined,
|
||||
recoveredTokens: ep.yield?.metadata?.currentTokens || 10,
|
||||
// For snapshots, we look at the transformations metadata to see what it replaced
|
||||
replacedEpisodeIds: vType === 'snapshot' ? currentState.map(c => c.id).filter(id => id !== ep.id && !currentEpisodes.find(ce => ce.id === id)) : undefined,
|
||||
} as any
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
debugLogger.error(`Pipeline ${pipeline.name} failed at ${procDef.processorId}:`, error);
|
||||
return; // Halt pipeline
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { ContextManager } from '../contextManager.js';
|
||||
import { AgentChatHistory } from '../../core/agentChatHistory.js';
|
||||
import type { Content } from '@google/genai';
|
||||
import type { SidecarConfig } from '../sidecar/types.js';
|
||||
import { ContextEnvironmentImpl } from '../sidecar/environmentImpl.js';
|
||||
import { ContextTracer } from '../tracer.js';
|
||||
import { ContextEventBus } from '../eventBus.js';
|
||||
|
||||
import { BlobDegradationProcessor } from '../processors/blobDegradationProcessor.js';
|
||||
import { ToolMaskingProcessor } from '../processors/toolMaskingProcessor.js';
|
||||
import { HistorySquashingProcessor } from '../processors/historySquashingProcessor.js';
|
||||
import { SemanticCompressionProcessor } from '../processors/semanticCompressionProcessor.js';
|
||||
import { StateSnapshotProcessor } from '../processors/stateSnapshotProcessor.js';
|
||||
import { EmergencyTruncationProcessor } from '../processors/emergencyTruncationProcessor.js';
|
||||
import { ProcessorRegistry } from '../sidecar/registry.js';
|
||||
|
||||
/** Per-turn token accounting captured by the harness for snapshot comparison. */
export interface TurnSummary {
  // Zero-based index of the simulated turn.
  turnIndex: number;
  // Token count measured immediately after the turn's messages were appended,
  // before background pipelines had a chance to run.
  tokensBeforeBackground: number;
  // Token count measured after background/blocking pipelines were given time
  // to emit variants.
  tokensAfterBackground: number;
}
|
||||
|
||||
export class SimulationHarness {
|
||||
public readonly chatHistory: AgentChatHistory;
|
||||
public contextManager!: ContextManager;
|
||||
public readonly eventBus: ContextEventBus;
|
||||
public config!: SidecarConfig;
|
||||
private tracer!: ContextTracer;
|
||||
private currentTurnIndex = 0;
|
||||
private tokenTrajectory: TurnSummary[] = [];
|
||||
|
||||
static async create(config: SidecarConfig, mockLlmClient: any, mockTempDir = '/tmp/sim'): Promise<SimulationHarness> {
|
||||
const harness = new SimulationHarness();
|
||||
await harness.init(config, mockLlmClient, mockTempDir);
|
||||
return harness;
|
||||
}
|
||||
|
||||
private constructor() {
|
||||
this.chatHistory = new AgentChatHistory();
|
||||
this.eventBus = new ContextEventBus();
|
||||
}
|
||||
|
||||
private async init(
|
||||
config: SidecarConfig,
|
||||
mockLlmClient: any,
|
||||
mockTempDir: string
|
||||
) {
|
||||
this.config = config;
|
||||
// Register all standard processors
|
||||
ProcessorRegistry.register({ id: 'BlobDegradationProcessor', create: (env, opts) => new BlobDegradationProcessor(env) });
|
||||
ProcessorRegistry.register({ id: 'ToolMaskingProcessor', create: (env, opts) => new ToolMaskingProcessor(env, opts as any) });
|
||||
ProcessorRegistry.register({ id: 'HistorySquashingProcessor', create: (env, opts) => new HistorySquashingProcessor(env, opts as any) });
|
||||
ProcessorRegistry.register({ id: 'SemanticCompressionProcessor', create: (env, opts) => new SemanticCompressionProcessor(env, opts as any) });
|
||||
ProcessorRegistry.register({ id: 'StateSnapshotProcessor', create: (env, opts) => new StateSnapshotProcessor(env, opts as any, env.eventBus) });
|
||||
ProcessorRegistry.register({ id: 'EmergencyTruncationProcessor', create: (env, opts) => new EmergencyTruncationProcessor(env, opts as any) });
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
(this as any).tracer = new ContextTracer({ targetDir: mockTempDir, sessionId: 'sim-session' });
|
||||
|
||||
// Using real token calculator instead of mock, so we test actual string sizes
|
||||
const InMemoryFS = (await import('../system/InMemoryFileSystem.js')).InMemoryFileSystem;
|
||||
const DetIdGen = (await import('../system/DeterministicIdGenerator.js')).DeterministicIdGenerator;
|
||||
|
||||
const env = new ContextEnvironmentImpl(
|
||||
mockLlmClient,
|
||||
'sim-prompt',
|
||||
'sim-session',
|
||||
mockTempDir,
|
||||
mockTempDir,
|
||||
this.tracer,
|
||||
4, // 4 chars per token average
|
||||
this.eventBus,
|
||||
new InMemoryFS(),
|
||||
new DetIdGen()
|
||||
);
|
||||
|
||||
this.contextManager = new ContextManager(config, env, this.tracer);
|
||||
this.contextManager.subscribeToHistory(this.chatHistory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simulates a single "Turn" (User input + Model/Tool outputs)
|
||||
* A turn might consist of multiple Content messages (e.g. user prompt -> model call -> user response -> model answer)
|
||||
*/
|
||||
async simulateTurn(messages: Content[]) {
|
||||
// 1. Append the new messages
|
||||
const currentHistory = this.chatHistory.get();
|
||||
await this.chatHistory.set([...currentHistory, ...messages]);
|
||||
|
||||
// 2. Measure tokens immediately after append (Before background processing)
|
||||
const tokensBefore = (this.contextManager as any).env.tokenCalculator.calculateEpisodeListTokens(
|
||||
this.contextManager.getWorkingBufferView()
|
||||
);
|
||||
console.log(`[Turn ${this.currentTurnIndex}] Tokens BEFORE: ${tokensBefore}`);
|
||||
|
||||
// 3. Yield to event loop to allow internal async subscribers and orchestrator to finish
|
||||
await new Promise(resolve => setTimeout(resolve, 50));
|
||||
|
||||
// 3.1 Simulate what projectCompressedHistory does with the sync handlers
|
||||
let currentView = this.contextManager.getWorkingBufferView();
|
||||
const currentTokens = (this.contextManager as any).env.tokenCalculator.calculateEpisodeListTokens(currentView);
|
||||
if (this.config.budget && currentTokens > this.config.budget.maxTokens) {
|
||||
console.log(`[Turn ${this.currentTurnIndex}] Sync panic triggered! ${currentTokens} > ${this.config.budget.maxTokens}`);
|
||||
const syncPipelines = this.config.pipelines.filter(p => p.execution === 'blocking');
|
||||
const orchestrator = (this.contextManager as any).orchestrator;
|
||||
for (const pipe of syncPipelines) {
|
||||
currentView = await orchestrator.executePipeline(pipe.name, currentView, {
|
||||
currentTokens,
|
||||
maxTokens: this.config.budget.maxTokens,
|
||||
retainedTokens: this.config.budget.retainedTokens,
|
||||
deficitTokens: currentTokens - this.config.budget.maxTokens,
|
||||
protectedEpisodeIds: new Set()
|
||||
});
|
||||
}
|
||||
|
||||
// Inject the truncated view back into the graph
|
||||
for (const ep of currentView) {
|
||||
if (!currentHistory.find(c => c === ep)) {
|
||||
this.eventBus.emitVariantReady({
|
||||
targetId: ep.id,
|
||||
variantId: 'v-emergency',
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
variant: {
|
||||
status: 'ready',
|
||||
type: 'masked', // Truncation is technically a mask
|
||||
text: ep.yield?.text || '',
|
||||
recoveredTokens: 0,
|
||||
} as any
|
||||
});
|
||||
}
|
||||
}
|
||||
// Wait for variant propagation
|
||||
await new Promise(resolve => setTimeout(resolve, 50));
|
||||
}
|
||||
|
||||
// 4. Measure tokens after background processors have (hopefully) emitted variants
|
||||
const tokensAfter = (this.contextManager as any).env.tokenCalculator.calculateEpisodeListTokens(
|
||||
this.contextManager.getWorkingBufferView()
|
||||
);
|
||||
console.log(`[Turn ${this.currentTurnIndex}] Tokens AFTER: ${tokensAfter}`);
|
||||
|
||||
this.tokenTrajectory.push({
|
||||
turnIndex: this.currentTurnIndex++,
|
||||
tokensBeforeBackground: tokensBefore,
|
||||
tokensAfterBackground: tokensAfter,
|
||||
});
|
||||
}
|
||||
|
||||
async getGoldenState() {
|
||||
const finalProjection = await this.contextManager.projectCompressedHistory();
|
||||
return {
|
||||
tokenTrajectory: this.tokenTrajectory,
|
||||
finalProjection
|
||||
};
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,103 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
|
||||
import { SimulationHarness } from './SimulationHarness.js';
|
||||
import type { SidecarConfig } from '../sidecar/types.js';
|
||||
|
||||
expect.addSnapshotSerializer({
|
||||
test: (val) =>
|
||||
typeof val === 'string' &&
|
||||
(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(val) ||
|
||||
/^\/tmp\/sim/.test(val)), // Mask temp directories and UUIDs
|
||||
print: (val) => (typeof val === 'string' && /^\/tmp\/sim/.test(val) ? '"<MOCKED_DIR>"' : '"<UUID>"'),
|
||||
});
|
||||
|
||||
describe('System Lifecycle Golden Tests', () => {
  // Pin Math.random so any randomized behavior inside the pipelines is
  // deterministic across runs — required for stable snapshots.
  beforeAll(() => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);
  });

  afterAll(() => {
    vi.restoreAllMocks();
  });

  // Config with deliberately tiny budgets so every compression pathway
  // (masking, blob degradation, snapshotting, emergency truncation) fires
  // within a handful of turns.
  const getAggressiveConfig = (): SidecarConfig => ({
    budget: { maxTokens: 4000, retainedTokens: 2000 }, // Extremely tight limits
    gcBackstop: { strategy: 'truncate', target: 'max' },
    pipelines: [
      {
        name: 'Pressure Relief', // Emits from eventBus 'budget_exceeded'
        execution: 'background',
        triggers: ['budget_exceeded'],
        processors: [
          { processorId: 'BlobDegradationProcessor' },
          // 50 tokens * 4 chars/token = mask any tool string > 200 chars.
          { processorId: 'ToolMaskingProcessor', options: { stringLengthThresholdTokens: 50 } }, // Mask any tool string > 200 chars
          { processorId: 'StateSnapshotProcessor', options: {} } // Squash old history
        ]
      },
      {
        // NOTE(review): name appears to be a magic string the projector is
        // hardcoded to look up — confirm against the projector implementation.
        name: 'Immediate Sanitization', // The magic string the projector is hardcoded to use
        execution: 'blocking',
        triggers: ['budget_exceeded'],
        processors: [
          { processorId: 'EmergencyTruncationProcessor', options: {} }
        ]
      }
    ]
  });

  // LLM stub: any summarization request resolves to a fixed marker string,
  // keeping snapshot output deterministic.
  const mockLlmClient = {
    generateContent: vi.fn().mockResolvedValue({
      text: '<MOCKED_STATE_SNAPSHOT_SUMMARY>',
    })
  };

  it('Scenario 1: Organic Growth with Huge Tool Output & Images', async () => {
    const harness = await SimulationHarness.create(getAggressiveConfig(), mockLlmClient);

    // Turn 0: System Prompt
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'System Instructions' }] },
      { role: 'model', parts: [{ text: 'Ack.' }] }
    ]);

    // Turn 1: Normal conversation
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Hello!' }] },
      { role: 'model', parts: [{ text: 'Hi, how can I help?' }] }
    ]);

    // Turn 2: Massive Tool Output (Should trigger ToolMaskingProcessor in background)
    // 'LOG '.repeat(5000) = 20k chars ≈ 5k tokens, blowing past maxTokens alone.
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Read the logs.' }] },
      { role: 'model', parts: [{ functionCall: { name: 'run_shell_command', args: { cmd: 'cat server.log' } } }] },
      { role: 'user', parts: [{ functionResponse: { name: 'run_shell_command', response: { output: 'LOG '.repeat(5000) } } }] },
      { role: 'model', parts: [{ text: 'The logs are very long.' }] }
    ]);

    // Turn 3: Multi-modal blob (Should trigger BlobDegradationProcessor)
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Look at this architecture diagram:' }, { inlineData: { mimeType: 'image/png', data: 'fake_base64_data_'.repeat(1000) } }] },
      { role: 'model', parts: [{ text: 'Nice diagram.' }] }
    ]);

    // Turn 4: More conversation to trigger StateSnapshot
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Can we refactor?' }] },
      { role: 'model', parts: [{ text: 'Yes we can.' }] }
    ]);

    // Get final state
    const goldenState = await harness.getGoldenState();

    // In a perfectly functioning opportunistic system, the token trajectory should show
    // the massive spikes in Turn 2 and 3 being immediately resolved by the background tasks.
    // The final projection should fit neatly under the Max Tokens limit.

    expect(goldenState).toMatchSnapshot();
  });
});
|
||||
Reference in New Issue
Block a user