mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-16 23:02:51 -07:00
testing
This commit is contained in:
@@ -132,7 +132,6 @@ export class ContextManager {
|
||||
this.tracer,
|
||||
this.env.tokenCalculator,
|
||||
);
|
||||
|
||||
this.historyObserver.start();
|
||||
}
|
||||
|
||||
|
||||
@@ -134,7 +134,43 @@ export class PipelineOrchestrator {
|
||||
|
||||
try {
|
||||
this.tracer.logEvent('Orchestrator', `Executing processor: ${procDef.processorId} (async)`);
|
||||
|
||||
// Before running, capture the state so we know what changed
|
||||
const beforeMap = new Map(currentEpisodes.map(ep => [ep.id, ep]));
|
||||
|
||||
currentEpisodes = await processor.process(currentEpisodes, state);
|
||||
|
||||
// Synthesize VariantReady events for anything that changed or was newly created
|
||||
for (const ep of currentEpisodes) {
|
||||
const original = beforeMap.get(ep.id);
|
||||
|
||||
// If an episode was transformed, or if it's a completely new synthetic episode (like a Snapshot)
|
||||
// we need to broadcast it so the ContextManager can cache it as a variant.
|
||||
if (!original || original !== ep) {
|
||||
const variantId = `v-${procDef.processorId.toLowerCase()}`;
|
||||
|
||||
// Determine variant type. StateSnapshot generates full 'snapshot' replacement nodes.
|
||||
// Masking/Squashing generate 'masked' or 'summary' in-place variants.
|
||||
let vType: 'snapshot' | 'summary' | 'masked' = 'masked';
|
||||
if (procDef.processorId.includes('Snapshot')) vType = 'snapshot';
|
||||
else if (procDef.processorId.includes('Semantic')) vType = 'summary';
|
||||
|
||||
this.eventBus.emitVariantReady({
|
||||
targetId: ep.id, // The ID of the modified or new episode
|
||||
variantId,
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
variant: {
|
||||
status: 'ready',
|
||||
type: vType,
|
||||
episode: vType === 'snapshot' ? ep : undefined,
|
||||
text: vType !== 'snapshot' ? (ep.yield?.text || (ep.trigger as any)?.semanticParts?.[0]?.presentation?.text || '') : undefined,
|
||||
recoveredTokens: ep.yield?.metadata?.currentTokens || 10,
|
||||
// For snapshots, we look at the transformations metadata to see what it replaced
|
||||
replacedEpisodeIds: vType === 'snapshot' ? currentState.map(c => c.id).filter(id => id !== ep.id && !currentEpisodes.find(ce => ce.id === id)) : undefined,
|
||||
} as any
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
debugLogger.error(`Pipeline ${pipeline.name} failed at ${procDef.processorId}:`, error);
|
||||
return; // Halt pipeline
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { ContextManager } from '../contextManager.js';
|
||||
import { AgentChatHistory } from '../../core/agentChatHistory.js';
|
||||
import type { Content } from '@google/genai';
|
||||
import type { SidecarConfig } from '../sidecar/types.js';
|
||||
import { ContextEnvironmentImpl } from '../sidecar/environmentImpl.js';
|
||||
import { ContextTracer } from '../tracer.js';
|
||||
import { ContextEventBus } from '../eventBus.js';
|
||||
|
||||
import { BlobDegradationProcessor } from '../processors/blobDegradationProcessor.js';
|
||||
import { ToolMaskingProcessor } from '../processors/toolMaskingProcessor.js';
|
||||
import { HistorySquashingProcessor } from '../processors/historySquashingProcessor.js';
|
||||
import { SemanticCompressionProcessor } from '../processors/semanticCompressionProcessor.js';
|
||||
import { StateSnapshotProcessor } from '../processors/stateSnapshotProcessor.js';
|
||||
import { EmergencyTruncationProcessor } from '../processors/emergencyTruncationProcessor.js';
|
||||
import { ProcessorRegistry } from '../sidecar/registry.js';
|
||||
|
||||
/** Per-turn token accounting captured by the harness for snapshot comparison. */
export interface TurnSummary {
  // Zero-based index of the simulated turn.
  turnIndex: number;
  // Token count measured immediately after the turn's messages were appended,
  // before background pipelines had a chance to run.
  tokensBeforeBackground: number;
  // Token count measured after background/blocking pipelines were given time
  // to emit variants.
  tokensAfterBackground: number;
}
|
||||
|
||||
export class SimulationHarness {
|
||||
public readonly chatHistory: AgentChatHistory;
|
||||
public contextManager!: ContextManager;
|
||||
public readonly eventBus: ContextEventBus;
|
||||
public config!: SidecarConfig;
|
||||
private tracer!: ContextTracer;
|
||||
private currentTurnIndex = 0;
|
||||
private tokenTrajectory: TurnSummary[] = [];
|
||||
|
||||
static async create(config: SidecarConfig, mockLlmClient: any, mockTempDir = '/tmp/sim'): Promise<SimulationHarness> {
|
||||
const harness = new SimulationHarness();
|
||||
await harness.init(config, mockLlmClient, mockTempDir);
|
||||
return harness;
|
||||
}
|
||||
|
||||
private constructor() {
|
||||
this.chatHistory = new AgentChatHistory();
|
||||
this.eventBus = new ContextEventBus();
|
||||
}
|
||||
|
||||
private async init(
|
||||
config: SidecarConfig,
|
||||
mockLlmClient: any,
|
||||
mockTempDir: string
|
||||
) {
|
||||
this.config = config;
|
||||
// Register all standard processors
|
||||
ProcessorRegistry.register({ id: 'BlobDegradationProcessor', create: (env, opts) => new BlobDegradationProcessor(env) });
|
||||
ProcessorRegistry.register({ id: 'ToolMaskingProcessor', create: (env, opts) => new ToolMaskingProcessor(env, opts as any) });
|
||||
ProcessorRegistry.register({ id: 'HistorySquashingProcessor', create: (env, opts) => new HistorySquashingProcessor(env, opts as any) });
|
||||
ProcessorRegistry.register({ id: 'SemanticCompressionProcessor', create: (env, opts) => new SemanticCompressionProcessor(env, opts as any) });
|
||||
ProcessorRegistry.register({ id: 'StateSnapshotProcessor', create: (env, opts) => new StateSnapshotProcessor(env, opts as any, env.eventBus) });
|
||||
ProcessorRegistry.register({ id: 'EmergencyTruncationProcessor', create: (env, opts) => new EmergencyTruncationProcessor(env, opts as any) });
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
(this as any).tracer = new ContextTracer({ targetDir: mockTempDir, sessionId: 'sim-session' });
|
||||
|
||||
// Using real token calculator instead of mock, so we test actual string sizes
|
||||
const InMemoryFS = (await import('../system/InMemoryFileSystem.js')).InMemoryFileSystem;
|
||||
const DetIdGen = (await import('../system/DeterministicIdGenerator.js')).DeterministicIdGenerator;
|
||||
|
||||
const env = new ContextEnvironmentImpl(
|
||||
mockLlmClient,
|
||||
'sim-prompt',
|
||||
'sim-session',
|
||||
mockTempDir,
|
||||
mockTempDir,
|
||||
this.tracer,
|
||||
4, // 4 chars per token average
|
||||
this.eventBus,
|
||||
new InMemoryFS(),
|
||||
new DetIdGen()
|
||||
);
|
||||
|
||||
this.contextManager = new ContextManager(config, env, this.tracer);
|
||||
this.contextManager.subscribeToHistory(this.chatHistory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simulates a single "Turn" (User input + Model/Tool outputs)
|
||||
* A turn might consist of multiple Content messages (e.g. user prompt -> model call -> user response -> model answer)
|
||||
*/
|
||||
async simulateTurn(messages: Content[]) {
|
||||
// 1. Append the new messages
|
||||
const currentHistory = this.chatHistory.get();
|
||||
await this.chatHistory.set([...currentHistory, ...messages]);
|
||||
|
||||
// 2. Measure tokens immediately after append (Before background processing)
|
||||
const tokensBefore = (this.contextManager as any).env.tokenCalculator.calculateEpisodeListTokens(
|
||||
this.contextManager.getWorkingBufferView()
|
||||
);
|
||||
console.log(`[Turn ${this.currentTurnIndex}] Tokens BEFORE: ${tokensBefore}`);
|
||||
|
||||
// 3. Yield to event loop to allow internal async subscribers and orchestrator to finish
|
||||
await new Promise(resolve => setTimeout(resolve, 50));
|
||||
|
||||
// 3.1 Simulate what projectCompressedHistory does with the sync handlers
|
||||
let currentView = this.contextManager.getWorkingBufferView();
|
||||
const currentTokens = (this.contextManager as any).env.tokenCalculator.calculateEpisodeListTokens(currentView);
|
||||
if (this.config.budget && currentTokens > this.config.budget.maxTokens) {
|
||||
console.log(`[Turn ${this.currentTurnIndex}] Sync panic triggered! ${currentTokens} > ${this.config.budget.maxTokens}`);
|
||||
const syncPipelines = this.config.pipelines.filter(p => p.execution === 'blocking');
|
||||
const orchestrator = (this.contextManager as any).orchestrator;
|
||||
for (const pipe of syncPipelines) {
|
||||
currentView = await orchestrator.executePipeline(pipe.name, currentView, {
|
||||
currentTokens,
|
||||
maxTokens: this.config.budget.maxTokens,
|
||||
retainedTokens: this.config.budget.retainedTokens,
|
||||
deficitTokens: currentTokens - this.config.budget.maxTokens,
|
||||
protectedEpisodeIds: new Set()
|
||||
});
|
||||
}
|
||||
|
||||
// Inject the truncated view back into the graph
|
||||
for (const ep of currentView) {
|
||||
if (!currentHistory.find(c => c === ep)) {
|
||||
this.eventBus.emitVariantReady({
|
||||
targetId: ep.id,
|
||||
variantId: 'v-emergency',
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
variant: {
|
||||
status: 'ready',
|
||||
type: 'masked', // Truncation is technically a mask
|
||||
text: ep.yield?.text || '',
|
||||
recoveredTokens: 0,
|
||||
} as any
|
||||
});
|
||||
}
|
||||
}
|
||||
// Wait for variant propagation
|
||||
await new Promise(resolve => setTimeout(resolve, 50));
|
||||
}
|
||||
|
||||
// 4. Measure tokens after background processors have (hopefully) emitted variants
|
||||
const tokensAfter = (this.contextManager as any).env.tokenCalculator.calculateEpisodeListTokens(
|
||||
this.contextManager.getWorkingBufferView()
|
||||
);
|
||||
console.log(`[Turn ${this.currentTurnIndex}] Tokens AFTER: ${tokensAfter}`);
|
||||
|
||||
this.tokenTrajectory.push({
|
||||
turnIndex: this.currentTurnIndex++,
|
||||
tokensBeforeBackground: tokensBefore,
|
||||
tokensAfterBackground: tokensAfter,
|
||||
});
|
||||
}
|
||||
|
||||
async getGoldenState() {
|
||||
const finalProjection = await this.contextManager.projectCompressedHistory();
|
||||
return {
|
||||
tokenTrajectory: this.tokenTrajectory,
|
||||
finalProjection
|
||||
};
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,103 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
|
||||
import { SimulationHarness } from './SimulationHarness.js';
|
||||
import type { SidecarConfig } from '../sidecar/types.js';
|
||||
|
||||
expect.addSnapshotSerializer({
|
||||
test: (val) =>
|
||||
typeof val === 'string' &&
|
||||
(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(val) ||
|
||||
/^\/tmp\/sim/.test(val)), // Mask temp directories and UUIDs
|
||||
print: (val) => (typeof val === 'string' && /^\/tmp\/sim/.test(val) ? '"<MOCKED_DIR>"' : '"<UUID>"'),
|
||||
});
|
||||
|
||||
describe('System Lifecycle Golden Tests', () => {
  // Pin Math.random so any randomized behavior inside the pipelines is
  // deterministic across runs — required for stable snapshots.
  beforeAll(() => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);
  });

  afterAll(() => {
    vi.restoreAllMocks();
  });

  // Config with deliberately tiny budgets so every compression pathway
  // (masking, blob degradation, snapshotting, emergency truncation) fires
  // within a handful of turns.
  const getAggressiveConfig = (): SidecarConfig => ({
    budget: { maxTokens: 4000, retainedTokens: 2000 }, // Extremely tight limits
    gcBackstop: { strategy: 'truncate', target: 'max' },
    pipelines: [
      {
        name: 'Pressure Relief', // Emits from eventBus 'budget_exceeded'
        execution: 'background',
        triggers: ['budget_exceeded'],
        processors: [
          { processorId: 'BlobDegradationProcessor' },
          // 50 tokens * 4 chars/token = mask any tool string > 200 chars.
          { processorId: 'ToolMaskingProcessor', options: { stringLengthThresholdTokens: 50 } }, // Mask any tool string > 200 chars
          { processorId: 'StateSnapshotProcessor', options: {} } // Squash old history
        ]
      },
      {
        // NOTE(review): name appears to be a magic string the projector is
        // hardcoded to look up — confirm against the projector implementation.
        name: 'Immediate Sanitization', // The magic string the projector is hardcoded to use
        execution: 'blocking',
        triggers: ['budget_exceeded'],
        processors: [
          { processorId: 'EmergencyTruncationProcessor', options: {} }
        ]
      }
    ]
  });

  // LLM stub: any summarization request resolves to a fixed marker string,
  // keeping snapshot output deterministic.
  const mockLlmClient = {
    generateContent: vi.fn().mockResolvedValue({
      text: '<MOCKED_STATE_SNAPSHOT_SUMMARY>',
    })
  };

  it('Scenario 1: Organic Growth with Huge Tool Output & Images', async () => {
    const harness = await SimulationHarness.create(getAggressiveConfig(), mockLlmClient);

    // Turn 0: System Prompt
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'System Instructions' }] },
      { role: 'model', parts: [{ text: 'Ack.' }] }
    ]);

    // Turn 1: Normal conversation
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Hello!' }] },
      { role: 'model', parts: [{ text: 'Hi, how can I help?' }] }
    ]);

    // Turn 2: Massive Tool Output (Should trigger ToolMaskingProcessor in background)
    // 'LOG '.repeat(5000) = 20k chars ≈ 5k tokens, blowing past maxTokens alone.
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Read the logs.' }] },
      { role: 'model', parts: [{ functionCall: { name: 'run_shell_command', args: { cmd: 'cat server.log' } } }] },
      { role: 'user', parts: [{ functionResponse: { name: 'run_shell_command', response: { output: 'LOG '.repeat(5000) } } }] },
      { role: 'model', parts: [{ text: 'The logs are very long.' }] }
    ]);

    // Turn 3: Multi-modal blob (Should trigger BlobDegradationProcessor)
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Look at this architecture diagram:' }, { inlineData: { mimeType: 'image/png', data: 'fake_base64_data_'.repeat(1000) } }] },
      { role: 'model', parts: [{ text: 'Nice diagram.' }] }
    ]);

    // Turn 4: More conversation to trigger StateSnapshot
    await harness.simulateTurn([
      { role: 'user', parts: [{ text: 'Can we refactor?' }] },
      { role: 'model', parts: [{ text: 'Yes we can.' }] }
    ]);

    // Get final state
    const goldenState = await harness.getGoldenState();

    // In a perfectly functioning opportunistic system, the token trajectory should show
    // the massive spikes in Turn 2 and 3 being immediately resolved by the background tasks.
    // The final projection should fit neatly under the Max Tokens limit.

    expect(goldenState).toMatchSnapshot();
  });
});
|
||||
Reference in New Issue
Block a user