next steps
@@ -3,12 +3,10 @@
  * Copyright 2026 Google LLC
  * SPDX-License-Identifier: Apache-2.0
  */
-import { createMockEnvironment } from '../testing/contextTestUtils.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 import { BlobDegradationProcessor } from './blobDegradationProcessor.js';
-import type { Episode, UserPrompt } from '../ir/types.js';
-import type { ContextAccountingState } from '../pipeline.js';
-import { randomUUID } from 'node:crypto';
+import type { UserPrompt } from '../ir/types.js';
 import type { ContextEnvironment } from '../sidecar/environment.js';
 import { InMemoryFileSystem } from '../system/InMemoryFileSystem.js';

@@ -24,46 +22,19 @@ describe('BlobDegradationProcessor', () => {
     processor = new BlobDegradationProcessor(env);
   });

-  const getDummyState = (
-    isSatisfied = false,
-    deficit = 0,
-    protectedIds = new Set<string>(),
-  ): ContextAccountingState => ({
-    currentTokens: 5000,
-    maxTokens: 10000,
-    retainedTokens: 4000,
-    deficitTokens: deficit,
-    protectedEpisodeIds: protectedIds,
-    isBudgetSatisfied: isSatisfied,
-  });
-
   it('degrades inline_data into a text reference and saves to disk', async () => {
     const dummyImageBase64 = Buffer.from('fake-image-data').toString('base64');

-    const ep: Episode = {
-      id: 'ep-1',
-      timestamp: Date.now(),
-      trigger: {
-        id: randomUUID(),
-        type: 'USER_PROMPT',
-        semanticParts: [
-          { type: 'text', text: 'Look at this image:' },
-          {
-            type: 'inline_data',
-            mimeType: 'image/png',
-            data: dummyImageBase64,
-          },
-        ],
-        metadata: {
-          originalTokens: 300,
-          currentTokens: 300,
-          transformations: [],
-        },
-      },
-      steps: [],
-    };
+    const ep = createDummyEpisode('ep-1', 'USER_PROMPT', [
+      { type: 'text', text: 'Look at this image:' },
+      {
+        type: 'inline_data',
+        mimeType: 'image/png',
+        data: dummyImageBase64,
+      },
+    ]);

-    const state = getDummyState(false, 500, new Set());
+    const state = createDummyState(false, 500);
     const result = await processor.process([ep], state);

     const parts = (result[0].trigger as UserPrompt).semanticParts;
@@ -73,12 +44,8 @@ describe('BlobDegradationProcessor', () => {

     // Inline data should be degraded
     expect(parts[1].presentation).toBeDefined();
-    expect(parts[1].presentation!.text).toContain(
-      '[Multi-Modal Blob (image/png',
-    );
-    expect(parts[1].presentation!.text).toContain(
-      'degraded to text to preserve context window',
-    );
+    expect(parts[1].presentation!.text).toContain('[Multi-Modal Blob (image/png');
+    expect(parts[1].presentation!.text).toContain('degraded to text to preserve context window');

     // Verify it was written to fake FS
     expect(fileSystem.getFiles().size).toBeGreaterThan(0);
@@ -89,39 +56,21 @@ describe('BlobDegradationProcessor', () => {
   });

   it('degrades file_data into a text reference without disk write', async () => {
-    const ep: Episode = {
-      id: 'ep-2',
-      timestamp: Date.now(),
-      trigger: {
-        id: randomUUID(),
-        type: 'USER_PROMPT',
-        semanticParts: [
-          {
-            type: 'file_data',
-            mimeType: 'application/pdf',
-            fileUri: 'gs://fake-bucket/doc.pdf',
-          },
-        ],
-        metadata: {
-          originalTokens: 300,
-          currentTokens: 300,
-          transformations: [],
-        },
-      },
-      steps: [],
-    };
+    const ep = createDummyEpisode('ep-2', 'USER_PROMPT', [
+      {
+        type: 'file_data',
+        mimeType: 'application/pdf',
+        fileUri: 'gs://fake-bucket/doc.pdf',
+      },
+    ]);

-    const state = getDummyState(false, 500, new Set());
+    const state = createDummyState(false, 500);
     const result = await processor.process([ep], state);

     const parts = (result[0].trigger as UserPrompt).semanticParts;
     expect(parts[0].presentation).toBeDefined();
-    expect(parts[0].presentation!.text).toContain(
-      '[File Reference (application/pdf)',
-    );
-    expect(parts[0].presentation!.text).toContain(
-      'Original URI: gs://fake-bucket/doc.pdf',
-    );
+    expect(parts[0].presentation!.text).toContain('[File Reference (application/pdf)');
+    expect(parts[0].presentation!.text).toContain('Original URI: gs://fake-bucket/doc.pdf');

     expect(fileSystem.getFiles().size).toBe(0);
   });
@@ -0,0 +1,96 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { EmergencyTruncationProcessor } from './emergencyTruncationProcessor.js';
import type { ContextEnvironment } from '../sidecar/environment.js';

describe('EmergencyTruncationProcessor', () => {
  let processor: EmergencyTruncationProcessor;
  let env: ContextEnvironment;

  beforeEach(() => {
    vi.resetAllMocks();
    env = createMockEnvironment();
    // Force token calculator to return exactly what we tell it for deterministic testing
    vi.spyOn(env.tokenCalculator, 'calculateEpisodeListTokens').mockImplementation((episodes) => {
      // Just sum up the metadata originalTokens for our dummy episodes
      return episodes.reduce((acc, ep) => acc + (ep.trigger.metadata.originalTokens || 100), 0);
    });

    processor = new EmergencyTruncationProcessor(env, {});
  });

  it('bypasses processing if currentTokens <= maxTokens', async () => {
    const episodes = [
      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'short' }]),
    ];
    // State says we are under budget (5000 < 10000)
    const state = createDummyState(true, 0, new Set(), 5000, 10000);

    const result = await processor.process(episodes, state);
    expect(result).toStrictEqual(episodes);
    expect(result.length).toBe(1);
  });

  it('truncates episodes from the front (oldest) until targetTokens is met', async () => {
    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'oldest' }]);
    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'middle' }]);
    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'newest' }]);

    // Each is worth 100 tokens according to our mock
    const episodes = [ep1, ep2, ep3];

    // We have 300 tokens, but max is 200. We need to drop 100 tokens.
    const state = createDummyState(false, 100, new Set(), 300, 200);

    const result = await processor.process(episodes, state);

    // It should drop the FIRST episode (ep-1) and keep the rest.
    expect(result.length).toBe(2);
    expect(result[0].id).toBe('ep-2');
    expect(result[1].id).toBe('ep-3');
  });

  it('never drops protected episodes (e.g. system instructions)', async () => {
    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'protected system prompt' }]);
    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'middle' }]);
    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'newest' }]);

    const episodes = [ep1, ep2, ep3];

    // We have 300 tokens, max is 200. We need to drop 100 tokens.
    // However, ep-1 is protected!
    const state = createDummyState(false, 100, new Set(['ep-1']), 300, 200);

    const result = await processor.process(episodes, state);

    // It should SKIP dropping ep-1 (protected) and drop ep-2 instead.
    expect(result.length).toBe(2);
    expect(result[0].id).toBe('ep-1'); // Protected, survived
    expect(result[1].id).toBe('ep-3'); // Survivor
  });

  it('can drop multiple episodes if the deficit is huge', async () => {
    const ep1 = createDummyEpisode('ep-1', 'USER_PROMPT', []);
    const ep2 = createDummyEpisode('ep-2', 'USER_PROMPT', []);
    const ep3 = createDummyEpisode('ep-3', 'USER_PROMPT', []);

    const episodes = [ep1, ep2, ep3];

    // We have 300 tokens, max is 50. We need to drop 250 tokens!
    const state = createDummyState(false, 250, new Set(), 300, 50);

    const result = await processor.process(episodes, state);

    // Dropping ep1 leaves 200 tokens and dropping ep2 leaves 100 tokens;
    // 100 still exceeds the max of 50, so ep3 is dropped as well.
    expect(result.length).toBe(0);
  });
});
@@ -3,16 +3,14 @@
  * Copyright 2026 Google LLC
  * SPDX-License-Identifier: Apache-2.0
  */
-import { createMockEnvironment } from '../testing/contextTestUtils.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
 import { describe, it, expect, beforeEach } from 'vitest';
 import { HistorySquashingProcessor } from './historySquashingProcessor.js';
 import type {
-  Episode,
   UserPrompt,
   AgentThought,
   AgentYield,
 } from '../ir/types.js';
-import type { ContextAccountingState } from '../pipeline.js';
 import { randomUUID } from 'node:crypto';

 describe('HistorySquashingProcessor', () => {

@@ -24,37 +22,10 @@ describe('HistorySquashingProcessor', () => {
     });
   });

-  const getDummyState = (
-    isSatisfied = false,
-    deficit = 0,
-    protectedIds = new Set<string>(),
-  ): ContextAccountingState => ({
-    currentTokens: 5000,
-    maxTokens: 10000,
-    retainedTokens: 4000,
-    deficitTokens: deficit,
-    protectedEpisodeIds: protectedIds,
-    isBudgetSatisfied: isSatisfied,
-  });
-
-  const createDummyEpisode = (
-    id: string,
-    userText: string,
-    modelThought: string,
-  ): Episode => ({
-    id,
-    timestamp: Date.now(),
-    trigger: {
-      id: randomUUID(),
-      type: 'USER_PROMPT',
-      semanticParts: [{ type: 'text', text: userText }],
-      metadata: {
-        originalTokens: 1000,
-        currentTokens: 1000,
-        transformations: [],
-      },
-    },
-    steps: [
+  const createThoughtEpisode = (id: string, userText: string, modelThought: string) => {
+    const ep = createDummyEpisode(id, 'USER_PROMPT', [{ type: 'text', text: userText }]);
+    // Replace the tool steps with a thought step for this test
+    ep.steps = [
       {
         id: randomUUID(),
         type: 'AGENT_THOUGHT',

@@ -65,12 +36,13 @@ describe('HistorySquashingProcessor', () => {
         transformations: [],
       },
     },
-    ],
-  });
+    ];
+    return ep;
+  };

   it('bypasses processing if budget is satisfied', async () => {
-    const episodes = [createDummyEpisode('1', 'short text', 'short thought')];
-    const state = getDummyState(true);
+    const episodes = [createThoughtEpisode('1', 'short text', 'short thought')];
+    const state = createDummyState(true);

     const result = await processor.process(episodes, state);

@@ -83,8 +55,8 @@ describe('HistorySquashingProcessor', () => {
   it('skips protected episodes', async () => {
     // 500 chars = ~125 tokens. Limit is 100 tokens, so it WOULD truncate if not protected.
     const longText = 'A'.repeat(500);
-    const episodes = [createDummyEpisode('ep-1', longText, 'short thought')];
-    const state = getDummyState(false, 100, new Set(['ep-1']));
+    const episodes = [createThoughtEpisode('ep-1', longText, 'short thought')];
+    const state = createDummyState(false, 100, new Set(['ep-1']));

     const result = await processor.process(episodes, state);

@@ -96,8 +68,8 @@ describe('HistorySquashingProcessor', () => {
   it('truncates both UserPrompts and AgentThoughts', async () => {
     const longUser = 'U'.repeat(1000); // ~250 tokens
     const longModel = 'M'.repeat(1000); // ~250 tokens
-    const episodes = [createDummyEpisode('ep-2', longUser, longModel)];
-    const state = getDummyState(false, 500, new Set()); // High deficit, force truncation
+    const episodes = [createThoughtEpisode('ep-2', longUser, longModel)];
+    const state = createDummyState(false, 500); // High deficit, force truncation

     const result = await processor.process(episodes, state);

@@ -123,13 +95,13 @@ describe('HistorySquashingProcessor', () => {
     const longUser1 = 'A'.repeat(1000);
     const longUser2 = 'B'.repeat(1000);
     const episodes = [
-      createDummyEpisode('ep-3', longUser1, 'short'),
-      createDummyEpisode('ep-4', longUser2, 'short'),
+      createThoughtEpisode('ep-3', longUser1, 'short'),
+      createThoughtEpisode('ep-4', longUser2, 'short'),
     ];

     // Set deficit to exactly what ONE truncation will save
     // Original = ~250 tokens. Limit = 100. Truncation saves ~150 tokens.
-    const state = getDummyState(false, 150, new Set());
+    const state = createDummyState(false, 150);

     const result = await processor.process(episodes, state);

@@ -144,7 +116,7 @@ describe('HistorySquashingProcessor', () => {

   it('truncates IrNodes', async () => {
     const longYield = 'Y'.repeat(1000); // ~250 tokens
-    const ep = createDummyEpisode('ep-5', 'short', 'short');
+    const ep = createThoughtEpisode('ep-5', 'short', 'short');
     ep.yield = {
       id: randomUUID(),
       type: 'AGENT_YIELD',

@@ -156,7 +128,7 @@ describe('HistorySquashingProcessor', () => {
       },
     };

-    const state = getDummyState(false, 500, new Set());
+    const state = createDummyState(false, 500);
     const result = await processor.process([ep], state);

     const yieldPart = result[0].yield as AgentYield;

@@ -4,16 +4,14 @@
  * SPDX-License-Identifier: Apache-2.0
  */

-import { createMockEnvironment } from '../testing/contextTestUtils.js';
+import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 import { SemanticCompressionProcessor } from './semanticCompressionProcessor.js';
 import type {
-  Episode,
   UserPrompt,
   ToolExecution,
   AgentThought,
 } from '../ir/types.js';
-import type { ContextAccountingState } from '../pipeline.js';
 import { randomUUID } from 'node:crypto';
 import type { BaseLlmClient } from 'src/core/baseLlmClient.js';

@@ -27,51 +25,32 @@ describe('SemanticCompressionProcessor', () => {
     });

     const env = createMockEnvironment();
     // Re-mock llmClient properly
     vi.spyOn(env, 'llmClient', 'get').mockReturnValue({ generateContent: generateContentMock } as unknown as BaseLlmClient);

     processor = new SemanticCompressionProcessor(env, {
       nodeThresholdTokens: 2000,
     });
   });

-  const getDummyState = (
-    isSatisfied = false,
-    deficit = 0,
-    protectedIds = new Set<string>(),
-  ): ContextAccountingState => ({
-    currentTokens: 5000,
-    maxTokens: 10000,
-    retainedTokens: 4000,
-    deficitTokens: deficit,
-    protectedEpisodeIds: protectedIds,
-    isBudgetSatisfied: isSatisfied,
-  });
-
-  const createDummyEpisode = (
+  const createEpisodeWithThoughtsAndTools = (
     id: string,
     userText: string,
     thoughtText: string,
     toolObs: string,
-  ): Episode => ({
-    id,
-    timestamp: Date.now(),
-    trigger: {
-      id: randomUUID(),
-      type: 'USER_PROMPT',
-      semanticParts: [{ type: 'text', text: userText }],
-      metadata: {
-        originalTokens: 3800,
-        currentTokens: 3800,
-        transformations: [],
-      },
-    },
-    steps: [
+  ) => {
+    const ep = createDummyEpisode(id, 'USER_PROMPT', [{ type: 'text', text: userText }]);
+    // We override metadata for threshold triggering
+    ep.trigger.metadata.currentTokens = 3800;
+
+    ep.steps = [
       {
         id: randomUUID(),
         type: 'AGENT_THOUGHT',
         text: thoughtText,
         metadata: {
-          originalTokens: 100,
-          currentTokens: 100,
+          originalTokens: 3800,
+          currentTokens: 3800,
           transformations: [],
         },
       },
@@ -88,23 +67,24 @@ describe('SemanticCompressionProcessor', () => {
         transformations: [],
       },
     },
-    ],
-  });
+    ];
+    return ep;
+  };

   it('bypasses processing if budget is satisfied', async () => {
-    const episodes = [createDummyEpisode('1', 'short', 'short', 'short')];
-    const state = getDummyState(true);
+    const episodes = [createEpisodeWithThoughtsAndTools('1', 'short', 'short', 'short')];
+    const state = createDummyState(true);

     await processor.process(episodes, state);
     expect(generateContentMock).not.toHaveBeenCalled();
   });

   it('skips protected episodes even if over budget', async () => {
-    const massiveStr = 'M'.repeat(15000); // Exceeds threshold (10 * 4 = 40)
+    const massiveStr = 'M'.repeat(15000);
     const episodes = [
-      createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr),
+      createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr),
     ];
-    const state = getDummyState(false, 1000, new Set(['ep-1']));
+    const state = createDummyState(false, 1000, new Set(['ep-1']));

     await processor.process(episodes, state);
     expect(generateContentMock).not.toHaveBeenCalled();
@@ -113,9 +93,9 @@ describe('SemanticCompressionProcessor', () => {
   it('summarizes unprotected UserPrompts, Thoughts, and Tool observations until deficit is met', async () => {
     const massiveStr = 'M'.repeat(15000);
     const episodes = [
-      createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr),
+      createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr),
     ];
-    const state = getDummyState(false, 50000, new Set()); // Massive deficit, forces all 3 to summarize
+    const state = createDummyState(false, 50000); // Massive deficit, forces all 3 to summarize

     const result = await processor.process(episodes, state);
     expect(generateContentMock).toHaveBeenCalledTimes(3);
@@ -140,11 +120,11 @@ describe('SemanticCompressionProcessor', () => {
   it('stops calling LLM when deficit hits zero', async () => {
     const massiveStr = 'M'.repeat(15000);
     const episodes = [
-      createDummyEpisode('ep-1', massiveStr, massiveStr, massiveStr),
+      createEpisodeWithThoughtsAndTools('ep-1', massiveStr, massiveStr, massiveStr),
     ];

     // Set deficit low enough that ONE summary solves the problem
-    const state = getDummyState(false, 5, new Set());
+    const state = createDummyState(false, 5);

     await processor.process(episodes, state);
     // It should only compress the UserPrompt and then stop

@@ -0,0 +1,91 @@
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { createMockEnvironment, createDummyState, createDummyEpisode } from '../testing/contextTestUtils.js';
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { StateSnapshotProcessor } from './stateSnapshotProcessor.js';
import type { ContextEnvironment } from '../sidecar/environment.js';
import type { BaseLlmClient } from '../../core/baseLlmClient.js';

describe('StateSnapshotProcessor', () => {
  let processor: StateSnapshotProcessor;
  let env: ContextEnvironment;
  let generateContentMock: ReturnType<typeof vi.fn>;

  beforeEach(() => {
    vi.resetAllMocks();
    env = createMockEnvironment();

    generateContentMock = vi.fn().mockResolvedValue({
      text: 'Mocked Compressed State Snapshot!',
    });
    vi.spyOn(env, 'llmClient', 'get').mockReturnValue({ generateContent: generateContentMock } as unknown as BaseLlmClient);

    // Override token calc for testing
    vi.spyOn(env.tokenCalculator, 'estimateTokensForParts').mockReturnValue(100);

    processor = new StateSnapshotProcessor(env, {}, env.eventBus);
  });

  it('bypasses processing if deficit is <= 0', async () => {
    const episodes = [
      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'hello' }]),
    ];
    // current: 100, max: 1000, retained: 200 (deficit 0)
    const state = createDummyState(false, 0, new Set(), 100, 1000, 200);

    const result = await processor.process(episodes, state);
    expect(result).toStrictEqual(episodes);
    expect(generateContentMock).not.toHaveBeenCalled();
  });

  it('bypasses processing if not enough episodes to summarize (needs at least 2 inner episodes)', async () => {
    const episodes = [
      createDummyEpisode('ep-sys', 'SYSTEM_EVENT', []),
      createDummyEpisode('ep-active', 'USER_PROMPT', [{ text: 'help' }]),
    ];

    // current: 1000, max: 10000, retained: 500. Target deficit = 500
    const state = createDummyState(false, 500, new Set(), 1000, 10000, 500);

    const result = await processor.process(episodes, state);
    expect(result).toStrictEqual(episodes);
    expect(generateContentMock).not.toHaveBeenCalled();
  });

  it('summarizes intermediate episodes into a single snapshot episode', async () => {
    const episodes = [
      createDummyEpisode('ep-0', 'SYSTEM_EVENT', []),
      createDummyEpisode('ep-1', 'USER_PROMPT', [{ text: 'old 1' }]),
      createDummyEpisode('ep-2', 'USER_PROMPT', [{ text: 'old 2' }]),
      createDummyEpisode('ep-3', 'USER_PROMPT', [{ text: 'current' }]),
    ];

    // Target deficit = 200
    const state = createDummyState(false, 200, new Set(), 1000, 10000, 800);

    const result = await processor.process(episodes, state);

    // We started with 4 episodes.
    // Episodes [1, 2] were synthesized into a single new Snapshot episode.
    // Final array should be: [0, SNAPSHOT, 3] = length 3.
    expect(result.length).toBe(3);
    expect(result[0].id).toBe('ep-0');

    const snapshotEp = result[1];
    expect(snapshotEp.yield).toBeDefined();
    expect(snapshotEp.yield!.text).toContain('<CONTEXT_SNAPSHOT>');
    expect(snapshotEp.yield!.text).toContain('Mocked Compressed State Snapshot!');

    expect(result[2].id).toBe('ep-3');

    expect(generateContentMock).toHaveBeenCalledTimes(1);

    const llmArgs = generateContentMock.mock.calls[0][0];
    expect(llmArgs.contents[0].parts[0].text).toContain('old 1');
    expect(llmArgs.contents[0].parts[0].text).toContain('old 2');
  });
});
@@ -0,0 +1,40 @@
# Context Pipeline Testing Strategy & Audit

## Philosophy: Defense in Depth
Our testing strategy avoids the "endless tax" of brittle tests by strictly separating concerns:
1. **Unit Tests (Processors, System Fakes, Mappers):** Exhaustively test logical boundaries, token math, and state transformations. Driven by shared, DRY test factories (no repetitive boilerplate).
2. **Component Tests (ContextManager, Orchestrator):** Test the *wiring* and *triggers*. Verify that barriers block, background pipelines execute, and events fire correctly.
3. **Golden / E2E Tests:** Test emergent behavior. Pass in complex, raw chat histories and assert the exact final projected `Content[]` output against committed JSON snapshots.

---

## Audit Checklist & Coverage Tracker

### 1. The Tooling Library (`contextTestUtils.ts`)
- [ ] Implement `ContextTestBuilder` or shared factory functions (`createDummyEpisode`, `createDummyState`); a usage sketch follows this list.
- [ ] Ensure all existing tests are migrated to use these helpers to establish the pattern.
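
A minimal sketch of the intended factory usage, with signatures taken from the `contextTestUtils.ts` helpers added in this commit (the surrounding test setup is assumed):

```ts
import { createDummyEpisode, createDummyState } from '../testing/contextTestUtils.js';

// Two episodes: one we will protect, one eligible for transformation.
const episodes = [
  createDummyEpisode('ep-sys', 'USER_PROMPT', [{ type: 'text', text: 'system prompt' }]),
  createDummyEpisode('ep-old', 'USER_PROMPT', [{ type: 'text', text: 'an older turn' }]),
];
// Budget not satisfied, 100-token deficit, ep-sys protected; token counts use the defaults.
const state = createDummyState(false, 100, new Set(['ep-sys']));
```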

### 2. Unit Tests (The Processors)
Goal: Ensure every processor gracefully handles boundary conditions (budget satisfied vs. deficit), skips protected IDs, and correctly transforms IR; the recurring shape is sketched after this checklist.
- [ ] `BlobDegradationProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `ToolMaskingProcessor` (Mostly complete, needs migration to shared helpers)
- [ ] `HistorySquashingProcessor` (Audit coverage)
- [ ] `SemanticCompressionProcessor` (Audit coverage)
- [ ] `ContextTracer` (Complete)
- [ ] `SidecarLoader` (Complete)
- [ ] `IrMapper` / `IrProjector` (Audit coverage)
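
The boundary cases above reduce to one recurring vitest shape. A sketch of the bypass case, assuming it runs inside a `describe` block that has already constructed `processor` (the processor under test is illustrative):

```ts
it('bypasses processing when the budget is satisfied', async () => {
  const episodes = [
    createDummyEpisode('ep-1', 'USER_PROMPT', [{ type: 'text', text: 'hi' }]),
  ];
  // isBudgetSatisfied = true: the processor must return its input untouched.
  const result = await processor.process(episodes, createDummyState(true));
  expect(result).toStrictEqual(episodes);
});
```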

### 3. Component Tests (The Orchestration)
Goal: Prove the sidecar configuration accurately drives runtime behavior without re-testing the processor logic itself; a hypothetical barrier test is sketched after this checklist.
- [ ] `PipelineOrchestrator`: Test sync vs. async routing, and trigger setup.
- [ ] `ContextManager`: Test `subscribeToHistory` (Opportunistic triggers).
- [ ] `ContextManager`: Test `project()` (Synchronous barrier triggers).
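
A hypothetical shape for the `project()` barrier test; the injectable orchestrator and its `runSync` method are assumptions for illustration, not the real API:

```ts
it('project() runs the synchronous barrier pipeline before returning', async () => {
  // Hypothetical: assumes ContextManager exposes its orchestrator for spying.
  const runSync = vi.spyOn(orchestrator, 'runSync');

  await contextManager.project();

  // The barrier must have executed exactly once before project() resolved.
  expect(runSync).toHaveBeenCalledTimes(1);
});
```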

### 4. Golden / E2E Tests
- [ ] `contextManager.golden.test.ts`: Ensure we have a scenario representing a "Day in the Life" of the CLI (some images, some huge tool outputs, deep history) mapping to a snapshot; a minimal shape is sketched below.
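
A golden test would then be little more than the following; the fixture loader and the `project()` call shape are assumptions about the eventual API, while the snapshot assertion is standard vitest:

```ts
it('projects a "day in the life" history to a stable snapshot', async () => {
  // Hypothetical fixture: raw chat history with images, huge tool outputs, deep history.
  const history = loadFixture('day-in-the-life.json');
  const contents = await contextManager.project(history);
  // Vitest records the snapshot on first run and diffs against the committed copy after.
  expect(contents).toMatchSnapshot();
});
```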

---

## Next Actions
1. Migrate processor tests to shared factories to DRY up the suite.
2. Go down the Unit Test checklist, ensuring full line/branch coverage for the core transformations.
@@ -13,6 +13,57 @@ import { ContextManager } from '../contextManager.js';

 import { InMemoryFileSystem } from '../system/InMemoryFileSystem.js';
 import { DeterministicIdGenerator } from '../system/DeterministicIdGenerator.js';
+import type { Episode } from '../ir/types.js';
+import type { ContextAccountingState } from '../pipeline.js';
+import { randomUUID } from 'node:crypto';
+
+// Shared factory: builds an accounting state with sensible defaults so tests
+// only spell out the fields they care about.
+export function createDummyState(
+  isSatisfied = false,
+  deficit = 0,
+  protectedIds = new Set<string>(),
+  currentTokens = 5000,
+  maxTokens = 10000,
+  retainedTokens = 4000,
+): ContextAccountingState {
+  return {
+    currentTokens,
+    maxTokens,
+    retainedTokens,
+    deficitTokens: deficit,
+    protectedEpisodeIds: protectedIds,
+    isBudgetSatisfied: isSatisfied,
+  };
+}
+
+// Shared factory: builds a minimal Episode with a USER_PROMPT or SYSTEM_EVENT
+// trigger and optional tool-execution steps.
+export function createDummyEpisode(
+  id: string,
+  type: 'USER_PROMPT' | 'SYSTEM_EVENT',
+  parts: unknown[] = [],
+  toolSteps: { intent: Record<string, unknown>; observation: Record<string, unknown>; toolName?: string; tokens?: { intent: number; observation: number } }[] = [],
+): Episode {
+  return {
+    id,
+    timestamp: Date.now(),
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+    trigger: {
+      id: randomUUID(),
+      type,
+      name: type === 'SYSTEM_EVENT' ? 'dummy_event' : undefined,
+      payload: type === 'SYSTEM_EVENT' ? {} : undefined,
+      semanticParts: type === 'USER_PROMPT' ? (parts as any) : undefined,
+      metadata: { originalTokens: 100, currentTokens: 100, transformations: [] },
+    } as any,
+    steps: toolSteps.map((step) => ({
+      id: randomUUID(),
+      type: 'TOOL_EXECUTION',
+      toolName: step.toolName || 'test_tool',
+      intent: step.intent,
+      observation: step.observation,
+      tokens: step.tokens || { intent: 50, observation: 50 },
+      metadata: { originalTokens: 100, currentTokens: 100, transformations: [] },
+    })),
+  };
+}
+
 export function createMockEnvironment(): ContextEnvironment {
   return {