diff --git a/integration-tests/context-compress-interactive.compress-empty.json b/integration-tests/context-compress-interactive.compress-empty.json new file mode 100644 index 0000000000..5366bf317b --- /dev/null +++ b/integration-tests/context-compress-interactive.compress-empty.json @@ -0,0 +1,18 @@ +{ + "generateContent": [ + { + "candidates": [ + { + "content": { + "role": "model", + "parts": [ + { + "text": "This is more than the 5 tokens we return below which will trigger an error" + } + ] + } + } + ] + } + ] +} diff --git a/integration-tests/context-compress-interactive.compress-failure.json b/integration-tests/context-compress-interactive.compress-failure.json new file mode 100644 index 0000000000..939189366b --- /dev/null +++ b/integration-tests/context-compress-interactive.compress-failure.json @@ -0,0 +1,40 @@ +{ + "generateContent": [ + { + "candidates": [ + { + "content": { + "role": "model", + "parts": [ + { + "text": "This is more than the 5 tokens we return below which will trigger an error" + } + ] + } + } + ] + } + ], + "generateContentStream": [ + [ + { + "candidates": [ + { + "content": { + "role": "model", + "parts": [ + { + "text": "The initial response from the model" + } + ] + }, + "finishReason": "STOP" + } + ], + "usageMetadata": { + "promptTokenCount": 5 + } + } + ] + ] +} diff --git a/integration-tests/context-compress-interactive.compress.json b/integration-tests/context-compress-interactive.compress.json new file mode 100644 index 0000000000..b9d470fc9c --- /dev/null +++ b/integration-tests/context-compress-interactive.compress.json @@ -0,0 +1,40 @@ +{ + "generateContent": [ + { + "candidates": [ + { + "content": { + "role": "model", + "parts": [ + { + "text": "A summary of the conversation." 
+ } + ] + } + } + ] + } + ], + "generateContentStream": [ + [ + { + "candidates": [ + { + "content": { + "role": "model", + "parts": [ + { + "text": "The initial response from the model" + } + ] + }, + "finishReason": "STOP" + } + ], + "usageMetadata": { + "promptTokenCount": 100000 + } + } + ] + ] +} diff --git a/integration-tests/context-compress-interactive.test.ts b/integration-tests/context-compress-interactive.test.ts index 030efe512e..5be9b73141 100644 --- a/integration-tests/context-compress-interactive.test.ts +++ b/integration-tests/context-compress-interactive.test.ts @@ -6,6 +6,7 @@ import { expect, describe, it, beforeEach, afterEach } from 'vitest'; import { TestRig } from './test-helper.js'; +import { join } from 'node:path'; describe('Interactive Mode', () => { let rig: TestRig; @@ -18,50 +19,78 @@ describe('Interactive Mode', () => { await rig.cleanup(); }); - // TODO(#11062): Make this test reliable by not using the actual Gemini model - // We could not rely on the following mechanisms that have already shown to be - // flakey: - // 1. Asking a prompt like "Output 1000 tokens and the inventor of the lightbulb" - // --> This was b/c the model occasionally did not output einstein and - // we are not able to trigger the compression piece - // 2. Asking it to out a specific output and waiting for that. - // --> The expect catches the input and thinks that is the output so the - // /compress gets called too early - it.skip('should trigger chat compression with /compress command', async () => { - rig.setup('interactive-compress-success'); + it('should trigger chat compression with /compress command', async () => { + await rig.setup('interactive-compress-test', { + fakeResponsesPath: join( + import.meta.dirname, + 'context-compress-interactive.compress.json', + ), + }); const run = await rig.runInteractive(); - // Generate a long context to make compression viable. - const longPrompt = - 'Write a 200 word story about a robot. 
The story MUST end with the following output: THE_END'; + await run.type('Initial prompt'); + await run.type('\r'); - await run.sendKeys(longPrompt); - await run.sendKeys('\r'); - - // Wait for the specific end marker. - await run.expectText('THE_END', 30000); + await run.expectText('The initial response from the model', 5000); await run.type('/compress'); - await run.sendKeys('\r'); + await run.type('\r'); const foundEvent = await rig.waitForTelemetryEvent( 'chat_compression', - 90000, + 5000, ); expect(foundEvent, 'chat_compression telemetry event was not found').toBe( true, ); + + await run.expectText('Chat history compressed', 5000); }); - it('should handle /compress command on empty history', async () => { - rig.setup('interactive-compress-empty'); + it('should handle compression failure on token inflation', async () => { + await rig.setup('interactive-compress-failure', { + fakeResponsesPath: join( + import.meta.dirname, + 'context-compress-interactive.compress-failure.json', + ), + }); const run = await rig.runInteractive(); + await run.type('Initial prompt'); + await run.type('\r'); + + await run.expectText('The initial response from the model', 25000); + await run.type('/compress'); await run.type('\r'); - await run.expectText('Nothing to compress.', 25000); + await run.expectText('compression was not beneficial', 5000); + + // Verify the chat_compression telemetry event IS logged for the failed compression + const foundEvent = await rig.waitForTelemetryEvent( + 'chat_compression', + 5000, + ); + expect( + foundEvent, + 'chat_compression telemetry event should be found for failures', + ).toBe(true); + }); + + it('should handle /compress command on empty history', async () => { + rig.setup('interactive-compress-empty', { + fakeResponsesPath: join( + import.meta.dirname, + 'context-compress-interactive.compress-empty.json', + ), + }); + + const run = await rig.runInteractive(); + await run.type('/compress'); + await run.type('\r'); + + await run.expectText('Nothing to compress.', 5000); // 
Verify no telemetry event is logged for NOOP const foundEvent = await rig.waitForTelemetryEvent( diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts index fbc965c0ab..d5a9026726 100644 --- a/integration-tests/test-helper.ts +++ b/integration-tests/test-helper.ts @@ -255,6 +255,7 @@ export class TestRig { testDir: string | null; testName?: string; _lastRunStdout?: string; + fakeResponsesPath?: string; constructor() { this.bundlePath = join(__dirname, '..', 'bundle/gemini.js'); @@ -263,12 +264,19 @@ export class TestRig { setup( testName: string, - options: { settings?: Record } = {}, + options: { + settings?: Record; + fakeResponsesPath?: string; + } = {}, ) { this.testName = testName; const sanitizedName = sanitizeTestName(testName); this.testDir = join(env['INTEGRATION_TEST_FILE_DIR']!, sanitizedName); mkdirSync(this.testDir, { recursive: true }); + if (options.fakeResponsesPath) { + this.fakeResponsesPath = join(this.testDir, 'fake-responses.json'); + fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath); + } // Create a settings file to point the CLI to the local collector const geminiDir = join(this.testDir, GEMINI_DIR); @@ -335,6 +343,9 @@ export class TestRig { const initialArgs = isNpmReleaseTest ? 
extraInitialArgs : [this.bundlePath, ...extraInitialArgs]; + if (this.fakeResponsesPath) { + initialArgs.push('--fake-responses', this.fakeResponsesPath); + } return { command, initialArgs }; } diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 96ab5c04d1..f6ae37a0b6 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -68,6 +68,7 @@ export interface CliArgs { useSmartEdit: boolean | undefined; useWriteTodos: boolean | undefined; outputFormat: string | undefined; + fakeResponses: string | undefined; } export async function parseArguments(settings: Settings): Promise { @@ -193,6 +194,10 @@ export async function parseArguments(settings: Settings): Promise { description: 'The format of the CLI output.', choices: ['text', 'json', 'stream-json'], }) + .option('fake-responses', { + type: 'string', + description: 'Path to a file with fake model responses for testing.', + }) .deprecateOption( 'prompt', 'Use the positional prompt instead. This flag will be removed in a future version.', @@ -649,6 +654,7 @@ export async function loadCliConfig( settings.tools?.enableMessageBusIntegration ?? false, codebaseInvestigatorSettings: settings.experimental?.codebaseInvestigatorSettings, + fakeResponses: argv.fakeResponses, retryFetchErrors: settings.general?.retryFetchErrors ?? 
false, ptyInfo: ptyInfo?.name, }); diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index 931e35a3b5..e1c04e2cfd 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -339,6 +339,7 @@ describe('gemini.tsx main function kitty protocol', () => { useSmartEdit: undefined, useWriteTodos: undefined, outputFormat: undefined, + fakeResponses: undefined, }); await main(); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index b7d1fa7add..78632d0480 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -283,6 +283,7 @@ export interface ConfigParameters { continueOnFailedApiCall?: boolean; retryFetchErrors?: boolean; enableShellOutputEfficiency?: boolean; + fakeResponses?: string; ptyInfo?: string; disableYoloMode?: boolean; } @@ -381,6 +382,7 @@ export class Config { private readonly continueOnFailedApiCall: boolean; private readonly retryFetchErrors: boolean; private readonly enableShellOutputEfficiency: boolean; + readonly fakeResponses?: string; private readonly disableYoloMode: boolean; constructor(params: ConfigParameters) { @@ -489,6 +491,7 @@ export class Config { params.enableShellOutputEfficiency ?? true; this.extensionManagement = params.extensionManagement ?? true; this.storage = new Storage(this.targetDir); + this.fakeResponses = params.fakeResponses; this.enablePromptCompletion = params.enablePromptCompletion ?? 
false; this.fileExclusions = new FileExclusions(this); this.eventEmitter = params.eventEmitter; diff --git a/packages/core/src/core/contentGenerator.test.ts b/packages/core/src/core/contentGenerator.test.ts index 3084c84bd4..e1431b3550 100644 --- a/packages/core/src/core/contentGenerator.test.ts +++ b/packages/core/src/core/contentGenerator.test.ts @@ -15,13 +15,36 @@ import { createCodeAssistContentGenerator } from '../code_assist/codeAssist.js'; import { GoogleGenAI } from '@google/genai'; import type { Config } from '../config/config.js'; import { LoggingContentGenerator } from './loggingContentGenerator.js'; +import { FakeContentGenerator } from './fakeContentGenerator.js'; vi.mock('../code_assist/codeAssist.js'); vi.mock('@google/genai'); +vi.mock('./fakeContentGenerator.js'); const mockConfig = {} as unknown as Config; describe('createContentGenerator', () => { + it('should create a FakeContentGenerator', async () => { + const mockGenerator = {} as unknown as ContentGenerator; + vi.mocked(FakeContentGenerator.fromFile).mockResolvedValue( + mockGenerator as never, + ); + const fakeResponsesFile = 'fake/responses.yaml'; + const mockConfigWithFake = { + fakeResponses: fakeResponsesFile, + } as unknown as Config; + const generator = await createContentGenerator( + { + authType: AuthType.USE_GEMINI, + }, + mockConfigWithFake, + ); + expect(FakeContentGenerator.fromFile).toHaveBeenCalledWith( + fakeResponsesFile, + ); + expect(generator).toEqual(mockGenerator); + }); + it('should create a CodeAssistContentGenerator', async () => { const mockGenerator = {} as unknown as ContentGenerator; vi.mocked(createCodeAssistContentGenerator).mockResolvedValue( diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index e4e8ebb861..487356a19e 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -19,6 +19,7 @@ import type { Config } from '../config/config.js'; import type { 
UserTierId } from '../code_assist/types.js'; import { LoggingContentGenerator } from './loggingContentGenerator.js'; import { InstallationManager } from '../utils/installationManager.js'; +import { FakeContentGenerator } from './fakeContentGenerator.js'; /** * Interface abstracting the core functionalities for generating content and counting tokens. @@ -105,6 +106,10 @@ export async function createContentGenerator( gcConfig: Config, sessionId?: string, ): Promise { + if (gcConfig.fakeResponses) { + return FakeContentGenerator.fromFile(gcConfig.fakeResponses); + } + const version = process.env['CLI_VERSION'] || process.version; const userAgent = `GeminiCLI/${version} (${process.platform}; ${process.arch})`; const baseHeaders: Record = { diff --git a/packages/core/src/core/fakeContentGenerator.test.ts b/packages/core/src/core/fakeContentGenerator.test.ts new file mode 100644 index 0000000000..5ccd92d5e3 --- /dev/null +++ b/packages/core/src/core/fakeContentGenerator.test.ts @@ -0,0 +1,205 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { FakeContentGenerator } from './fakeContentGenerator.js'; +import { promises } from 'node:fs'; +import type { FakeResponses } from './fakeContentGenerator.js'; +import type { + GenerateContentResponse, + CountTokensResponse, + EmbedContentResponse, + GenerateContentParameters, + CountTokensParameters, + EmbedContentParameters, +} from '@google/genai'; + +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + promises: { + ...actual.promises, + readFile: vi.fn(), + }, + }; +}); + +const mockReadFile = vi.mocked(promises.readFile); + +describe('FakeContentGenerator', () => { + const fakeResponses: FakeResponses = { + generateContent: [ + { + candidates: [ + { content: { parts: [{ text: 'response1' }], role: 'model' } }, + ], + }, + ] as 
GenerateContentResponse[], + generateContentStream: [ + [ + { + candidates: [ + { content: { parts: [{ text: 'chunk1' }], role: 'model' } }, + ], + }, + { + candidates: [ + { content: { parts: [{ text: 'chunk2' }], role: 'model' } }, + ], + }, + ], + ] as GenerateContentResponse[][], + countTokens: [{ totalTokens: 10 }] as CountTokensResponse[], + embedContent: [ + { embeddings: [{ values: [1, 2, 3] }] }, + ] as EmbedContentResponse[], + }; + + beforeEach(() => { + vi.resetAllMocks(); + }); + + it('should return responses for generateContent', async () => { + const generator = new FakeContentGenerator(fakeResponses); + const response = await generator.generateContent( + {} as GenerateContentParameters, + 'id', + ); + expect(response).toEqual(fakeResponses.generateContent[0]); + }); + + it('should throw error when no more generateContent responses', async () => { + const generator = new FakeContentGenerator({ + ...fakeResponses, + generateContent: [], + }); + await expect( + generator.generateContent({} as GenerateContentParameters, 'id'), + ).rejects.toThrowError('No more mock responses for generateContent'); + }); + + it('should return responses for generateContentStream', async () => { + const generator = new FakeContentGenerator(fakeResponses); + const stream = await generator.generateContentStream( + {} as GenerateContentParameters, + 'id', + ); + const responses = []; + for await (const response of stream) { + responses.push(response); + } + expect(responses).toEqual(fakeResponses.generateContentStream[0]); + }); + + it('should throw error when no more generateContentStream responses', async () => { + const generator = new FakeContentGenerator({ + ...fakeResponses, + generateContentStream: [], + }); + await expect( + generator.generateContentStream({} as GenerateContentParameters, 'id'), + ).rejects.toThrow('No more mock responses for generateContentStream'); + }); + + it('should return responses for countTokens', async () => { + const generator = new 
FakeContentGenerator(fakeResponses); + const response = await generator.countTokens({} as CountTokensParameters); + expect(response).toEqual(fakeResponses.countTokens[0]); + }); + + it('should throw error when no more countTokens responses', async () => { + const generator = new FakeContentGenerator({ + ...fakeResponses, + countTokens: [], + }); + await expect( + generator.countTokens({} as CountTokensParameters), + ).rejects.toThrowError('No more mock responses for countTokens'); + }); + + it('should return responses for embedContent', async () => { + const generator = new FakeContentGenerator(fakeResponses); + const response = await generator.embedContent({} as EmbedContentParameters); + expect(response).toEqual(fakeResponses.embedContent[0]); + }); + + it('should throw error when no more embedContent responses', async () => { + const generator = new FakeContentGenerator({ + ...fakeResponses, + embedContent: [], + }); + await expect( + generator.embedContent({} as EmbedContentParameters), + ).rejects.toThrowError('No more mock responses for embedContent'); + }); + + it('should handle multiple calls and exhaust responses', async () => { + const generator = new FakeContentGenerator(fakeResponses); + await generator.generateContent({} as GenerateContentParameters, 'id'); + await expect( + generator.generateContent({} as GenerateContentParameters, 'id'), + ).rejects.toThrow(); + }); + + describe('fromFile', () => { + it('should create a generator from a file', async () => { + const fileContent = JSON.stringify(fakeResponses); + mockReadFile.mockResolvedValue(fileContent); + + const generator = await FakeContentGenerator.fromFile('fake-path.json'); + const response = await generator.generateContent( + {} as GenerateContentParameters, + 'id', + ); + expect(response).toEqual(fakeResponses.generateContent[0]); + }); + }); + + describe('constructor with partial responses', () => { + it('should handle missing generateContent', async () => { + const responses = { 
...fakeResponses, generateContent: undefined }; + const generator = new FakeContentGenerator( + responses as unknown as FakeResponses, + ); + await expect( + generator.generateContent({} as GenerateContentParameters, 'id'), + ).rejects.toThrowError('No more mock responses for generateContent'); + }); + + it('should handle missing generateContentStream', async () => { + const responses = { ...fakeResponses, generateContentStream: undefined }; + const generator = new FakeContentGenerator( + responses as unknown as FakeResponses, + ); + await expect( + generator.generateContentStream({} as GenerateContentParameters, 'id'), + ).rejects.toThrowError( + 'No more mock responses for generateContentStream', + ); + }); + + it('should handle missing countTokens', async () => { + const responses = { ...fakeResponses, countTokens: undefined }; + const generator = new FakeContentGenerator( + responses as unknown as FakeResponses, + ); + await expect( + generator.countTokens({} as CountTokensParameters), + ).rejects.toThrowError('No more mock responses for countTokens'); + }); + + it('should handle missing embedContent', async () => { + const responses = { ...fakeResponses, embedContent: undefined }; + const generator = new FakeContentGenerator( + responses as unknown as FakeResponses, + ); + await expect( + generator.embedContent({} as EmbedContentParameters), + ).rejects.toThrowError('No more mock responses for embedContent'); + }); + }); +}); diff --git a/packages/core/src/core/fakeContentGenerator.ts b/packages/core/src/core/fakeContentGenerator.ts new file mode 100644 index 0000000000..9ef48b27e7 --- /dev/null +++ b/packages/core/src/core/fakeContentGenerator.ts @@ -0,0 +1,101 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + CountTokensResponse, + GenerateContentResponse, + GenerateContentParameters, + CountTokensParameters, + EmbedContentResponse, + EmbedContentParameters, +} from '@google/genai'; +import { 
promises } from 'node:fs'; +import type { ContentGenerator } from './contentGenerator.js'; +import type { UserTierId } from '../code_assist/types.js'; +import { safeJsonStringify } from '../utils/safeJsonStringify.js'; + +// Canned responses keyed by ContentGenerator method. All fields are optional: +// fixture files may supply only the methods a test exercises (the constructor +// defaults missing entries to empty lists). +export type FakeResponses = { + generateContent?: GenerateContentResponse[]; + generateContentStream?: GenerateContentResponse[][]; + countTokens?: CountTokensResponse[]; + embedContent?: EmbedContentResponse[]; +}; + +// A ContentGenerator that responds with canned responses. +// +// Typically these would come from a file, provided by the `--fake-responses` +// CLI argument. +export class FakeContentGenerator implements ContentGenerator { + private responses: Required<FakeResponses>; + private callCounters = { + generateContent: 0, + generateContentStream: 0, + countTokens: 0, + embedContent: 0, + }; + userTier?: UserTierId; + + constructor(responses: FakeResponses) { + this.responses = { + generateContent: responses.generateContent ?? [], + generateContentStream: responses.generateContentStream ?? [], + countTokens: responses.countTokens ?? 
[], + }; + } + + static async fromFile(filePath: string): Promise<FakeContentGenerator> { + const fileContent = await promises.readFile(filePath, 'utf-8'); + const responses = JSON.parse(fileContent) as FakeResponses; + return new FakeContentGenerator(responses); + } + + private getNextResponse<K extends keyof FakeResponses>( + method: K, + request: unknown, + ): Required<FakeResponses>[K][number] { + const response = this.responses[method][this.callCounters[method]++]; + if (!response) { + throw new Error( + `No more mock responses for ${method}, got request:\n` + + safeJsonStringify(request), + ); + } + return response; + } + + async generateContent( + _request: GenerateContentParameters, + _userPromptId: string, + ): Promise<GenerateContentResponse> { + return this.getNextResponse('generateContent', _request); + } + + async generateContentStream( + _request: GenerateContentParameters, + _userPromptId: string, + ): Promise<AsyncGenerator<GenerateContentResponse>> { + const responses = this.getNextResponse('generateContentStream', _request); + async function* stream() { + for (const response of responses) { + yield response; + } + } + return stream(); + } + + async countTokens( + _request: CountTokensParameters, + ): Promise<CountTokensResponse> { + return this.getNextResponse('countTokens', _request); + } + + async embedContent( + _request: EmbedContentParameters, + ): Promise<EmbedContentResponse> { + return this.getNextResponse('embedContent', _request); + } +}