Refactor: Introduce InteractiveRun class (#10947)

This commit is contained in:
Tommaso Sciortino
2025-10-11 08:33:01 -07:00
committed by GitHub
parent 09ef33ec3a
commit 5dc7059ba3
7 changed files with 137 additions and 130 deletions

View File

@@ -5,7 +5,7 @@
*/
import { expect, describe, it, beforeEach, afterEach } from 'vitest';
import { TestRig, type } from './test-helper.js';
import { TestRig } from './test-helper.js';
describe('Interactive Mode', () => {
let rig: TestRig;
@@ -21,20 +21,20 @@ describe('Interactive Mode', () => {
it('should trigger chat compression with /compress command', async () => {
await rig.setup('interactive-compress-test');
const ptyProcess = await rig.runInteractive();
const run = await rig.runInteractive();
const longPrompt =
'Dont do anything except returning a 1000 token long paragragh with the <name of the scientist who discovered theory of relativity> at the end to indicate end of response. This is a moderately long sentence.';
await type(ptyProcess, longPrompt);
await type(ptyProcess, '\r');
await run.type(longPrompt);
await run.type('\r');
await rig.waitForText('einstein', 25000);
await run.waitForText('einstein', 25000);
await type(ptyProcess, '/compress');
await run.type('/compress');
// A small delay to allow React to re-render the command list.
await new Promise((resolve) => setTimeout(resolve, 100));
await type(ptyProcess, '\r');
await run.type('\r');
const foundEvent = await rig.waitForTelemetryEvent(
'chat_compression',
@@ -49,12 +49,11 @@ describe('Interactive Mode', () => {
it.skip('should handle compression failure on token inflation', async () => {
await rig.setup('interactive-compress-test');
const ptyProcess = await rig.runInteractive();
const run = await rig.runInteractive();
await type(ptyProcess, '/compress');
await run.type('/compress');
await new Promise((resolve) => setTimeout(resolve, 100));
await type(ptyProcess, '\r');
await rig.waitForText('compression was not beneficial', 25000);
await run.type('\r');
await run.waitForText('compression was not beneficial', 25000);
});
});

View File

@@ -7,35 +7,18 @@
import { describe, it, expect } from 'vitest';
import * as os from 'node:os';
import { TestRig } from './test-helper.js';
import * as pty from '@lydell/node-pty';
/**
 * Resolves with the exit code reported by the pty process's exit event.
 * Rejects if no exit event arrives within 60 seconds.
 */
function waitForExit(ptyProcess: pty.IPty): Promise<number> {
  const limitMs = 60000;
  return new Promise<number>((onDone, onFail) => {
    const failOnTimeout = () => {
      onFail(
        new Error(`Test timed out: process did not exit within a minute.`),
      );
    };
    const watchdog = setTimeout(failOnTimeout, limitMs);
    ptyProcess.onExit((event) => {
      // The process exited in time; make sure the watchdog never fires.
      clearTimeout(watchdog);
      onDone(event.exitCode);
    });
  });
}
describe('Ctrl+C exit', () => {
it('should exit gracefully on second Ctrl+C', async () => {
const rig = new TestRig();
await rig.setup('should exit gracefully on second Ctrl+C');
const ptyProcess = await rig.runInteractive();
const run = await rig.runInteractive();
// Send first Ctrl+C
ptyProcess.write('\x03');
run.type('\x03');
await rig.waitForText('Press Ctrl+C again to exit', 5000);
await run.waitForText('Press Ctrl+C again to exit', 5000);
if (os.platform() === 'win32') {
// This is a workaround for node-pty/winpty on Windows.
@@ -46,9 +29,9 @@ describe('Ctrl+C exit', () => {
// To allow the test to pass, we forcefully kill the process,
// simulating a successful exit. We accept that we cannot test the
// graceful shutdown message on Windows in this automated context.
ptyProcess.kill();
run.kill();
const exitCode = await waitForExit(ptyProcess);
const exitCode = await run.waitForExit();
// On Windows, the exit code after run.kill() can be unpredictable
// (often 1), so we accept any non-null exit code as a pass condition,
// focusing on the fact that the process did terminate.
@@ -57,11 +40,11 @@ describe('Ctrl+C exit', () => {
}
// Send second Ctrl+C
ptyProcess.write('\x03');
run.type('\x03');
const exitCode = await waitForExit(ptyProcess);
const exitCode = await run.waitForExit();
expect(exitCode, `Process exited with code ${exitCode}.`).toBe(0);
await rig.waitForText('Agent powering down. Goodbye!', 5000);
await run.waitForText('Agent powering down. Goodbye!', 5000);
});
});

View File

@@ -5,7 +5,7 @@
*/
import { expect, describe, it, beforeEach, afterEach } from 'vitest';
import { TestRig, type, printDebugInfo } from './test-helper.js';
import { TestRig, printDebugInfo } from './test-helper.js';
describe('Interactive file system', () => {
let rig: TestRig;
@@ -23,22 +23,22 @@ describe('Interactive file system', () => {
rig.setup('interactive-read-then-write');
rig.createFile(fileName, '1.0.0');
const ptyProcess = await rig.runInteractive();
const run = await rig.runInteractive();
// Step 1: Read the file
const readPrompt = `Read the version from ${fileName}`;
await type(ptyProcess, readPrompt);
await type(ptyProcess, '\r');
await run.type(readPrompt);
await run.type('\r');
const readCall = await rig.waitForToolCall('read_file', 30000);
expect(readCall, 'Expected to find a read_file tool call').toBe(true);
await rig.waitForText('1.0.0', 30000);
await run.waitForText('1.0.0', 30000);
// Step 2: Write the file
const writePrompt = `now change the version to 1.0.1 in the file`;
await type(ptyProcess, writePrompt);
await type(ptyProcess, '\r');
await run.type(writePrompt);
await run.type('\r');
const toolCall = await rig.waitForAnyToolCall(
['write_file', 'replace'],
@@ -46,9 +46,7 @@ describe('Interactive file system', () => {
);
if (!toolCall) {
printDebugInfo(rig, rig._interactiveOutput, {
toolCall,
});
printDebugInfo(rig, run.output, { toolCall });
}
expect(toolCall, 'Expected to find a write_file or replace tool call').toBe(

View File

@@ -35,7 +35,7 @@ describe('JSON output', () => {
expect(typeof parsed.stats).toBe('object');
});
it('should return a JSON error for enforced auth mismatch before running', async () => {
it('should return a JSON error for sd auth mismatch before running', async () => {
process.env['GOOGLE_GENAI_USE_GCA'] = 'true';
await rig.setup('json-output-auth-mismatch', {
settings: {

View File

@@ -5,7 +5,12 @@
*/
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
import {
TestRig,
poll,
printDebugInfo,
validateModelOutput,
} from './test-helper.js';
import { existsSync } from 'node:fs';
import { join } from 'node:path';
@@ -18,7 +23,7 @@ describe('list_directory', () => {
rig.sync();
// Poll for filesystem changes to propagate in containers
await rig.poll(
await poll(
() => {
// Check if the files exist in the test directory
const file1Path = join(rig.testDir!, 'file1.txt');

View File

@@ -11,7 +11,7 @@
*/
import { describe, it, beforeAll, expect } from 'vitest';
import { TestRig, validateModelOutput } from './test-helper.js';
import { TestRig, poll, validateModelOutput } from './test-helper.js';
import { join } from 'node:path';
import { writeFileSync } from 'node:fs';
@@ -192,7 +192,7 @@ describe('simple-mcp-server', () => {
// Poll for script for up to 5s
const { accessSync, constants } = await import('node:fs');
const isReady = await rig.poll(
const isReady = await poll(
() => {
try {
accessSync(testServerPath, constants.F_OK);

View File

@@ -18,6 +18,39 @@ import * as os from 'node:os';
const __dirname = dirname(fileURLToPath(import.meta.url));
// Default wait budget in milliseconds, chosen from the environment variables
// CI and GEMINI_SANDBOX (CI wins when both are set).
function getDefaultTimeout() {
  const ciBudget = 60000; // 1 minute in CI
  const sandboxBudget = 30000; // 30s in containers
  const localBudget = 15000; // 15s locally
  if (env['CI']) {
    return ciBudget;
  }
  return env['GEMINI_SANDBOX'] ? sandboxBudget : localBudget;
}
/**
 * Repeatedly evaluates `predicate` until it returns true or `timeout`
 * milliseconds have elapsed.
 *
 * The predicate is always evaluated at least once (even when `timeout` is 0)
 * and once more when the deadline is reached, so a condition that becomes
 * true during the final sleep is not reported as a timeout. Sleeps are capped
 * to the time remaining, so the call does not overshoot the deadline by a
 * full `interval`.
 *
 * When the VERBOSE environment variable is 'true', every fifth attempt and
 * the final timeout are logged to the console.
 *
 * @param predicate Synchronous check; a truthy result ends the polling.
 * @param timeout Maximum time to keep polling, in milliseconds.
 * @param interval Pause between attempts, in milliseconds.
 * @returns true if the predicate succeeded, false if the deadline passed.
 */
export async function poll(
  predicate: () => boolean,
  timeout: number,
  interval: number,
): Promise<boolean> {
  const verbose = env['VERBOSE'] === 'true';
  const deadline = Date.now() + timeout;
  let attempts = 0;
  for (;;) {
    attempts++;
    const result = predicate();
    if (verbose && attempts % 5 === 0) {
      console.log(
        `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
      );
    }
    if (result) {
      return true;
    }
    const remaining = deadline - Date.now();
    // Written as a negated ">" so a NaN timeout also stops the loop.
    if (!(remaining > 0)) {
      break;
    }
    const pause = Math.min(interval, remaining);
    await new Promise((resolve) => setTimeout(resolve, pause));
  }
  if (verbose) {
    console.log(`Poll timed out after ${attempts} attempts`);
  }
  return false;
}
function sanitizeTestName(name: string) {
return name
.toLowerCase()
@@ -117,15 +150,6 @@ export function validateModelOutput(
return true;
}
/**
 * Simulates typing: writes `text` to the pty one character at a time, with a
 * 5 ms pause after each write, to avoid paste detection.
 */
export async function type(ptyProcess: pty.IPty, text: string) {
  const pauseMs = 5;
  const pause = () =>
    new Promise((resolve) => setTimeout(resolve, pauseMs));
  const chars = [...text];
  for (let i = 0; i < chars.length; i++) {
    ptyProcess.write(chars[i]);
    await pause();
  }
}
interface ParsedLog {
attributes?: {
'event.name'?: string;
@@ -143,25 +167,73 @@ interface ParsedLog {
}[];
}
/**
 * Handle for one interactive session running in a pty.
 *
 * Accumulates everything the process prints in `output` and offers helpers
 * to type input, wait for text to appear, kill the process and wait for it
 * to exit.
 */
export class InteractiveRun {
  ptyProcess: pty.IPty;
  /** Raw data received from the pty so far (ANSI sequences included). */
  public output = '';
  // Settled by the single exit listener registered in the constructor, so an
  // exit that happens before waitForExit() is called is not lost.
  private readonly exited: Promise<number>;

  constructor(ptyProcess: pty.IPty) {
    this.ptyProcess = ptyProcess;
    ptyProcess.onData((data) => {
      this.output += data;
      // Mirror the session to stdout when KEEP_OUTPUT or VERBOSE is 'true'.
      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
        process.stdout.write(data);
      }
    });
    this.exited = new Promise<number>((resolve) => {
      ptyProcess.onExit(({ exitCode }) => resolve(exitCode));
    });
  }

  /**
   * Polls every 200 ms until the ANSI-stripped output contains `text`
   * (case-insensitive), then asserts that it was found.
   *
   * @param text Text to look for.
   * @param timeout Maximum wait in milliseconds; when omitted (or 0) the
   *   environment-specific default from getDefaultTimeout() is used.
   */
  async waitForText(text: string, timeout?: number) {
    if (!timeout) {
      timeout = getDefaultTimeout();
    }
    const found = await poll(
      () => stripAnsi(this.output).toLowerCase().includes(text.toLowerCase()),
      timeout,
      200,
    );
    expect(found, `Did not find expected text: "${text}"`).toBe(true);
  }

  // Simulates typing a string one character at a time to avoid paste detection.
  async type(text: string) {
    const delay = 5;
    for (const char of text) {
      this.ptyProcess.write(char);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  /** Kills the underlying pty process. */
  async kill() {
    this.ptyProcess.kill();
  }

  /**
   * Resolves with the process exit code, including when the process exited
   * before this method was called. Rejects if no exit has been observed
   * within 60 seconds of the call.
   */
  async waitForExit(): Promise<number> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timedOut = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(
        () =>
          reject(
            new Error(`Test timed out: process did not exit within a minute.`),
          ),
        60000,
      );
    });
    try {
      return await Promise.race([this.exited, timedOut]);
    } finally {
      // Always clear the timer so it cannot keep the event loop alive.
      clearTimeout(timer);
    }
  }
}
export class TestRig {
bundlePath: string;
testDir: string | null;
testName?: string;
_lastRunStdout?: string;
_interactiveOutput = '';
constructor() {
this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
this.testDir = null;
}
// Default wait budget in milliseconds, chosen from the environment variables
// CI and GEMINI_SANDBOX (CI wins when both are set).
getDefaultTimeout() {
  return env['CI']
    ? 60000 // 1 minute in CI
    : env['GEMINI_SANDBOX']
      ? 30000 // 30s in containers
      : 15000; // 15s locally
}
setup(
testName: string,
options: { settings?: Record<string, unknown> } = {},
@@ -456,7 +528,7 @@ export class TestRig {
if (!logFilePath) return;
// Wait for telemetry file to exist and have content
await this.poll(
await poll(
() => {
if (!fs.existsSync(logFilePath)) return false;
try {
@@ -474,12 +546,12 @@ export class TestRig {
async waitForTelemetryEvent(eventName: string, timeout?: number) {
if (!timeout) {
timeout = this.getDefaultTimeout();
timeout = getDefaultTimeout();
}
await this.waitForTelemetryReady();
return this.poll(
return poll(
() => {
const logs = this._readAndParseTelemetryLog();
return logs.some(
@@ -496,13 +568,13 @@ export class TestRig {
async waitForToolCall(toolName: string, timeout?: number) {
// Use environment-specific timeout
if (!timeout) {
timeout = this.getDefaultTimeout();
timeout = getDefaultTimeout();
}
// Wait for telemetry to be ready before polling for tool calls
await this.waitForTelemetryReady();
return this.poll(
return poll(
() => {
const toolLogs = this.readToolLogs();
return toolLogs.some((log) => log.toolRequest.name === toolName);
@@ -515,13 +587,13 @@ export class TestRig {
async waitForAnyToolCall(toolNames: string[], timeout?: number) {
// Use environment-specific timeout
if (!timeout) {
timeout = this.getDefaultTimeout();
timeout = getDefaultTimeout();
}
// Wait for telemetry to be ready before polling for tool calls
await this.waitForTelemetryReady();
return this.poll(
return poll(
() => {
const toolLogs = this.readToolLogs();
return toolNames.some((name) =>
@@ -533,32 +605,6 @@ export class TestRig {
);
}
/**
 * Repeatedly evaluates `predicate` until it returns true or `timeout`
 * milliseconds have elapsed.
 *
 * The predicate is always evaluated at least once (even when `timeout` is 0)
 * and once more when the deadline is reached, so a condition that becomes
 * true during the final sleep is not reported as a timeout. Sleeps are capped
 * to the time remaining.
 *
 * When the VERBOSE environment variable is 'true', every fifth attempt and
 * the final timeout are logged to the console.
 *
 * @param predicate Synchronous check; a truthy result ends the polling.
 * @param timeout Maximum time to keep polling, in milliseconds.
 * @param interval Pause between attempts, in milliseconds.
 * @returns true if the predicate succeeded, false if the deadline passed.
 */
async poll(
  predicate: () => boolean,
  timeout: number,
  interval: number,
): Promise<boolean> {
  const verbose = env['VERBOSE'] === 'true';
  const deadline = Date.now() + timeout;
  let attempts = 0;
  for (;;) {
    attempts++;
    const result = predicate();
    if (verbose && attempts % 5 === 0) {
      console.log(
        `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
      );
    }
    if (result) {
      return true;
    }
    const remaining = deadline - Date.now();
    // Written as a negated ">" so a NaN timeout also stops the loop.
    if (!(remaining > 0)) {
      break;
    }
    const pause = Math.min(interval, remaining);
    await new Promise((resolve) => setTimeout(resolve, pause));
  }
  if (verbose) {
    console.log(`Poll timed out after ${attempts} attempts`);
  }
  return false;
}
_parseToolLogsFromStdout(stdout: string) {
const logs: {
timestamp: number;
@@ -808,27 +854,10 @@ export class TestRig {
return null;
}
/**
 * Polls every 200 ms until the ANSI-stripped interactive output contains
 * `text` (case-insensitive), then asserts that it was found.
 *
 * @param text Text to look for.
 * @param timeout Maximum wait in milliseconds; when omitted (or 0) the
 *   value from this.getDefaultTimeout() is used.
 */
async waitForText(text: string, timeout?: number) {
  const budget = timeout ? timeout : this.getDefaultTimeout();
  const outputContainsText = () => {
    const plain = stripAnsi(this._interactiveOutput);
    return plain.toLowerCase().includes(text.toLowerCase());
  };
  const found = await this.poll(outputContainsText, budget, 200);
  expect(found, `Did not find expected text: "${text}"`).toBe(true);
}
async runInteractive(...args: string[]): Promise<pty.IPty> {
async runInteractive(...args: string[]): Promise<InteractiveRun> {
const { command, initialArgs } = this._getCommandAndArgs(['--yolo']);
const commandArgs = [...initialArgs, ...args];
this._interactiveOutput = ''; // Reset output for the new run
const options: pty.IPtyForkOptions = {
name: 'xterm-color',
cols: 80,
@@ -842,16 +871,9 @@ export class TestRig {
const executable = command === 'node' ? process.execPath : command;
const ptyProcess = pty.spawn(executable, commandArgs, options);
ptyProcess.onData((data) => {
this._interactiveOutput += data;
if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
process.stdout.write(data);
}
});
const run = new InteractiveRun(ptyProcess);
// Wait for the app to be ready
await this.waitForText('Type your message', 30000);
return ptyProcess;
await run.waitForText('Type your message', 30000);
return run;
}
}