mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-16 23:02:51 -07:00
fix(simulator): improve PTY stabilization and stall recovery
This commit is contained in:
@@ -190,42 +190,42 @@ describe('UserSimulator', () => {
|
|||||||
vi.useRealTimers();
|
vi.useRealTimers();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should re-evaluate if internal tool state changes even if screen content is static', async () => {
|
it('should terminate if terminal state does not change after 3 consecutive inputs', async () => {
|
||||||
|
const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => {
|
||||||
|
return undefined as never;
|
||||||
|
});
|
||||||
const simulator = new UserSimulator(
|
const simulator = new UserSimulator(
|
||||||
mockConfig,
|
mockConfig,
|
||||||
mockGetScreen,
|
mockGetScreen,
|
||||||
mockStdinBuffer,
|
mockStdinBuffer,
|
||||||
);
|
);
|
||||||
mockGetScreen.mockReturnValue('Responding...');
|
mockGetScreen.mockReturnValue('Static Screen');
|
||||||
|
mockContentGenerator.generateContent.mockResolvedValue({
|
||||||
|
text: JSON.stringify({ action: 'y\r' }),
|
||||||
|
});
|
||||||
|
|
||||||
vi.useFakeTimers();
|
vi.useFakeTimers();
|
||||||
simulator.start();
|
simulator.start();
|
||||||
|
|
||||||
// Trigger first tick
|
// Tick 1: Action sent, state recorded
|
||||||
await vi.advanceTimersByTimeAsync(2000);
|
await vi.advanceTimersByTimeAsync(2000);
|
||||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1);
|
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
// Trigger second tick with same screen - should skip
|
// Tick 2: Same screen, action sent, stall count = 1
|
||||||
await vi.advanceTimersByTimeAsync(2000);
|
|
||||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1);
|
|
||||||
|
|
||||||
// Simulate tool call update
|
|
||||||
const handler = mockMessageBus.subscribe.mock.calls[0][1];
|
|
||||||
handler({
|
|
||||||
type: MessageBusType.TOOL_CALLS_UPDATE,
|
|
||||||
toolCalls: [
|
|
||||||
{
|
|
||||||
status: CoreToolCallStatus.AwaitingApproval,
|
|
||||||
request: { callId: '123', name: 'test_tool' },
|
|
||||||
},
|
|
||||||
],
|
|
||||||
});
|
|
||||||
|
|
||||||
// Trigger third tick with same screen but new tool state - should NOT skip
|
|
||||||
await vi.advanceTimersByTimeAsync(2000);
|
await vi.advanceTimersByTimeAsync(2000);
|
||||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(2);
|
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(2);
|
||||||
|
|
||||||
|
// Tick 3: Same screen, action sent, stall count = 2
|
||||||
|
await vi.advanceTimersByTimeAsync(2000);
|
||||||
|
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(3);
|
||||||
|
|
||||||
|
// Tick 4: Same screen, should trigger termination
|
||||||
|
await vi.advanceTimersByTimeAsync(2000);
|
||||||
|
|
||||||
|
expect(exitSpy).toHaveBeenCalledWith(1);
|
||||||
|
|
||||||
simulator.stop();
|
simulator.stop();
|
||||||
|
exitSpy.mockRestore();
|
||||||
vi.useRealTimers();
|
vi.useRealTimers();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -304,9 +304,11 @@ describe('UserSimulator', () => {
|
|||||||
(call) => call[1] === 'simulator-compression',
|
(call) => call[1] === 'simulator-compression',
|
||||||
);
|
);
|
||||||
expect(compressionCall).toBeDefined();
|
expect(compressionCall).toBeDefined();
|
||||||
expect(compressionCall[0].contents[0].parts[0].text).toContain(
|
if (compressionCall) {
|
||||||
'Summarize the following chronological session notes',
|
expect(compressionCall[0].contents[0].parts[0].text).toContain(
|
||||||
);
|
'Summarize the following chronological session notes',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Wait for the compression to finish and merge.
|
// Wait for the compression to finish and merge.
|
||||||
// We need to resolve the promise for the compression call.
|
// We need to resolve the promise for the compression call.
|
||||||
@@ -332,10 +334,11 @@ describe('UserSimulator', () => {
|
|||||||
call[1] === 'simulator-prompt',
|
call[1] === 'simulator-prompt',
|
||||||
);
|
);
|
||||||
|
|
||||||
const finalPrompt = finalCall[0].contents[0].parts[0].text;
|
expect(finalCall).toBeDefined();
|
||||||
expect(finalPrompt).toContain('1. Compressed Summary');
|
if (finalCall) {
|
||||||
// Note 5 (the one added during or after compression trigger) might be there too
|
const finalPrompt = finalCall[0].contents[0].parts[0].text;
|
||||||
// depending on timing, but 'Compressed Summary' must be there.
|
expect(finalPrompt).toContain('1. Compressed Summary');
|
||||||
|
}
|
||||||
|
|
||||||
simulator.stop();
|
simulator.stop();
|
||||||
vi.useRealTimers();
|
vi.useRealTimers();
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ export class UserSimulator {
|
|||||||
private lastStateKey = '';
|
private lastStateKey = '';
|
||||||
private isProcessing = false;
|
private isProcessing = false;
|
||||||
private isCompressingMemory = false;
|
private isCompressingMemory = false;
|
||||||
|
private consecutiveStallCount = 0;
|
||||||
private staleCycleCount = 0;
|
private staleCycleCount = 0;
|
||||||
private interactionsFile: string | null = null;
|
private interactionsFile: string | null = null;
|
||||||
|
|
||||||
@@ -116,6 +117,12 @@ export class UserSimulator {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
this.isProcessing = true;
|
this.isProcessing = true;
|
||||||
|
|
||||||
|
// Stabilization delay: Wait for the terminal UI to finish rendering
|
||||||
|
// (e.g. ANSI clear/repaint sequences) before looking at the screen.
|
||||||
|
// Increased to 1s to handle high-latency PTYs in Docker.
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||||
|
|
||||||
const screen = this.getScreen();
|
const screen = this.getScreen();
|
||||||
if (!screen) return;
|
if (!screen) return;
|
||||||
|
|
||||||
@@ -128,10 +135,12 @@ export class UserSimulator {
|
|||||||
.replace(/\n([ \t]*\n)+/g, '\n\n');
|
.replace(/\n([ \t]*\n)+/g, '\n\n');
|
||||||
|
|
||||||
const normalizedScreen = strippedScreen
|
const normalizedScreen = strippedScreen
|
||||||
.replace(/[\u2800-\u28FF]/g, '')
|
.replace(/[\u2800-\u28FF]/g, '') // Braille patterns
|
||||||
.replace(/[|/-\\]/g, '')
|
.replace(/[|/-\\]/g, '') // Spinners
|
||||||
.replace(/\b\d+(\.\d+)?s\b/g, '')
|
.replace(/\b\d+(\.\d+)?s\b/g, '') // Timers (seconds)
|
||||||
.replace(/\b\d+m(\s+\d+s)?\b/g, '')
|
.replace(/\b\d+m(\s+\d+s)?\b/g, '') // Timers (minutes)
|
||||||
|
.replace(/\b\d+%\b/g, '') // Percentages
|
||||||
|
.replace(/\b\d+\/\d+\b/g, '') // Progress ratios (e.g. 1/10)
|
||||||
.replace(/\(\s*\)/g, '')
|
.replace(/\(\s*\)/g, '')
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
@@ -142,16 +151,46 @@ export class UserSimulator {
|
|||||||
const currentStateKey = `${normalizedScreen}::${pendingIds}`;
|
const currentStateKey = `${normalizedScreen}::${pendingIds}`;
|
||||||
|
|
||||||
if (currentStateKey === this.lastStateKey) {
|
if (currentStateKey === this.lastStateKey) {
|
||||||
if (this.pendingToolCalls.length > 0) {
|
const lastAction = this.actionHistory[this.actionHistory.length - 1];
|
||||||
this.staleCycleCount++;
|
if (lastAction && lastAction !== '<WAIT>') {
|
||||||
// Every 10 ticks (10s) on a static screen while blocked, we try a prompt
|
this.consecutiveStallCount++;
|
||||||
if (this.staleCycleCount % 10 !== 0) {
|
|
||||||
return;
|
// Increased limit to 10 for high-load environments.
|
||||||
|
if (this.consecutiveStallCount >= 10) {
|
||||||
|
const errorMsg =
|
||||||
|
`[SIMULATOR] CRITICAL STALL DETECTED: Terminal state has not changed after ${this.consecutiveStallCount} consecutive inputs. Terminating to prevent loop.`;
|
||||||
|
debugLogger.error(errorMsg);
|
||||||
|
if (this.interactionsFile) {
|
||||||
|
fs.appendFileSync(
|
||||||
|
this.interactionsFile,
|
||||||
|
`[ERROR] ${errorMsg}\n\n`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.error(`\n${errorMsg}`);
|
||||||
|
this.stop();
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// RECOVERY: If screen is blank and we are stalled, try a terminal refresh.
|
||||||
|
if (normalizedScreen.length === 0 && this.pendingToolCalls.length > 0) {
|
||||||
|
debugLogger.log('[SIMULATOR] Screen is blank but system is BLOCKED. Sending refresh carriage return.');
|
||||||
|
this.stdinBuffer.write('\r');
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return;
|
// If it was a <WAIT> action or no action yet, we still want the 10s fallback for internal state sync
|
||||||
|
if (this.pendingToolCalls.length > 0) {
|
||||||
|
this.staleCycleCount++;
|
||||||
|
if (this.staleCycleCount % 10 !== 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
this.consecutiveStallCount = 0;
|
||||||
this.staleCycleCount = 0;
|
this.staleCycleCount = 0;
|
||||||
}
|
}
|
||||||
this.lastStateKey = currentStateKey;
|
this.lastStateKey = currentStateKey;
|
||||||
@@ -277,7 +316,7 @@ ${strippedScreen}
|
|||||||
if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) {
|
if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) {
|
||||||
cleanJson = cleanJson.substring(startIdx, endIdx + 1);
|
cleanJson = cleanJson.substring(startIdx, endIdx + 1);
|
||||||
} else {
|
} else {
|
||||||
cleanJson = cleanJson.replace(/^```json\s*|\s*```$/gm, '').trim();
|
cleanJson = cleanJson.replace(/^\`\`\`json\s*|\s*\`\`\`$/gm, '').trim();
|
||||||
}
|
}
|
||||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||||
parsedJson = JSON.parse(cleanJson) as SimulatorResponse;
|
parsedJson = JSON.parse(cleanJson) as SimulatorResponse;
|
||||||
@@ -301,7 +340,7 @@ ${strippedScreen}
|
|||||||
/^\d+\\r$/.test(text) ||
|
/^\d+\\r$/.test(text) ||
|
||||||
text === '\\r'
|
text === '\\r'
|
||||||
) {
|
) {
|
||||||
responseText = text.replace(/^[`"']+|[`"']+$/g, '');
|
responseText = text.replace(/^[\`\"']+|[\`\"']+$/g, '');
|
||||||
} else {
|
} else {
|
||||||
responseText = ''; // Prevent typing broken JSON string
|
responseText = ''; // Prevent typing broken JSON string
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user