mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-16 09:01:17 -07:00
feat(core): enhance loop detection with 2-stage check (#12902)
This commit is contained in:
@@ -22,6 +22,7 @@ import { LoopDetectionService } from './loopDetectionService.js';
|
||||
vi.mock('../telemetry/loggers.js', () => ({
|
||||
logLoopDetected: vi.fn(),
|
||||
logLoopDetectionDisabled: vi.fn(),
|
||||
logLlmLoopCheck: vi.fn(),
|
||||
}));
|
||||
|
||||
const TOOL_CALL_LOOP_THRESHOLD = 5;
|
||||
@@ -736,10 +737,17 @@ describe('LoopDetectionService LLM Checks', () => {
|
||||
getBaseLlmClient: () => mockBaseLlmClient,
|
||||
getDebugMode: () => false,
|
||||
getTelemetryEnabled: () => true,
|
||||
getModel: vi.fn().mockReturnValue('cognitive-loop-v1'),
|
||||
isInFallbackMode: vi.fn().mockReturnValue(false),
|
||||
modelConfigService: {
|
||||
getResolvedConfig: vi.fn().mockReturnValue({
|
||||
model: 'cognitive-loop-v1',
|
||||
generateContentConfig: {},
|
||||
getResolvedConfig: vi.fn().mockImplementation((key) => {
|
||||
if (key.model === 'loop-detection') {
|
||||
return { model: 'gemini-2.5-flash', generateContentConfig: {} };
|
||||
}
|
||||
return {
|
||||
model: 'cognitive-loop-v1',
|
||||
generateContentConfig: {},
|
||||
};
|
||||
}),
|
||||
},
|
||||
isInteractive: () => false,
|
||||
@@ -773,7 +781,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
modelConfigKey: expect.any(Object),
|
||||
modelConfigKey: { model: 'loop-detection' },
|
||||
systemInstruction: expect.any(String),
|
||||
contents: expect.any(Array),
|
||||
schema: expect.any(Object),
|
||||
@@ -790,6 +798,11 @@ describe('LoopDetectionService LLM Checks', () => {
|
||||
});
|
||||
await advanceTurns(30);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
modelConfigKey: { model: 'loop-detection' },
|
||||
}),
|
||||
);
|
||||
|
||||
// The confidence of 0.85 will result in a low interval.
|
||||
// The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7
|
||||
@@ -807,6 +820,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
||||
expect.objectContaining({
|
||||
'event.name': 'loop_detected',
|
||||
loop_type: LoopType.LLM_DETECTED_LOOP,
|
||||
confirmed_by_model: 'cognitive-loop-v1',
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -885,4 +899,122 @@ describe('LoopDetectionService LLM Checks', () => {
|
||||
// Verify the original history follows
|
||||
expect(calledArg.contents[1]).toEqual(functionCallHistory[0]);
|
||||
});
|
||||
|
||||
it('should detect a loop when confidence is exactly equal to the threshold (0.9)', async () => {
|
||||
// Mock isInFallbackMode to false so it double checks
|
||||
vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
|
||||
|
||||
mockBaseLlmClient.generateJson = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
unproductive_state_confidence: 0.9,
|
||||
unproductive_state_analysis: 'Flash says loop',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
unproductive_state_confidence: 0.9,
|
||||
unproductive_state_analysis: 'Main says loop',
|
||||
});
|
||||
|
||||
await advanceTurns(30);
|
||||
|
||||
// It should have called generateJson twice
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({
|
||||
modelConfigKey: { model: 'loop-detection' },
|
||||
}),
|
||||
);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
modelConfigKey: { model: 'loop-detection-double-check' },
|
||||
}),
|
||||
);
|
||||
|
||||
// And it should have detected a loop
|
||||
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
expect.objectContaining({
|
||||
'event.name': 'loop_detected',
|
||||
loop_type: LoopType.LLM_DETECTED_LOOP,
|
||||
confirmed_by_model: 'cognitive-loop-v1',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('should not detect a loop when Flash is confident (0.9) but Main model is not (0.89)', async () => {
|
||||
// Mock isInFallbackMode to false so it double checks
|
||||
vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
|
||||
|
||||
mockBaseLlmClient.generateJson = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
unproductive_state_confidence: 0.9,
|
||||
unproductive_state_analysis: 'Flash says loop',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
unproductive_state_confidence: 0.89,
|
||||
unproductive_state_analysis: 'Main says no loop',
|
||||
});
|
||||
|
||||
await advanceTurns(30);
|
||||
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({
|
||||
modelConfigKey: { model: 'loop-detection' },
|
||||
}),
|
||||
);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
modelConfigKey: { model: 'loop-detection-double-check' },
|
||||
}),
|
||||
);
|
||||
|
||||
// Should NOT have detected a loop
|
||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||
|
||||
// But should have updated the interval based on the main model's confidence (0.89)
|
||||
// Interval = 5 + (15-5) * (1 - 0.89) = 5 + 10 * 0.11 = 5 + 1.1 = 6.1 -> 6
|
||||
|
||||
// Advance by 6 turns
|
||||
await advanceTurns(6);
|
||||
|
||||
// Next turn (37) should trigger another check
|
||||
await service.turnStarted(abortController.signal);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('should only call Flash model if in fallback mode', async () => {
|
||||
// Mock isInFallbackMode to true
|
||||
vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true);
|
||||
|
||||
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValueOnce({
|
||||
unproductive_state_confidence: 0.9,
|
||||
unproductive_state_analysis: 'Flash says loop',
|
||||
});
|
||||
|
||||
await advanceTurns(30);
|
||||
|
||||
// It should have called generateJson only once
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
modelConfigKey: { model: 'loop-detection' },
|
||||
}),
|
||||
);
|
||||
|
||||
// And it should have detected a loop
|
||||
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
expect.objectContaining({
|
||||
'event.name': 'loop_detected',
|
||||
loop_type: LoopType.LLM_DETECTED_LOOP,
|
||||
confirmed_by_model: 'gemini-2.5-flash',
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user