feat(core): enhance loop detection with 2-stage check (#12902)

This commit is contained in:
Sandy Tao
2025-11-11 20:49:00 -08:00
committed by GitHub
parent cab9b1f370
commit 408b885689
10 changed files with 438 additions and 60 deletions

View File

@@ -22,6 +22,7 @@ import { LoopDetectionService } from './loopDetectionService.js';
vi.mock('../telemetry/loggers.js', () => ({
logLoopDetected: vi.fn(),
logLoopDetectionDisabled: vi.fn(),
logLlmLoopCheck: vi.fn(),
}));
const TOOL_CALL_LOOP_THRESHOLD = 5;
@@ -736,10 +737,17 @@ describe('LoopDetectionService LLM Checks', () => {
getBaseLlmClient: () => mockBaseLlmClient,
getDebugMode: () => false,
getTelemetryEnabled: () => true,
getModel: vi.fn().mockReturnValue('cognitive-loop-v1'),
isInFallbackMode: vi.fn().mockReturnValue(false),
modelConfigService: {
getResolvedConfig: vi.fn().mockReturnValue({
model: 'cognitive-loop-v1',
generateContentConfig: {},
getResolvedConfig: vi.fn().mockImplementation((key) => {
if (key.model === 'loop-detection') {
return { model: 'gemini-2.5-flash', generateContentConfig: {} };
}
return {
model: 'cognitive-loop-v1',
generateContentConfig: {},
};
}),
},
isInteractive: () => false,
@@ -773,7 +781,7 @@ describe('LoopDetectionService LLM Checks', () => {
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
expect.objectContaining({
modelConfigKey: expect.any(Object),
modelConfigKey: { model: 'loop-detection' },
systemInstruction: expect.any(String),
contents: expect.any(Array),
schema: expect.any(Object),
@@ -790,6 +798,11 @@ describe('LoopDetectionService LLM Checks', () => {
});
await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
expect.objectContaining({
modelConfigKey: { model: 'loop-detection' },
}),
);
// The confidence of 0.85 will result in a low interval.
// The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7
@@ -807,6 +820,7 @@ describe('LoopDetectionService LLM Checks', () => {
expect.objectContaining({
'event.name': 'loop_detected',
loop_type: LoopType.LLM_DETECTED_LOOP,
confirmed_by_model: 'cognitive-loop-v1',
}),
);
});
@@ -885,4 +899,122 @@ describe('LoopDetectionService LLM Checks', () => {
// Verify the original history follows
expect(calledArg.contents[1]).toEqual(functionCallHistory[0]);
});
it('should detect a loop when confidence is exactly equal to the threshold (0.9)', async () => {
// Mock isInFallbackMode to false so it double checks
vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValueOnce({
unproductive_state_confidence: 0.9,
unproductive_state_analysis: 'Flash says loop',
})
.mockResolvedValueOnce({
unproductive_state_confidence: 0.9,
unproductive_state_analysis: 'Main says loop',
});
await advanceTurns(30);
// It should have called generateJson twice
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
modelConfigKey: { model: 'loop-detection' },
}),
);
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
modelConfigKey: { model: 'loop-detection-double-check' },
}),
);
// And it should have detected a loop
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
mockConfig,
expect.objectContaining({
'event.name': 'loop_detected',
loop_type: LoopType.LLM_DETECTED_LOOP,
confirmed_by_model: 'cognitive-loop-v1',
}),
);
});
it('should not detect a loop when Flash is confident (0.9) but Main model is not (0.89)', async () => {
// Mock isInFallbackMode to false so it double checks
vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValueOnce({
unproductive_state_confidence: 0.9,
unproductive_state_analysis: 'Flash says loop',
})
.mockResolvedValueOnce({
unproductive_state_confidence: 0.89,
unproductive_state_analysis: 'Main says no loop',
});
await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
modelConfigKey: { model: 'loop-detection' },
}),
);
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
modelConfigKey: { model: 'loop-detection-double-check' },
}),
);
// Should NOT have detected a loop
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
// But should have updated the interval based on the main model's confidence (0.89)
// Interval = 5 + (15-5) * (1 - 0.89) = 5 + 10 * 0.11 = 5 + 1.1 = 6.1 -> 6
// Advance by 6 turns
await advanceTurns(6);
// Next turn (37) should trigger another check
await service.turnStarted(abortController.signal);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3);
});
it('should only call Flash model if in fallback mode', async () => {
// Mock isInFallbackMode to true
vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true);
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValueOnce({
unproductive_state_confidence: 0.9,
unproductive_state_analysis: 'Flash says loop',
});
await advanceTurns(30);
// It should have called generateJson only once
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
expect.objectContaining({
modelConfigKey: { model: 'loop-detection' },
}),
);
// And it should have detected a loop
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
mockConfig,
expect.objectContaining({
'event.name': 'loop_detected',
loop_type: LoopType.LLM_DETECTED_LOOP,
confirmed_by_model: 'gemini-2.5-flash',
}),
);
});
});