fix(core): update loop detection LLM schema fields (#12091)

This commit is contained in:
Sandy Tao
2025-10-27 12:16:25 -07:00
committed by GitHub
parent 29efebe38f
commit 4ef3c09332
2 changed files with 29 additions and 20 deletions

View File

@@ -671,7 +671,7 @@ describe('LoopDetectionService LLM Checks', () => {
it('should trigger LLM check on the 30th turn', async () => {
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValue({ confidence: 0.1 });
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
@@ -687,9 +687,10 @@ describe('LoopDetectionService LLM Checks', () => {
it('should detect a cognitive loop when confidence is high', async () => {
// First check at turn 30
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValue({ confidence: 0.85, reasoning: 'Repetitive actions' });
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
unproductive_state_confidence: 0.85,
unproductive_state_analysis: 'Repetitive actions',
});
await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
@@ -697,9 +698,10 @@ describe('LoopDetectionService LLM Checks', () => {
// The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7
await advanceTurns(6); // advance to turn 36
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValue({ confidence: 0.95, reasoning: 'Repetitive actions' });
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
unproductive_state_confidence: 0.95,
unproductive_state_analysis: 'Repetitive actions',
});
const finalResult = await service.turnStarted(abortController.signal); // This is turn 37
expect(finalResult).toBe(true);
@@ -713,9 +715,10 @@ describe('LoopDetectionService LLM Checks', () => {
});
it('should not detect a loop when confidence is low', async () => {
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValue({ confidence: 0.5, reasoning: 'Looks okay' });
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
unproductive_state_confidence: 0.5,
unproductive_state_analysis: 'Looks okay',
});
await advanceTurns(30);
const result = await service.turnStarted(abortController.signal);
expect(result).toBe(false);
@@ -726,7 +729,7 @@ describe('LoopDetectionService LLM Checks', () => {
// Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15)
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValue({ confidence: 0.0 });
.mockResolvedValue({ unproductive_state_confidence: 0.0 });
await advanceTurns(30); // First check at turn 30
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
@@ -771,7 +774,7 @@ describe('LoopDetectionService LLM Checks', () => {
mockBaseLlmClient.generateJson = vi
.fn()
.mockResolvedValue({ confidence: 0.1 });
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
await advanceTurns(30);

View File

@@ -413,18 +413,21 @@ export class LoopDetectionService {
const schema: Record<string, unknown> = {
type: 'object',
properties: {
reasoning: {
unproductive_state_analysis: {
type: 'string',
description:
'Your reasoning on if the conversation is looping without forward progress.',
},
confidence: {
unproductive_state_confidence: {
type: 'number',
description:
'A number between 0.0 and 1.0 representing your confidence that the conversation is in an unproductive state.',
},
},
required: ['reasoning', 'confidence'],
required: [
'unproductive_state_analysis',
'unproductive_state_confidence',
],
};
let result;
try {
@@ -442,10 +445,13 @@ export class LoopDetectionService {
return false;
}
if (typeof result['confidence'] === 'number') {
if (result['confidence'] > 0.9) {
if (typeof result['reasoning'] === 'string' && result['reasoning']) {
debugLogger.warn(result['reasoning']);
if (typeof result['unproductive_state_confidence'] === 'number') {
if (result['unproductive_state_confidence'] > 0.9) {
if (
typeof result['unproductive_state_analysis'] === 'string' &&
result['unproductive_state_analysis']
) {
debugLogger.warn(result['unproductive_state_analysis']);
}
logLoopDetected(
this.config,
@@ -456,7 +462,7 @@ export class LoopDetectionService {
this.llmCheckInterval = Math.round(
MIN_LLM_CHECK_INTERVAL +
(MAX_LLM_CHECK_INTERVAL - MIN_LLM_CHECK_INTERVAL) *
(1 - result['confidence']),
(1 - result['unproductive_state_confidence']),
);
}
}