From 12720a9fa71baad48f48f5eda61c292e0c16d37e Mon Sep 17 00:00:00 2001 From: Abhi <43648792+abhipatel12@users.noreply.github.com> Date: Sat, 13 Sep 2025 23:07:33 -0400 Subject: [PATCH] refactor(core): Use BaseLlmClient for LLM-based loop detection (#8427) --- .../src/services/loopDetectionService.test.ts | 43 +++++++++++++------ .../core/src/services/loopDetectionService.ts | 36 +++++++++------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/packages/core/src/services/loopDetectionService.test.ts b/packages/core/src/services/loopDetectionService.test.ts index 0b842d6054..cc9f25afb9 100644 --- a/packages/core/src/services/loopDetectionService.test.ts +++ b/packages/core/src/services/loopDetectionService.test.ts @@ -7,6 +7,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import type { Config } from '../config/config.js'; import type { GeminiClient } from '../core/client.js'; +import type { BaseLlmClient } from '../core/baseLlmClient.js'; import type { ServerGeminiContentEvent, ServerGeminiStreamEvent, @@ -625,16 +626,21 @@ describe('LoopDetectionService LLM Checks', () => { let service: LoopDetectionService; let mockConfig: Config; let mockGeminiClient: GeminiClient; + let mockBaseLlmClient: BaseLlmClient; let abortController: AbortController; beforeEach(() => { mockGeminiClient = { getHistory: vi.fn().mockReturnValue([]), - generateJson: vi.fn(), } as unknown as GeminiClient; + mockBaseLlmClient = { + generateJson: vi.fn(), + } as unknown as BaseLlmClient; + mockConfig = { getGeminiClient: () => mockGeminiClient, + getBaseLlmClient: () => mockBaseLlmClient, getDebugMode: () => false, getTelemetryEnabled: () => true, } as unknown as Config; @@ -656,30 +662,39 @@ describe('LoopDetectionService LLM Checks', () => { it('should not trigger LLM check before LLM_CHECK_AFTER_TURNS', async () => { await advanceTurns(29); - expect(mockGeminiClient.generateJson).not.toHaveBeenCalled(); + expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled(); }); it('should trigger LLM check on the 30th turn', async () => { - mockGeminiClient.generateJson = vi + mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValue({ confidence: 0.1 }); await advanceTurns(30); - expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(1); + expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); + expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith( + expect.objectContaining({ + systemInstruction: expect.any(String), + contents: expect.any(Array), + model: expect.any(String), + schema: expect.any(Object), + promptId: expect.any(String), + }), + ); }); it('should detect a cognitive loop when confidence is high', async () => { // First check at turn 30 - mockGeminiClient.generateJson = vi + mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValue({ confidence: 0.85, reasoning: 'Repetitive actions' }); await advanceTurns(30); - expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(1); + expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); // The confidence of 0.85 will result in a low interval. // The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7 await advanceTurns(6); // advance to turn 36 - mockGeminiClient.generateJson = vi + mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValue({ confidence: 0.95, reasoning: 'Repetitive actions' }); const finalResult = await service.turnStarted(abortController.signal); // This is turn 37 @@ -695,7 +710,7 @@ describe('LoopDetectionService LLM Checks', () => { }); it('should not detect a loop when confidence is low', async () => { - mockGeminiClient.generateJson = vi + mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValue({ confidence: 0.5, reasoning: 'Looks okay' }); await advanceTurns(30); @@ -706,21 +721,21 @@ describe('LoopDetectionService LLM Checks', () => { it('should adjust the check interval based on confidence', async () => { // Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15) - mockGeminiClient.generateJson = vi + mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValue({ confidence: 0.0 }); await advanceTurns(30); // First check at turn 30 - expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(1); + expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); await advanceTurns(14); // Advance to turn 44 - expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(1); + expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); await service.turnStarted(abortController.signal); // Turn 45 - expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(2); + expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2); }); it('should handle errors from generateJson gracefully', async () => { - mockGeminiClient.generateJson = vi + mockBaseLlmClient.generateJson = vi .fn() .mockRejectedValue(new Error('API error')); await advanceTurns(30); @@ -734,6 +749,6 @@ describe('LoopDetectionService LLM Checks', () => { await advanceTurns(30); const result = await service.turnStarted(abortController.signal); expect(result).toBe(false); - expect(mockGeminiClient.generateJson).not.toHaveBeenCalled(); + expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled(); }); }); diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts index d2c531b291..2de71a717d 100644 --- a/packages/core/src/services/loopDetectionService.ts +++ b/packages/core/src/services/loopDetectionService.ts @@ -50,6 +50,17 @@ const MIN_LLM_CHECK_INTERVAL = 5; */ const MAX_LLM_CHECK_INTERVAL = 15; +const LOOP_DETECTION_SYSTEM_PROMPT = `You are a sophisticated AI diagnostic agent specializing in identifying when a conversational AI is stuck in an unproductive state. Your task is to analyze the provided conversation history and determine if the assistant has ceased to make meaningful progress. + +An unproductive state is characterized by one or more of the following patterns over the last 5 or more assistant turns: + +Repetitive Actions: The assistant repeats the same tool calls or conversational responses a decent number of times. This includes simple loops (e.g., tool_A, tool_A, tool_A) and alternating patterns (e.g., tool_A, tool_B, tool_A, tool_B, ...). + +Cognitive Loop: The assistant seems unable to determine the next logical step. It might express confusion, repeatedly ask the same questions, or generate responses that don't logically follow from the previous turns, indicating it's stuck and not advancing the task. + +Crucially, differentiate between a true unproductive state and legitimate, incremental progress. +For example, a series of 'tool_A' or 'tool_B' tool calls that make small, distinct changes to the same file (like adding docstrings to functions one by one) is considered forward progress and is NOT a loop. A loop would be repeatedly replacing the same text with the same content, or cycling between a small set of files with no net change.`; + /** * Service for detecting and preventing infinite loops in AI responses. * Monitors tool call repetitions and content sentence repetitions. @@ -375,21 +386,11 @@ export class LoopDetectionService { const trimmedHistory = this.trimRecentHistory(recentHistory); - const prompt = `You are a sophisticated AI diagnostic agent specializing in identifying when a conversational AI is stuck in an unproductive state. Your task is to analyze the provided conversation history and determine if the assistant has ceased to make meaningful progress. + const taskPrompt = `Please analyze the conversation history to determine the possibility that the conversation is stuck in a repetitive, non-productive state. Provide your response in the requested JSON format.`; -An unproductive state is characterized by one or more of the following patterns over the last 5 or more assistant turns: - -Repetitive Actions: The assistant repeats the same tool calls or conversational responses a decent number of times. This includes simple loops (e.g., tool_A, tool_A, tool_A) and alternating patterns (e.g., tool_A, tool_B, tool_A, tool_B, ...). - -Cognitive Loop: The assistant seems unable to determine the next logical step. It might express confusion, repeatedly ask the same questions, or generate responses that don't logically follow from the previous turns, indicating it's stuck and not advancing the task. - -Crucially, differentiate between a true unproductive state and legitimate, incremental progress. -For example, a series of 'tool_A' or 'tool_B' tool calls that make small, distinct changes to the same file (like adding docstrings to functions one by one) is considered forward progress and is NOT a loop. A loop would be repeatedly replacing the same text with the same content, or cycling between a small set of files with no net change. - -Please analyze the conversation history to determine the possibility that the conversation is stuck in a repetitive, non-productive state.`; const contents = [ ...trimmedHistory, - { role: 'user', parts: [{ text: prompt }] }, + { role: 'user', parts: [{ text: taskPrompt }] }, ]; const schema: Record = { type: 'object', @@ -409,9 +410,14 @@ Please analyze the conversation history to determine the possibility that the co }; let result; try { - result = await this.config - .getGeminiClient() - .generateJson(contents, schema, signal, DEFAULT_GEMINI_FLASH_MODEL); + result = await this.config.getBaseLlmClient().generateJson({ + contents, + schema, + model: DEFAULT_GEMINI_FLASH_MODEL, + systemInstruction: LOOP_DETECTION_SYSTEM_PROMPT, + abortSignal: signal, + promptId: this.promptId, + }); } catch (e) { // Do nothing, treat it as a non-loop. this.config.getDebugMode() ? console.error(e) : console.debug(e);