2025-05-10 13:11:03 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2025-08-26 00:04:53 +02:00
import type { Content } from '@google/genai' ;
2025-09-15 00:15:18 -04:00
import type { BaseLlmClient } from '../core/baseLlmClient.js' ;
2025-08-26 00:04:53 +02:00
import type { GeminiChat } from '../core/geminiChat.js' ;
2025-05-26 14:39:29 -07:00
import { isFunctionResponse } from './messageInspectors.js' ;
2025-10-21 16:35:22 -04:00
import { debugLogger } from './debugLogger.js' ;
2026-02-17 12:32:30 -05:00
import { LlmRole } from '../telemetry/types.js' ;
2025-05-10 13:11:03 -07:00
/**
 * Prompt appended as a final user turn asking the model to classify, based
 * *strictly* on its own immediately preceding response, whether the 'user'
 * or the 'model' should logically speak next. Answered as structured JSON
 * constrained by RESPONSE_SCHEMA.
 */
const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
**Decision Rules (apply in order):**
1. **Model Continues:** If your last response explicitly states an immediate next action *you* intend to take (e.g., "Next, I will...", "Now I'll process...", "Moving on to analyze...", indicates an intended tool call that didn't execute), OR if the response seems clearly incomplete (cut off mid-thought without a natural conclusion), then the **'model'** should speak next.
2. **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
3. **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.`;
2025-05-10 13:11:03 -07:00
2025-08-11 16:04:58 -07:00
/**
 * JSON schema constraining the classification call's structured output to
 * `{ reasoning, next_speaker }`, matching the NextSpeakerResponse interface.
 * Typed as Record<string, unknown> for the generateJson `schema` parameter.
 */
const RESPONSE_SCHEMA: Record<string, unknown> = {
  type: 'object',
  properties: {
    reasoning: {
      type: 'string',
      description:
        "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn.",
    },
    next_speaker: {
      type: 'string',
      enum: ['user', 'model'],
      description:
        'Who should speak next based *only* on the preceding turn and the decision rules',
    },
  },
  required: ['reasoning', 'next_speaker'],
};
/** Result of the next-speaker classification (shape mirrors RESPONSE_SCHEMA). */
export interface NextSpeakerResponse {
  /** Model-supplied justification for the choice of next speaker. */
  reasoning: string;
  /** Who should produce the next conversation turn. */
  next_speaker: 'user' | 'model';
}
export async function checkNextSpeaker (
2025-05-26 14:17:56 -07:00
chat : GeminiChat ,
2025-09-15 00:15:18 -04:00
baseLlmClient : BaseLlmClient ,
2025-05-27 23:40:25 -07:00
abortSignal : AbortSignal ,
2025-09-15 00:15:18 -04:00
promptId : string ,
2025-05-10 13:11:03 -07:00
) : Promise < NextSpeakerResponse | null > {
2025-05-11 12:59:44 -07:00
// We need to capture the curated history because there are many moments when the model will return invalid turns
// that when passed back up to the endpoint will break subsequent calls. An example of this is when the model decides
// to respond with an empty part collection if you were to send that message back to the server it will respond with
// a 400 indicating that model part collections MUST have content.
2025-05-26 14:39:29 -07:00
const curatedHistory = chat . getHistory ( /* curated */ true ) ;
2025-05-11 12:59:44 -07:00
2025-05-10 13:11:03 -07:00
// Ensure there's a model response to analyze
2025-05-26 14:39:29 -07:00
if ( curatedHistory . length === 0 ) {
// Cannot determine next speaker if history is empty.
return null ;
}
const comprehensiveHistory = chat . getHistory ( ) ;
// If comprehensiveHistory is empty, there is no last message to check.
// This case should ideally be caught by the curatedHistory.length check earlier,
// but as a safeguard:
if ( comprehensiveHistory . length === 0 ) {
return null ;
}
const lastComprehensiveMessage =
comprehensiveHistory [ comprehensiveHistory . length - 1 ] ;
// If the last message is a user message containing only function_responses,
// then the model should speak next.
if (
lastComprehensiveMessage &&
isFunctionResponse ( lastComprehensiveMessage )
) {
return {
reasoning :
'The last message was a function response, so the model should speak next.' ,
next_speaker : 'model' ,
} ;
}
if (
lastComprehensiveMessage &&
lastComprehensiveMessage . role === 'model' &&
lastComprehensiveMessage . parts &&
lastComprehensiveMessage . parts . length === 0
) {
return {
reasoning :
'The last message was a filler model message with no content (nothing for user to act on), model should speak next.' ,
next_speaker : 'model' ,
} ;
}
2025-07-05 17:23:39 +02:00
// Things checked out. Let's proceed to potentially making an LLM request.
2025-05-26 14:39:29 -07:00
const lastMessage = curatedHistory [ curatedHistory . length - 1 ] ;
if ( ! lastMessage || lastMessage . role !== 'model' ) {
2025-05-10 13:11:03 -07:00
// Cannot determine next speaker if the last turn wasn't from the model
// or if history is empty.
return null ;
}
const contents : Content [ ] = [
2025-05-26 14:39:29 -07:00
. . . curatedHistory ,
2025-05-10 13:11:03 -07:00
{ role : 'user' , parts : [ { text : CHECK_PROMPT } ] } ,
] ;
try {
2026-02-10 00:10:15 +00:00
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
2025-09-15 00:15:18 -04:00
const parsedResponse = ( await baseLlmClient . generateJson ( {
2025-11-07 14:18:45 -08:00
modelConfigKey : { model : 'next-speaker-checker' } ,
2025-05-10 13:11:03 -07:00
contents ,
2025-09-15 00:15:18 -04:00
schema : RESPONSE_SCHEMA ,
2025-05-27 23:40:25 -07:00
abortSignal ,
2025-09-15 00:15:18 -04:00
promptId ,
2026-02-17 12:32:30 -05:00
role : LlmRole.UTILITY_NEXT_SPEAKER ,
2025-09-15 00:15:18 -04:00
} ) ) as unknown as NextSpeakerResponse ;
2025-05-10 13:11:03 -07:00
if (
parsedResponse &&
parsedResponse . next_speaker &&
[ 'user' , 'model' ] . includes ( parsedResponse . next_speaker )
) {
return parsedResponse ;
}
return null ;
} catch ( error ) {
2025-10-21 16:35:22 -04:00
debugLogger . warn (
2025-05-22 06:00:36 +00:00
'Failed to talk to Gemini endpoint when seeing if conversation should continue.' ,
2025-05-10 13:11:03 -07:00
error ,
) ;
return null ;
}
}