2025-05-10 13:11:03 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2025-08-26 00:04:53 +02:00
import type { Content } from '@google/genai' ;
2025-09-15 00:15:18 -04:00
import type { BaseLlmClient } from '../core/baseLlmClient.js' ;
2025-08-26 00:04:53 +02:00
import type { GeminiChat } from '../core/geminiChat.js' ;
2025-05-26 14:39:29 -07:00
import { isFunctionResponse } from './messageInspectors.js' ;
2025-10-21 16:35:22 -04:00
import { debugLogger } from './debugLogger.js' ;
2026-02-17 12:32:30 -05:00
import { LlmRole } from '../telemetry/types.js' ;
2025-05-10 13:11:03 -07:00
/**
 * Prompt appended as a final user turn asking the model to classify, based
 * *strictly* on its own immediately preceding response, whether the 'user'
 * or the 'model' should logically speak next. Answered as structured JSON
 * constrained by RESPONSE_SCHEMA.
 */
const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
**Decision Rules (apply in order):**
1. **Model Continues:** If your last response explicitly states an immediate next action *you* intend to take (e.g., "Next, I will...", "Now I'll process...", "Moving on to analyze...", indicates an intended tool call that didn't execute), OR if the response seems clearly incomplete (cut off mid-thought without a natural conclusion), then the **'model'** should speak next.
2. **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
3. **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.`;
2025-05-10 13:11:03 -07:00
2025-08-11 16:04:58 -07:00
/**
 * JSON schema constraining the classification call's structured output to
 * `{ reasoning, next_speaker }`, matching the NextSpeakerResponse interface.
 * Typed as Record<string, unknown> for the generateJson `schema` parameter.
 */
const RESPONSE_SCHEMA: Record<string, unknown> = {
  type: 'object',
  properties: {
    reasoning: {
      type: 'string',
      description:
        "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn.",
    },
    next_speaker: {
      type: 'string',
      enum: ['user', 'model'],
      description:
        'Who should speak next based *only* on the preceding turn and the decision rules',
    },
  },
  required: ['reasoning', 'next_speaker'],
};
/** Result of the next-speaker classification (shape mirrors RESPONSE_SCHEMA). */
export interface NextSpeakerResponse {
  /** Model-supplied justification for the choice of next speaker. */
  reasoning: string;
  /** Who should produce the next conversation turn. */
  next_speaker: 'user' | 'model';
}
export async function checkNextSpeaker (
2025-05-26 14:17:56 -07:00
chat : GeminiChat ,
2025-09-15 00:15:18 -04:00
baseLlmClient : BaseLlmClient ,
2025-05-27 23:40:25 -07:00
abortSignal : AbortSignal ,
2025-09-15 00:15:18 -04:00
promptId : string ,
2025-05-10 13:11:03 -07:00
) : Promise < NextSpeakerResponse | null > {
2025-05-11 12:59:44 -07:00
// We need to capture the curated history because there are many moments when the model will return invalid turns
// that when passed back up to the endpoint will break subsequent calls. An example of this is when the model decides
// to respond with an empty part collection if you were to send that message back to the server it will respond with
// a 400 indicating that model part collections MUST have content.
2025-05-26 14:39:29 -07:00
const curatedHistory = chat . getHistory ( /* curated */ true ) ;
2025-05-11 12:59:44 -07:00
2025-05-10 13:11:03 -07:00
// Ensure there's a model response to analyze
2025-05-26 14:39:29 -07:00
if ( curatedHistory . length === 0 ) {
// Cannot determine next speaker if history is empty.
return null ;
}
const comprehensiveHistory = chat . getHistory ( ) ;
// If comprehensiveHistory is empty, there is no last message to check.
// This case should ideally be caught by the curatedHistory.length check earlier,
// but as a safeguard:
if ( comprehensiveHistory . length === 0 ) {
return null ;
}
const lastComprehensiveMessage =
comprehensiveHistory [ comprehensiveHistory . length - 1 ] ;
// If the last message is a user message containing only function_responses,
// then the model should speak next.
if (
lastComprehensiveMessage &&
isFunctionResponse ( lastComprehensiveMessage )
) {
return {
reasoning :
'The last message was a function response, so the model should speak next.' ,
next_speaker : 'model' ,
} ;
}
if (
lastComprehensiveMessage &&
lastComprehensiveMessage . role === 'model' &&
lastComprehensiveMessage . parts &&
lastComprehensiveMessage . parts . length === 0
) {
return {
reasoning :
'The last message was a filler model message with no content (nothing for user to act on), model should speak next.' ,
next_speaker : 'model' ,
} ;
}
2025-07-05 17:23:39 +02:00
// Things checked out. Let's proceed to potentially making an LLM request.
2025-05-26 14:39:29 -07:00
const lastMessage = curatedHistory [ curatedHistory . length - 1 ] ;
if ( ! lastMessage || lastMessage . role !== 'model' ) {
2025-05-10 13:11:03 -07:00
// Cannot determine next speaker if the last turn wasn't from the model
// or if history is empty.
return null ;
}
const contents : Content [ ] = [
2025-05-26 14:39:29 -07:00
. . . curatedHistory ,
2025-05-10 13:11:03 -07:00
{ role : 'user' , parts : [ { text : CHECK_PROMPT } ] } ,
] ;
try {
2026-02-10 00:10:15 +00:00
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
2025-09-15 00:15:18 -04:00
const parsedResponse = ( await baseLlmClient . generateJson ( {
2025-11-07 14:18:45 -08:00
modelConfigKey : { model : 'next-speaker-checker' } ,
2025-05-10 13:11:03 -07:00
contents ,
2025-09-15 00:15:18 -04:00
schema : RESPONSE_SCHEMA ,
2025-05-27 23:40:25 -07:00
abortSignal ,
2025-09-15 00:15:18 -04:00
promptId ,
2026-02-17 12:32:30 -05:00
role : LlmRole.UTILITY_NEXT_SPEAKER ,
2025-09-15 00:15:18 -04:00
} ) ) as unknown as NextSpeakerResponse ;
2025-05-10 13:11:03 -07:00
if (
parsedResponse &&
parsedResponse . next_speaker &&
[ 'user' , 'model' ] . includes ( parsedResponse . next_speaker )
) {
return parsedResponse ;
}
return null ;
} catch ( error ) {
2025-10-21 16:35:22 -04:00
debugLogger . warn (
2025-05-22 06:00:36 +00:00
'Failed to talk to Gemini endpoint when seeing if conversation should continue.' ,
2025-05-10 13:11:03 -07:00
error ,
) ;
return null ;
}
}