Files
gemini-cli/packages/core/src/services/chatCompressionService.ts

393 lines
13 KiB
TypeScript
Raw Normal View History

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Content } from '@google/genai';
import type { Config } from '../config/config.js';
import type { GeminiChat } from '../core/geminiChat.js';
import { type ChatCompressionInfo, CompressionStatus } from '../core/turn.js';
import { tokenLimit } from '../core/tokenLimits.js';
import { getCompressionPrompt } from '../core/prompts.js';
import { getResponseText } from '../utils/partUtils.js';
import { logChatCompression } from '../telemetry/loggers.js';
import { makeChatCompressionEvent } from '../telemetry/types.js';
import {
saveTruncatedToolOutput,
formatTruncatedToolOutput,
} from '../utils/fileUtils.js';
import { debugLogger } from '../utils/debugLogger.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
import {
calculateRequestTokenCount,
estimateTokenCountSync,
} from '../utils/tokenCalculation.js';
import {
DEFAULT_GEMINI_FLASH_LITE_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
DEFAULT_GEMINI_MODEL,
PREVIEW_GEMINI_MODEL,
PREVIEW_GEMINI_FLASH_MODEL,
} from '../config/models.js';
import { PreCompressTrigger } from '../hooks/types.js';
/**
 * Default threshold for compression token count as a fraction of the model's
 * token limit. If the chat history exceeds this fraction of the limit, it
 * will be compressed.
 */
export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.5;
/**
 * The fraction of the latest chat history to keep verbatim. A value of 0.3
 * means that only the last 30% of the chat history will be kept after
 * compression; the older 70% is summarized.
 */
export const COMPRESSION_PRESERVE_THRESHOLD = 0.3;
/**
 * Token budget for function (tool) responses kept in the history during
 * compression. Responses that would push the running total past this budget
 * are truncated and their full content saved to a temp file.
 */
export const COMPRESSION_FUNCTION_RESPONSE_TOKEN_BUDGET = 50_000;
/**
 * The number of trailing lines to keep when truncating a function response
 * during compression.
 */
export const COMPRESSION_TRUNCATE_LINES = 30;
/**
 * Determines the index of the first history entry that should be preserved;
 * everything before that index is eligible for compression. A return value
 * equal to contents.length means the entire history may be compressed.
 *
 * A valid split point is a plain user turn (one carrying no function
 * responses), so that the preserved tail never starts mid tool-call. If no
 * such turn exists past the target size, the whole history is compressible
 * only when the final entry is a model turn with no pending function call;
 * otherwise we fall back to the last plain user turn seen.
 *
 * Exported for testing purposes.
 *
 * @param contents The conversation history to scan.
 * @param fraction Target fraction (by serialized character count) of the
 *   history to compress; must be strictly between 0 and 1.
 * @returns Index of the oldest entry to keep (0..contents.length).
 * @throws Error when fraction is outside the open interval (0, 1).
 */
export function findCompressSplitPoint(
  contents: Content[],
  fraction: number,
): number {
  if (fraction <= 0 || fraction >= 1) {
    throw new Error('Fraction must be between 0 and 1');
  }

  // Size each entry by its JSON length as a cheap proxy for token count.
  const sizes = contents.map((item) => JSON.stringify(item).length);
  const target = fraction * sizes.reduce((sum, n) => sum + n, 0);

  let fallbackSplit = 0; // 0 is always valid (compress nothing).
  let seenChars = 0;
  for (const [index, item] of contents.entries()) {
    const isPlainUserTurn =
      item.role === 'user' &&
      !item.parts?.some((part) => !!part.functionResponse);
    if (isPlainUserTurn) {
      // Note: the current entry's size is NOT yet included in seenChars,
      // so the split lands on the first plain user turn at/after target.
      if (seenChars >= target) {
        return index;
      }
      fallbackSplit = index;
    }
    seenChars += sizes[index];
  }

  // No qualifying split point past the target. Compressing everything is
  // safe only if the conversation ends on a model turn with no pending
  // function call awaiting a response.
  const finalTurn = contents[contents.length - 1];
  if (
    finalTurn?.role === 'model' &&
    !finalTurn?.parts?.some((part) => part.functionCall)
  ) {
    return contents.length;
  }

  // Otherwise settle for the last plain user turn we saw.
  return fallbackSplit;
}
/**
 * Maps a concrete model identifier to the model-config alias used when
 * requesting a chat-compression summary for that model family.
 * Unrecognized models fall back to the generic compression alias.
 *
 * @param model The model identifier in use for the chat.
 * @returns The chat-compression model-config alias for that model.
 */
export function modelStringToModelConfigAlias(model: string): string {
  if (model === PREVIEW_GEMINI_MODEL) {
    return 'chat-compression-3-pro';
  }
  if (model === PREVIEW_GEMINI_FLASH_MODEL) {
    return 'chat-compression-3-flash';
  }
  if (model === DEFAULT_GEMINI_MODEL) {
    return 'chat-compression-2.5-pro';
  }
  if (model === DEFAULT_GEMINI_FLASH_MODEL) {
    return 'chat-compression-2.5-flash';
  }
  if (model === DEFAULT_GEMINI_FLASH_LITE_MODEL) {
    return 'chat-compression-2.5-flash-lite';
  }
  return 'chat-compression-default';
}
/**
 * Processes the chat history to ensure function responses don't exceed a specific token budget.
 *
 * This function implements a "Reverse Token Budget" strategy:
 * 1. It iterates through the history from the most recent turn to the oldest.
 * 2. It keeps a running tally of tokens used by function responses.
 * 3. Recent tool outputs are preserved in full to maintain high-fidelity context for the current turn.
 * 4. Once the budget (COMPRESSION_FUNCTION_RESPONSE_TOKEN_BUDGET) is exceeded, any older large
 *    tool responses are truncated to their last 30 lines and saved to a temporary file.
 *
 * This ensures that compression effectively reduces context size even when recent turns
 * contain massive tool outputs (like large grep results or logs).
 *
 * @param history The curated chat history to process (not mutated; a new
 *   array of new Content objects is returned).
 * @param config Supplies the truncation-ID sequence and the project temp
 *   directory used when saving full tool output to disk.
 * @returns A new history array where over-budget function responses have
 *   been replaced with truncated placeholders.
 */
async function truncateHistoryToBudget(
  history: Content[],
  config: Config,
): Promise<Content[]> {
  // Running total of function-response tokens, accumulated newest-first.
  let functionResponseTokenCounter = 0;
  const truncatedHistory: Content[] = [];
  // Iterate backwards: newest messages first to prioritize their context.
  for (let i = history.length - 1; i >= 0; i--) {
    const content = history[i];
    const newParts = [];
    if (content.parts) {
      // Process parts of the message backwards as well.
      for (let j = content.parts.length - 1; j >= 0; j--) {
        const part = content.parts[j];
        if (part.functionResponse) {
          const responseObj = part.functionResponse.response;
          // Ensure we have a string representation to truncate.
          // If the response is an object, we try to extract a primary string field (output or content).
          let contentStr: string;
          if (typeof responseObj === 'string') {
            contentStr = responseObj;
          } else if (responseObj && typeof responseObj === 'object') {
            if (
              'output' in responseObj &&
              typeof responseObj['output'] === 'string'
            ) {
              contentStr = responseObj['output'];
            } else if (
              'content' in responseObj &&
              typeof responseObj['content'] === 'string'
            ) {
              contentStr = responseObj['content'];
            } else {
              // No primary string field; fall back to pretty-printed JSON.
              contentStr = JSON.stringify(responseObj, null, 2);
            }
          } else {
            // null/undefined/primitive response: serialize as-is.
            contentStr = JSON.stringify(responseObj, null, 2);
          }
          // Cheap synchronous estimate; the exact count is not needed here.
          const tokens = estimateTokenCountSync([{ text: contentStr }]);
          if (
            functionResponseTokenCounter + tokens >
            COMPRESSION_FUNCTION_RESPONSE_TOKEN_BUDGET
          ) {
            try {
              // Budget exceeded: Truncate this response. The full output is
              // persisted to the project temp dir so nothing is lost.
              const { outputFile } = await saveTruncatedToolOutput(
                contentStr,
                part.functionResponse.name ?? 'unknown_tool',
                config.getNextCompressionTruncationId(),
                config.storage.getProjectTempDir(),
              );
              // Prepare a honest, readable snippet of the tail, pointing at
              // the file that holds the full output.
              const truncatedMessage = formatTruncatedToolOutput(
                contentStr,
                outputFile,
                COMPRESSION_TRUNCATE_LINES,
              );
              // unshift keeps the original part order despite the reverse scan.
              newParts.unshift({
                functionResponse: {
                  ...part.functionResponse,
                  response: { output: truncatedMessage },
                },
              });
              // Count the small truncated placeholder towards the budget.
              functionResponseTokenCounter += estimateTokenCountSync([
                { text: truncatedMessage },
              ]);
            } catch (error) {
              // Fallback: if truncation fails, keep the original part to avoid data loss in the chat.
              debugLogger.debug('Failed to truncate history to budget:', error);
              newParts.unshift(part);
              functionResponseTokenCounter += tokens;
            }
          } else {
            // Within budget: keep the full response.
            functionResponseTokenCounter += tokens;
            newParts.unshift(part);
          }
        } else {
          // Non-tool response part: always keep.
          newParts.unshift(part);
        }
      }
    }
    // Reconstruct the message with processed (potentially truncated) parts.
    // NOTE(review): a message whose parts was undefined is re-emitted with
    // parts: [] — presumably harmless downstream, but worth confirming.
    truncatedHistory.unshift({ ...content, parts: newParts });
  }
  return truncatedHistory;
}
/**
 * Orchestrates chat-history compression: decides whether compression is
 * needed, truncates oversized tool outputs, summarizes the older portion of
 * the conversation with an LLM, and returns the summary spliced in front of
 * the preserved tail.
 */
export class ChatCompressionService {
  /**
   * Compresses the chat history when it exceeds the token threshold, or
   * unconditionally when forced.
   *
   * @param chat Source of the curated history and last prompt token count.
   * @param promptId Prompt id forwarded to the summarization request.
   * @param force When true, skip the threshold check and compress anyway;
   *   also selects the Manual (vs Auto) PreCompress hook trigger.
   * @param model Model name used for token limits and to pick the
   *   summarization model-config alias.
   * @param config Supplies hooks, compression threshold, storage, telemetry,
   *   and the LLM/content-generator clients.
   * @param hasFailedCompressionAttempt When true and not forced, bail out
   *   immediately (avoids repeating a failing compression).
   * @returns newHistory (null when nothing changed or compression failed)
   *   plus token counts and a CompressionStatus.
   */
  async compress(
    chat: GeminiChat,
    promptId: string,
    force: boolean,
    model: string,
    config: Config,
    hasFailedCompressionAttempt: boolean,
  ): Promise<{ newHistory: Content[] | null; info: ChatCompressionInfo }> {
    const curatedHistory = chat.getHistory(true);
    // Regardless of `force`, don't do anything if the history is empty.
    if (
      curatedHistory.length === 0 ||
      (hasFailedCompressionAttempt && !force)
    ) {
      return {
        newHistory: null,
        info: {
          originalTokenCount: 0,
          newTokenCount: 0,
          compressionStatus: CompressionStatus.NOOP,
        },
      };
    }
    // Fire PreCompress hook before compression.
    // This fires for both manual and auto compression attempts.
    const trigger = force ? PreCompressTrigger.Manual : PreCompressTrigger.Auto;
    await config.getHookSystem()?.firePreCompressEvent(trigger);
    const originalTokenCount = chat.getLastPromptTokenCount();
    // Don't compress if not forced and we are under the limit.
    if (!force) {
      const threshold =
        (await config.getCompressionThreshold()) ??
        DEFAULT_COMPRESSION_TOKEN_THRESHOLD;
      if (originalTokenCount < threshold * tokenLimit(model)) {
        return {
          newHistory: null,
          info: {
            originalTokenCount,
            newTokenCount: originalTokenCount,
            compressionStatus: CompressionStatus.NOOP,
          },
        };
      }
    }
    // Apply token-based truncation to the entire history before splitting.
    // This ensures that even the "to compress" portion is within safe limits for the summarization model.
    const truncatedHistory = await truncateHistoryToBudget(
      curatedHistory,
      config,
    );
    // Split so the newest COMPRESSION_PRESERVE_THRESHOLD fraction is kept.
    const splitPoint = findCompressSplitPoint(
      truncatedHistory,
      1 - COMPRESSION_PRESERVE_THRESHOLD,
    );
    const historyToCompressTruncated = truncatedHistory.slice(0, splitPoint);
    const historyToKeepTruncated = truncatedHistory.slice(splitPoint);
    if (historyToCompressTruncated.length === 0) {
      // Nothing eligible for compression (e.g. split point is 0).
      return {
        newHistory: null,
        info: {
          originalTokenCount,
          newTokenCount: originalTokenCount,
          compressionStatus: CompressionStatus.NOOP,
        },
      };
    }
    // High Fidelity Decision: Should we send the original or truncated history to the summarizer?
    // The split index is valid on both arrays because truncation preserves
    // the message count one-to-one.
    const originalHistoryToCompress = curatedHistory.slice(0, splitPoint);
    const originalToCompressTokenCount = estimateTokenCountSync(
      originalHistoryToCompress.flatMap((c) => c.parts || []),
    );
    // Prefer the untruncated history when it fits the summarizer's window.
    const historyForSummarizer =
      originalToCompressTokenCount < tokenLimit(model)
        ? originalHistoryToCompress
        : historyToCompressTruncated;
    const summaryResponse = await config.getBaseLlmClient().generateContent({
      modelConfigKey: { model: modelStringToModelConfigAlias(model) },
      contents: [
        ...historyForSummarizer,
        {
          role: 'user',
          parts: [
            {
              text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
            },
          ],
        },
      ],
      systemInstruction: { text: getCompressionPrompt() },
      promptId,
      // TODO(joshualitt): wire up a sensible abort signal.
      abortSignal: new AbortController().signal,
    });
    const summary = getResponseText(summaryResponse) ?? '';
    // Replace the compressed span with a summary turn + acknowledgement,
    // followed by the preserved (truncated) tail.
    const extraHistory: Content[] = [
      {
        role: 'user',
        parts: [{ text: summary }],
      },
      {
        role: 'model',
        parts: [{ text: 'Got it. Thanks for the additional context!' }],
      },
      ...historyToKeepTruncated,
    ];
    // Use a shared utility to construct the initial history for an accurate token count.
    // NOTE(review): fullNewHistory is used only for counting; newHistory below
    // returns extraHistory — presumably the caller re-adds the initial
    // context itself. Verify against callers.
    const fullNewHistory = await getInitialChatHistory(config, extraHistory);
    const newTokenCount = await calculateRequestTokenCount(
      fullNewHistory.flatMap((c) => c.parts || []),
      config.getContentGenerator(),
      model,
    );
    // Emit telemetry for before/after token counts.
    logChatCompression(
      config,
      makeChatCompressionEvent({
        tokens_before: originalTokenCount,
        tokens_after: newTokenCount,
      }),
    );
    if (newTokenCount > originalTokenCount) {
      // Compression backfired (summary + tail is larger than the original);
      // report failure and keep the existing history.
      return {
        newHistory: null,
        info: {
          originalTokenCount,
          newTokenCount,
          compressionStatus:
            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
        },
      };
    } else {
      return {
        newHistory: extraHistory,
        info: {
          originalTokenCount,
          newTokenCount,
          compressionStatus: CompressionStatus.COMPRESSED,
        },
      };
    }
  }
}