2026-03-27 12:22:35 -07:00
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2026-03-30 15:29:59 -07:00
import type { Content , Part } from '@google/genai' ;
2026-03-27 12:22:35 -07:00
import { getResponseText } from '../utils/partUtils.js' ;
2026-03-30 15:29:59 -07:00
import { estimateTokenCountSync } from '../utils/tokenCalculation.js' ;
2026-03-27 12:22:35 -07:00
import { LlmRole } from '../telemetry/llmRole.js' ;
import { debugLogger } from '../utils/debugLogger.js' ;
2026-04-02 09:22:04 -07:00
import type { AgentHistoryProviderConfig } from './types.js' ;
2026-03-30 15:29:59 -07:00
import type { Config } from '../config/config.js' ;
import {
MIN_TARGET_TOKENS ,
MIN_CHARS_FOR_TRUNCATION ,
TEXT_TRUNCATION_PREFIX ,
estimateCharsFromTokens ,
truncateProportionally ,
normalizeFunctionResponse ,
2026-03-31 17:01:46 -07:00
} from './truncation.js' ;
2026-03-27 12:22:35 -07:00
/**
 * Keeps an agent's chat history inside a token budget.
 *
 * Oversized messages are shortened in place first; if the history as a whole
 * is still above `providerConfig.maxTokens`, the oldest messages are replaced
 * by a single summary (LLM-generated, with a deterministic fallback).
 */
export class AgentHistoryProvider {
  // TODO(joshualitt): just pass the BaseLlmClient instead of the whole Config.
  constructor(
    private readonly providerConfig: AgentHistoryProviderConfig,
    private readonly config: Config,
  ) {}

  /**
   * Evaluates the chat history and performs truncation and summarization if necessary.
   *
   * @param history The full chat history, oldest message first.
   * @param abortSignal Optional signal forwarded to the summarization request.
   * @returns The original array when nothing had to change; otherwise a new
   *   array containing the normalized and/or summarized history.
   */
  async manageHistory(
    history: readonly Content[],
    abortSignal?: AbortSignal,
  ): Promise<readonly Content[]> {
    if (history.length === 0) {
      return history;
    }

    // Step 1: Normalize individual messages that exceed their size limit.
    const normalizedHistory = this.enforceMessageSizeLimits(history);
    const totalTokens = estimateTokenCountSync(
      normalizedHistory.flatMap((c) => c.parts ?? []),
    );

    // Step 2: Check if truncation is needed based on the token threshold (High Watermark)
    if (totalTokens <= this.providerConfig.maxTokens) {
      return normalizedHistory;
    }

    // Step 3: Split into keep/truncate boundaries
    const { messagesToKeep, messagesToTruncate } =
      this.splitHistoryForTruncation(normalizedHistory);
    if (messagesToTruncate.length === 0) {
      // Nothing can be offloaded; hand back the normalized history itself
      // rather than an equivalent copy.
      return normalizedHistory;
    }

    debugLogger.log(
      `AgentHistoryProvider: Truncating ${messagesToTruncate.length} messages, retaining ${messagesToKeep.length} messages.`,
    );

    // Step 4: Summarize the truncated head and splice it in front of the tail.
    const summaryText = await this.getSummaryText(
      messagesToTruncate,
      messagesToKeep,
      abortSignal,
    );
    return this.mergeSummaryWithHistory(summaryText, messagesToKeep);
  }

  /**
   * Enforces per-message size limits across the whole history.
   *
   * The newest messages whose cumulative token count fits within
   * `retainedTokens` form the "grace zone" and are allowed up to
   * `maximumMessageTokens` each. Every older message is restricted to
   * `normalMessageTokens`.
   *
   * @returns The input array itself when no message was changed, otherwise a
   *   new array.
   */
  private enforceMessageSizeLimits(
    history: readonly Content[],
  ): readonly Content[] {
    if (history.length === 0) return history;

    // Scan backwards to find the index where the token budget is exhausted.
    // The message that tips the total over the budget is the first one
    // outside the grace zone.
    let graceStartIndex = 0;
    let accumulatedTokens = 0;
    for (let i = history.length - 1; i >= 0; i--) {
      accumulatedTokens += estimateTokenCountSync(history[i].parts ?? []);
      if (accumulatedTokens > this.providerConfig.retainedTokens) {
        graceStartIndex = i + 1;
        break;
      }
    }

    let hasChanges = false;
    const newHistory = history.map((msg, i) => {
      const targetTokens =
        i < graceStartIndex
          ? this.providerConfig.normalMessageTokens
          : this.providerConfig.maximumMessageTokens;
      const normalizedMsg = this.normalizeMessage(msg, targetTokens);
      if (normalizedMsg !== msg) {
        hasChanges = true;
      }
      return normalizedMsg;
    });

    return hasChanges ? newHistory : history;
  }

  /**
   * Normalizes a message by proportionally masking its text or function response
   * if its total token count exceeds the target token limit.
   *
   * Parts are never dropped, so the message keeps its structure. Parts that
   * are neither text nor function responses are passed through untouched.
   *
   * @returns `msg` itself when it is within budget or when no part could be
   *   shortened; otherwise a copy with the shortened parts.
   */
  private normalizeMessage(msg: Content, targetTokens: number): Content {
    const parts = msg.parts ?? [];
    const currentTokens = estimateTokenCountSync(parts);
    if (currentTokens <= targetTokens) {
      return msg;
    }

    // Compression ratio applied to every compressible part.
    const ratio = targetTokens / currentTokens;
    const headRatio = this.providerConfig.normalizationHeadRatio;

    const newParts: Part[] = parts.map((part) => {
      if (part.text) {
        const partTokens = estimateTokenCountSync([part]);
        const targetPartTokens = Math.max(
          MIN_TARGET_TOKENS,
          Math.floor(partTokens * ratio),
        );
        const targetChars = estimateCharsFromTokens(
          part.text,
          targetPartTokens,
        );
        const shouldTruncate =
          part.text.length > targetChars &&
          targetChars > MIN_CHARS_FOR_TRUNCATION;
        if (!shouldTruncate) {
          return part;
        }
        const newText = truncateProportionally(
          part.text,
          targetChars,
          TEXT_TRUNCATION_PREFIX,
          headRatio,
        );
        // Spread the original part so that any fields accompanying the text
        // survive the truncation instead of being silently discarded.
        return { ...part, text: newText };
      }
      if (part.functionResponse) {
        return normalizeFunctionResponse(part, ratio, headRatio);
      }
      return part;
    });

    // Preserve object identity when nothing was actually shortened so that
    // callers comparing by reference do not see a spurious change.
    const changed = newParts.some((part, i) => part !== parts[i]);
    return changed ? { ...msg, parts: newParts } : msg;
  }

  /**
   * Determines the boundary for splitting history based on the token budget,
   * keeping recent messages under a specific target token threshold,
   * while ensuring structural integrity (e.g. keeping functionCall/functionResponse pairs).
   *
   * NOTE(review): when the newest message alone exceeds `retainedTokens`,
   * the boundary equals `history.length` and `messagesToKeep` is empty —
   * confirm this is intended.
   */
  private splitHistoryForTruncation(history: readonly Content[]): {
    messagesToKeep: readonly Content[];
    messagesToTruncate: readonly Content[];
  } {
    let accumulatedTokens = 0;
    let truncationBoundary = 0; // The index of the first message to keep

    // Scan backwards to calculate the boundary based on token budget
    for (let i = history.length - 1; i >= 0; i--) {
      const msgTokens = estimateTokenCountSync(history[i].parts ?? []);
      if (accumulatedTokens + msgTokens > this.providerConfig.retainedTokens) {
        // Exceeded budget, stop retaining messages here.
        truncationBoundary = i + 1;
        break;
      }
      accumulatedTokens += msgTokens;
    }

    // Ensure structural integrity of the boundary
    truncationBoundary = this.adjustBoundaryForIntegrity(
      history,
      truncationBoundary,
    );

    return {
      messagesToKeep: history.slice(truncationBoundary),
      messagesToTruncate: history.slice(0, truncationBoundary),
    };
  }

  /**
   * Adjusts the truncation boundary backwards to prevent breaking functionCall/functionResponse pairs.
   *
   * If the first retained message is a user message carrying a function
   * response and the message before it is a model message carrying a function
   * call, the boundary moves back by one so the call is retained too.
   *
   * @param boundary Index of the first message to keep.
   * @returns The (possibly decremented) boundary. Boundaries at or outside
   *   the ends of `history` are returned unchanged.
   */
  private adjustBoundaryForIntegrity(
    history: readonly Content[],
    boundary: number,
  ): number {
    // Nothing to adjust when the boundary is at index 0 or out of bounds.
    if (boundary <= 0 || boundary >= history.length) {
      return boundary;
    }

    const firstKept = history[boundary];
    const lastDropped = history[boundary - 1];
    const startsWithResponse =
      firstKept.role === 'user' &&
      Boolean(firstKept.parts?.some((p) => p.functionResponse));
    const precededByCall =
      lastDropped.role === 'model' &&
      Boolean(lastDropped.parts?.some((p) => p.functionCall));

    // A single step is sufficient: after moving back, the boundary message is
    // the model's functionCall, which can never match the 'user' check again.
    return startsWithResponse && precededByCall ? boundary - 1 : boundary;
  }

  /**
   * Builds the chronological "action path": the names of all function calls
   * issued by model messages, joined with ' → '. Calls without a name are
   * skipped so the path never contains empty segments.
   */
  private extractActionPath(messages: readonly Content[]): string {
    return messages
      .filter((m) => m.role === 'model')
      .flatMap((m) => m.parts ?? [])
      .flatMap((p) => {
        const name = p.functionCall?.name;
        return name ? [name] : [];
      })
      .join(' → ');
  }

  /**
   * Produces a deterministic summary without calling the LLM. Used when
   * LLM summarization fails.
   *
   * The note contains the trimmed text of the last truncated user message
   * (if any) and the action path of truncated function calls (if any).
   */
  private getFallbackSummaryText(
    messagesToTruncate: readonly Content[],
  ): string {
    const userMessages = messagesToTruncate.filter((m) => m.role === 'user');
    const lastUserText = userMessages[userMessages.length - 1]?.parts
      ?.map((p) => p.text ?? '')
      .join('')
      .trim();
    const actionPath = this.extractActionPath(messagesToTruncate);

    const summaryParts = [
      '### [System Note: Conversation History Truncated]',
      'Prior context was offloaded to maintain performance. Key highlights from the truncated history:',
    ];
    if (lastUserText) {
      summaryParts.push(`- **Last User Intent:** "${lastUserText}"`);
    }
    if (actionPath) {
      summaryParts.push(`- **Action Path:** ${actionPath}`);
    }
    summaryParts.push(
      '- **Notice:** For deeper context, review persistent memory or task-specific logs.',
    );
    return summaryParts.join('\n');
  }

  /**
   * Returns the summary text for the truncated messages.
   *
   * Tries the LLM summarizer first; any error it throws is logged and the
   * deterministic fallback summary is returned instead.
   *
   * @returns An empty string when there is nothing to truncate.
   */
  private async getSummaryText(
    messagesToTruncate: readonly Content[],
    messagesToKeep: readonly Content[],
    abortSignal?: AbortSignal,
  ): Promise<string> {
    if (messagesToTruncate.length === 0) return '';

    try {
      // Use the first few messages of the Grace Zone as a "contextual bridge"
      // to give the summarizer lookahead into the current state.
      const bridge = messagesToKeep.slice(0, 5);
      return await this.generateIntentSummary(
        messagesToTruncate,
        bridge,
        abortSignal,
      );
    } catch (error) {
      debugLogger.log('AgentHistoryProvider: Summarization failed.', error);
      return this.getFallbackSummaryText(messagesToTruncate);
    }
  }

  /**
   * Places the summary in front of the retained messages.
   *
   * @returns `messagesToKeep` itself when `summaryText` is empty; otherwise a
   *   new array whose first message is a user message starting with the
   *   summary.
   */
  private mergeSummaryWithHistory(
    summaryText: string,
    messagesToKeep: readonly Content[],
  ): readonly Content[] {
    if (!summaryText) return messagesToKeep;

    const summaryPart: Part = { text: summaryText };
    if (messagesToKeep.length === 0) {
      return [{ role: 'user', parts: [summaryPart] }];
    }

    // To ensure strict user/model alternating roles required by the Gemini API,
    // we merge the summary into the first retained message if it's from the 'user'.
    const [firstRetainedMessage, ...rest] = messagesToKeep;
    if (firstRetainedMessage.role === 'user') {
      const mergedMessage: Content = {
        role: 'user',
        parts: [summaryPart, ...(firstRetainedMessage.parts ?? [])],
      };
      return [mergedMessage, ...rest];
    }

    const summaryMessage: Content = { role: 'user', parts: [summaryPart] };
    return [summaryMessage, ...messagesToKeep];
  }

  /**
   * Asks the summarizer model for a state summary of the truncated messages.
   *
   * @param messagesToTruncate Messages being offloaded; serialized into the prompt.
   * @param bridge Leading retained messages, serialized as lookahead context.
   * @param abortSignal Optional signal; a fresh, never-aborted signal is used
   *   when omitted.
   * @returns The summary, guaranteed to contain an `<intent_summary>` tag.
   * @throws Error when the model returns no text, so that the caller can fall
   *   back instead of replacing the history with an empty summary. Errors
   *   from the LLM client propagate as well.
   */
  private async generateIntentSummary(
    messagesToTruncate: readonly Content[],
    bridge: readonly Content[],
    abortSignal?: AbortSignal,
  ): Promise<string> {
    // 1. Identify any existing summary at the head of the truncated messages.
    //    A previous summary is always merged in as the first part of the
    //    first message, so only that part is inspected.
    const firstPartText = messagesToTruncate[0]?.parts?.[0]?.text ?? '';
    const hasPreviousSummary = firstPartText.includes('<intent_summary>');

    // 2. Extract "The Action Path" (necklace of function names)
    const actionPath = this.extractActionPath(messagesToTruncate);

    const prompt = `### State Update: Agent Continuity
The conversation history has been truncated. You are generating a highly factual state summary to preserve the agent's exact working context.
You have these signals to synthesize:
${hasPreviousSummary ? '1. **Previous Summary:** The existing state before this truncation.\n' : ''}2. **The Action Path:** A chronological list of tools called: [${actionPath}]
3. **Truncated History:** The specific actions, tool inputs, and tool outputs being offloaded.
4. **Active Bridge:** The first few turns of the "Grace Zone" (what follows immediately after this summary), showing the current tactical moment.
### Your Goal:
Distill these into a high-density Markdown block that orientates the agent on the CONCRETE STATE of the workspace:
- **Primary Goal:** The ultimate objective requested by the user.
- **Verified Facts:** What has been definitively completed or proven (e.g., "File X was created", "Bug Y was reproduced").
- **Working Set:** The exact file paths currently being analyzed or modified.
- **Active Blockers:** Exact error messages or failing test names currently preventing progress.
### Constraints:
- **Format:** Wrap the entire response in <intent_summary> tags.
- **Factuality:** Base all points strictly on the provided history. Do not invent rationale or assume success without proof. Use exact names and quotes.
- **Brevity:** Maximum 15 lines. No conversational preamble.
${hasPreviousSummary ? 'PREVIOUS SUMMARY AND TRUNCATED HISTORY:' : 'TRUNCATED HISTORY:'}
${JSON.stringify(messagesToTruncate)}
ACTIVE BRIDGE (LOOKAHEAD):
${JSON.stringify(bridge)}`;

    const summaryResponse = await this.config
      .getBaseLlmClient()
      .generateContent({
        modelConfigKey: { model: 'agent-history-provider-summarizer' },
        contents: [
          {
            role: 'user',
            parts: [{ text: prompt }],
          },
        ],
        promptId: 'agent-history-provider',
        abortSignal: abortSignal ?? new AbortController().signal,
        role: LlmRole.UTILITY_COMPRESSOR,
      });

    // Clean up if the model included extra tags or markdown
    const summary = (getResponseText(summaryResponse) ?? '')
      .replace(/```markdown/g, '')
      .replace(/```/g, '')
      .trim();

    if (!summary) {
      // An empty summary would erase the truncated context entirely; fail so
      // the caller substitutes its fallback summary.
      throw new Error(
        'AgentHistoryProvider: summarizer returned an empty response.',
      );
    }

    return summary.includes('<intent_summary>')
      ? summary
      : `<intent_summary>\n${summary}\n</intent_summary>`;
  }
}