mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-12 14:22:00 -07:00
feat(browser): supersede stale snapshots to reclaim context-window tokens (#24440)
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
*/
|
||||
|
||||
import type { LocalAgentDefinition } from '../types.js';
|
||||
import { supersedeStaleSnapshots } from './snapshotSuperseder.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { z } from 'zod';
|
||||
import {
|
||||
@@ -184,6 +185,11 @@ export const BrowserAgentDefinition = (
|
||||
// This is undefined here and will be set at invocation time
|
||||
toolConfig: undefined,
|
||||
|
||||
// Supersede stale take_snapshot outputs to reclaim context-window tokens.
|
||||
// Each snapshot contains the full accessibility tree; only the most recent
|
||||
// one is meaningful, so prior snapshots are replaced with a placeholder.
|
||||
onBeforeTurn: (chat) => supersedeStaleSnapshots(chat),
|
||||
|
||||
promptConfig: {
|
||||
query: `Your task is:
|
||||
<task>
|
||||
|
||||
214
packages/core/src/agents/browser/snapshotSuperseder.test.ts
Normal file
214
packages/core/src/agents/browser/snapshotSuperseder.test.ts
Normal file
@@ -0,0 +1,214 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import {
|
||||
supersedeStaleSnapshots,
|
||||
SNAPSHOT_SUPERSEDED_PLACEHOLDER,
|
||||
} from './snapshotSuperseder.js';
|
||||
import type { GeminiChat } from '../../core/geminiChat.js';
|
||||
import type { Content } from '@google/genai';
|
||||
|
||||
/**
 * Builds a minimal mock GeminiChat around a mutable history array.
 *
 * - `getHistory` returns a fresh shallow copy of `history` on every call, so
 *   the code under test cannot mutate the backing array by accident.
 * - `setHistory` overwrites the contents of `history` in place, which lets a
 *   test inspect the result through the same array reference it passed in.
 *
 * Only these two methods are provided; the object is cast to `GeminiChat`.
 */
function createMockChat(history: Content[]): GeminiChat {
  return {
    getHistory: vi.fn(() => [...history]),
    setHistory: vi.fn((newHistory: readonly Content[]) => {
      // Replace in place so the caller's `history` reference stays valid.
      history.length = 0;
      history.push(...newHistory);
    }),
  } as unknown as GeminiChat;
}
|
||||
|
||||
/**
 * Helper: creates a `take_snapshot` functionResponse part.
 *
 * @param output Text placed in the `output` field of the response payload.
 * @returns A part-shaped object: `{ functionResponse: { name, response } }`.
 */
function snapshotResponse(output: string) {
  return {
    functionResponse: {
      name: 'take_snapshot',
      response: { output },
    },
  };
}
|
||||
|
||||
/**
 * Helper: creates a non-snapshot functionResponse part.
 *
 * @param name Tool name recorded on the functionResponse.
 * @param output Text placed in the `output` field of the response payload.
 * @returns A part-shaped object: `{ functionResponse: { name, response } }`.
 */
function otherToolResponse(name: string, output: string) {
  return {
    functionResponse: {
      name,
      response: { output },
    },
  };
}
|
||||
|
||||
// Behavioural tests for supersedeStaleSnapshots. Each case seeds `history`,
// wraps it in a mock chat, runs the function, then inspects either the
// `setHistory` spy or the (in-place updated) `history` array.
describe('supersedeStaleSnapshots', () => {
  let history: Content[];
  let chat: GeminiChat;

  beforeEach(() => {
    history = [];
  });

  it('should no-op when history has no snapshots', () => {
    history.push(
      { role: 'user', parts: [{ text: 'Click the button' }] },
      {
        role: 'user',
        parts: [otherToolResponse('click', 'Clicked element')],
      },
    );
    chat = createMockChat(history);

    supersedeStaleSnapshots(chat);

    expect(chat.setHistory).not.toHaveBeenCalled();
  });

  it('should no-op when history has exactly 1 snapshot', () => {
    history.push(
      { role: 'user', parts: [{ text: 'Navigate to page' }] },
      {
        role: 'user',
        parts: [snapshotResponse('<tree>big accessibility tree</tree>')],
      },
    );
    chat = createMockChat(history);

    supersedeStaleSnapshots(chat);

    expect(chat.setHistory).not.toHaveBeenCalled();
  });

  it('should replace all but the last snapshot when there are 2+', () => {
    history.push(
      {
        role: 'user',
        parts: [snapshotResponse('<tree>snapshot 1</tree>')],
      },
      {
        role: 'user',
        parts: [otherToolResponse('click', 'Clicked OK')],
      },
      {
        role: 'user',
        parts: [snapshotResponse('<tree>snapshot 2</tree>')],
      },
      {
        role: 'user',
        parts: [otherToolResponse('type_text', 'Typed hello')],
      },
      {
        role: 'user',
        parts: [snapshotResponse('<tree>snapshot 3 (latest)</tree>')],
      },
    );
    chat = createMockChat(history);

    supersedeStaleSnapshots(chat);

    expect(chat.setHistory).toHaveBeenCalledTimes(1);

    // First two snapshots should be replaced
    const part0 = history[0].parts![0];
    expect(part0.functionResponse?.response).toEqual({
      output: SNAPSHOT_SUPERSEDED_PLACEHOLDER,
    });

    const part2 = history[2].parts![0];
    expect(part2.functionResponse?.response).toEqual({
      output: SNAPSHOT_SUPERSEDED_PLACEHOLDER,
    });

    // Last snapshot should be untouched
    const part4 = history[4].parts![0];
    expect(part4.functionResponse?.response).toEqual({
      output: '<tree>snapshot 3 (latest)</tree>',
    });
  });

  it('should leave non-snapshot tool responses untouched', () => {
    history.push(
      {
        role: 'user',
        parts: [snapshotResponse('<tree>snapshot A</tree>')],
      },
      {
        role: 'user',
        parts: [otherToolResponse('click', 'Clicked button')],
      },
      {
        role: 'user',
        parts: [snapshotResponse('<tree>snapshot B (latest)</tree>')],
      },
    );
    chat = createMockChat(history);

    supersedeStaleSnapshots(chat);

    // click response should be untouched
    const clickPart = history[1].parts![0];
    expect(clickPart.functionResponse?.response).toEqual({
      output: 'Clicked button',
    });
  });

  it('should no-op when all stale snapshots are already superseded', () => {
    history.push(
      {
        role: 'user',
        parts: [snapshotResponse(SNAPSHOT_SUPERSEDED_PLACEHOLDER)],
      },
      {
        role: 'user',
        parts: [snapshotResponse('<tree>current snapshot</tree>')],
      },
    );
    chat = createMockChat(history);

    supersedeStaleSnapshots(chat);

    // Should not call setHistory since nothing changed
    expect(chat.setHistory).not.toHaveBeenCalled();
  });

  it('should handle snapshots in Content entries with multiple parts', () => {
    history.push(
      {
        role: 'user',
        parts: [
          otherToolResponse('click', 'Clicked'),
          snapshotResponse('<tree>snapshot in multi-part</tree>'),
        ],
      },
      {
        role: 'user',
        parts: [snapshotResponse('<tree>latest snapshot</tree>')],
      },
    );
    chat = createMockChat(history);

    supersedeStaleSnapshots(chat);

    expect(chat.setHistory).toHaveBeenCalledTimes(1);

    // The click response (index 0 of parts) should be untouched
    const clickPart = history[0].parts![0];
    expect(clickPart.functionResponse?.response).toEqual({
      output: 'Clicked',
    });

    // The snapshot (index 1 of parts) should be replaced
    const snapshotPart = history[0].parts![1];
    expect(snapshotPart.functionResponse?.response).toEqual({
      output: SNAPSHOT_SUPERSEDED_PLACEHOLDER,
    });

    // Latest snapshot untouched
    const latestPart = history[1].parts![0];
    expect(latestPart.functionResponse?.response).toEqual({
      output: '<tree>latest snapshot</tree>',
    });
  });
});
|
||||
149
packages/core/src/agents/browser/snapshotSuperseder.ts
Normal file
149
packages/core/src/agents/browser/snapshotSuperseder.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fileoverview Supersedes stale `take_snapshot` outputs in the browser
|
||||
* subagent's conversation history. Each snapshot contains the full
|
||||
* accessibility tree and is only meaningful as the "current" page state;
|
||||
* prior snapshots are stale and waste context-window tokens.
|
||||
*
|
||||
* Called via the {@link LocalAgentDefinition.onBeforeTurn} hook before each
|
||||
* model call so the model only ever sees the most recent snapshot in full.
|
||||
*/
|
||||
|
||||
import type { GeminiChat } from '../../core/geminiChat.js';
|
||||
import type { Content, Part } from '@google/genai';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
|
||||
/** Tool name whose functionResponse entries are treated as page snapshots. */
const TAKE_SNAPSHOT_TOOL_NAME = 'take_snapshot';

/**
 * Placeholder that replaces superseded snapshot outputs.
 * Kept short to minimise token cost while still being informative.
 */
export const SNAPSHOT_SUPERSEDED_PLACEHOLDER =
  '[Snapshot superseded — a newer snapshot exists later in this conversation. ' +
  'Call take_snapshot for current page state.]';
|
||||
|
||||
/**
 * Scans the chat history and replaces all but the most recent
 * `take_snapshot` `functionResponse` with a compact placeholder.
 *
 * No-ops (without calling `setHistory`) when:
 *  - There are fewer than 2 snapshots (nothing to supersede).
 *  - All prior snapshots have already been superseded, i.e. their `output`
 *    string already contains {@link SNAPSHOT_SUPERSEDED_PLACEHOLDER}.
 *
 * Otherwise builds a shallow copy of the history (each entry and its `parts`
 * array are copied; untouched parts are shared) and applies it with
 * {@link GeminiChat.setHistory}.
 *
 * @param chat Chat whose history is read via `getHistory()` and, when at
 *   least one replacement is needed, rewritten via `setHistory()`.
 */
export function supersedeStaleSnapshots(chat: GeminiChat): void {
  const history = chat.getHistory();

  // Locate all (contentIndex, partIndex) tuples for take_snapshot responses,
  // in conversation order.
  const snapshotLocations: Array<{ contentIdx: number; partIdx: number }> = [];
  history.forEach((content, contentIdx) => {
    content.parts?.forEach((part, partIdx) => {
      if (part.functionResponse?.name === TAKE_SNAPSHOT_TOOL_NAME) {
        snapshotLocations.push({ contentIdx, partIdx });
      }
    });
  });

  // Nothing to do if there are 0 or 1 snapshots.
  if (snapshotLocations.length < 2) {
    return;
  }

  // Every snapshot except the last is stale; keep only those that have not
  // been replaced by a previous call. A response without a string `output`
  // yields '' from getResponseOutput and therefore counts as pending.
  const pendingLocations = snapshotLocations
    .slice(0, -1)
    .filter(({ contentIdx, partIdx }) => {
      const response =
        history[contentIdx]?.parts?.[partIdx]?.functionResponse?.response;
      return !getResponseOutput(response).includes(
        SNAPSHOT_SUPERSEDED_PLACEHOLDER,
      );
    });

  if (pendingLocations.length === 0) {
    return;
  }

  // Shallow-copy the history so the array returned by getHistory() and its
  // `parts` arrays are not mutated.
  const newHistory: Content[] = history.map((content) => ({
    ...content,
    parts: content.parts ? [...content.parts] : undefined,
  }));

  for (const { contentIdx, partIdx } of pendingLocations) {
    const parts = newHistory[contentIdx]?.parts;
    const functionResponse = parts?.[partIdx]?.functionResponse;
    if (!parts || !functionResponse) continue;

    // The replacement part carries only `functionResponse`: the original
    // response fields are kept and `response` is swapped for the placeholder.
    const replacementPart: Part = {
      functionResponse: {
        // eslint-disable-next-line @typescript-eslint/no-misused-spread
        ...functionResponse,
        response: { output: SNAPSHOT_SUPERSEDED_PLACEHOLDER },
      },
    };

    parts[partIdx] = replacementPart;
  }

  chat.setHistory(newHistory);
  debugLogger.log(
    `[SnapshotSuperseder] Replaced ${pendingLocations.length} stale take_snapshot output(s).`,
  );
}
|
||||
|
||||
/**
 * Shape of a functionResponse.response that contains an `output` string.
 */
interface ResponseWithOutput {
  output: string;
}

/**
 * Type guard: reports whether `response` is a non-null object that has an
 * `output` property whose value is a string.
 *
 * @param response Candidate response payload; may be `undefined`.
 * @returns `true` only when `response.output` exists and is a string
 *   (the empty string counts).
 */
function isResponseWithOutput(
  response: object | undefined,
): response is ResponseWithOutput {
  // The declared type excludes null, but the value is checked at runtime
  // anyway because `in` throws on null/undefined operands.
  if (response === null || response === undefined) {
    return false;
  }
  return 'output' in response && typeof response.output === 'string';
}
|
||||
|
||||
/**
 * Safely extracts the `output` string from a functionResponse.response object.
 * The parameter is typed `object | undefined`, so a runtime check is needed
 * before the `output` field can be read.
 *
 * @param response Response payload of a functionResponse, if any.
 * @returns The `output` string when present, otherwise the empty string.
 */
function getResponseOutput(response: object | undefined): string {
  return isResponseWithOutput(response) ? response.output : '';
}
|
||||
@@ -317,6 +317,10 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
|
||||
await this.tryCompressChat(chat, promptId, combinedSignal);
|
||||
|
||||
// Allow the agent definition to modify history before the model call
|
||||
// (e.g., superseding stale tool outputs to reclaim context tokens).
|
||||
await this.definition.onBeforeTurn?.(chat, combinedSignal);
|
||||
|
||||
const { functionCalls, modelToUse } = await promptIdContext.run(
|
||||
promptId,
|
||||
async () =>
|
||||
|
||||
@@ -16,6 +16,7 @@ import type { AnySchema } from 'ajv';
|
||||
import type { AgentCard } from '@a2a-js/sdk';
|
||||
import type { A2AAuthConfig } from './auth-provider/types.js';
|
||||
import type { MCPServerConfig } from '../config/config.js';
|
||||
import type { GeminiChat } from '../core/geminiChat.js';
|
||||
|
||||
/**
|
||||
* Describes the possible termination modes for an agent.
|
||||
@@ -227,6 +228,18 @@ export interface LocalAgentDefinition<
|
||||
* @returns A string representation of the final output.
|
||||
*/
|
||||
processOutput?: (output: z.infer<TOutput>) => string;
|
||||
|
||||
/**
|
||||
* Optional hook invoked before each model call. Receives the active
|
||||
* {@link GeminiChat} instance and may modify chat history (e.g., to
|
||||
* supersede stale tool outputs and reclaim context-window tokens).
|
||||
*
|
||||
* Runs immediately after chat compression in the agent loop.
|
||||
*/
|
||||
onBeforeTurn?: (
|
||||
chat: GeminiChat,
|
||||
signal?: AbortSignal,
|
||||
) => Promise<void> | void;
|
||||
}
|
||||
|
||||
export interface BaseRemoteAgentDefinition<
|
||||
|
||||
Reference in New Issue
Block a user