feat(browser): supersede stale snapshots to reclaim context-window tokens (#24440)

This commit is contained in:
Gaurav
2026-04-02 02:41:39 +08:00
committed by GitHub
parent 43cf63e189
commit aed85725b6
5 changed files with 386 additions and 0 deletions

View File

@@ -14,6 +14,7 @@
*/
import type { LocalAgentDefinition } from '../types.js';
import { supersedeStaleSnapshots } from './snapshotSuperseder.js';
import type { Config } from '../../config/config.js';
import { z } from 'zod';
import {
@@ -184,6 +185,11 @@ export const BrowserAgentDefinition = (
// This is undefined here and will be set at invocation time
toolConfig: undefined,
// Supersede stale take_snapshot outputs to reclaim context-window tokens.
// Each snapshot contains the full accessibility tree; only the most recent
// one is meaningful, so prior snapshots are replaced with a placeholder.
onBeforeTurn: (chat) => supersedeStaleSnapshots(chat),
promptConfig: {
query: `Your task is:
<task>

View File

@@ -0,0 +1,214 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import {
supersedeStaleSnapshots,
SNAPSHOT_SUPERSEDED_PLACEHOLDER,
} from './snapshotSuperseder.js';
import type { GeminiChat } from '../../core/geminiChat.js';
import type { Content } from '@google/genai';
/**
 * Creates a stand-in GeminiChat backed by the given array.
 *
 * `getHistory` hands out a fresh shallow copy on every call, and `setHistory`
 * overwrites the backing array in place, so a test can inspect `history`
 * afterwards to see what was written. Both are `vi.fn` spies.
 */
function createMockChat(history: Content[]): GeminiChat {
  const getHistory = vi.fn(() => [...history]);
  const setHistory = vi.fn((newHistory: readonly Content[]) => {
    // Empty the backing array first, then refill it with the new entries.
    history.length = 0;
    history.push(...newHistory);
  });
  const mock = { getHistory, setHistory };
  return mock as unknown as GeminiChat;
}
/** Builds a `functionResponse` part named `take_snapshot` carrying `output`. */
function snapshotResponse(output: string) {
  const functionResponse = {
    name: 'take_snapshot',
    response: { output },
  };
  return { functionResponse };
}
/**
 * Builds a `functionResponse` part for an arbitrary tool: `name` is the tool
 * name and `output` becomes the response payload.
 */
function otherToolResponse(name: string, output: string) {
  const response = { output };
  return { functionResponse: { name, response } };
}
describe('supersedeStaleSnapshots', () => {
  let history: Content[];
  let chat: GeminiChat;

  /** Wraps the given parts in a single user-role history entry. */
  const userTurn = (...parts: NonNullable<Content['parts']>): Content => ({
    role: 'user',
    parts,
  });

  /** Seeds the history, wraps it in a mock chat and runs the superseder once. */
  const runSuperseder = (...entries: Content[]): void => {
    history.push(...entries);
    chat = createMockChat(history);
    supersedeStaleSnapshots(chat);
  };

  /** Reads the functionResponse payload stored at the given history slot. */
  const responseAt = (contentIdx: number, partIdx = 0) =>
    history[contentIdx].parts?.[partIdx].functionResponse?.response;

  beforeEach(() => {
    history = [];
  });

  it('should no-op when history has no snapshots', () => {
    runSuperseder(
      userTurn({ text: 'Click the button' }),
      userTurn(otherToolResponse('click', 'Clicked element')),
    );

    expect(chat.setHistory).not.toHaveBeenCalled();
  });

  it('should no-op when history has exactly 1 snapshot', () => {
    runSuperseder(
      userTurn({ text: 'Navigate to page' }),
      userTurn(snapshotResponse('<tree>big accessibility tree</tree>')),
    );

    expect(chat.setHistory).not.toHaveBeenCalled();
  });

  it('should replace all but the last snapshot when there are 2+', () => {
    runSuperseder(
      userTurn(snapshotResponse('<tree>snapshot 1</tree>')),
      userTurn(otherToolResponse('click', 'Clicked OK')),
      userTurn(snapshotResponse('<tree>snapshot 2</tree>')),
      userTurn(otherToolResponse('type_text', 'Typed hello')),
      userTurn(snapshotResponse('<tree>snapshot 3 (latest)</tree>')),
    );

    expect(chat.setHistory).toHaveBeenCalledTimes(1);

    // The two earlier snapshots now carry the placeholder.
    expect(responseAt(0)).toEqual({
      output: SNAPSHOT_SUPERSEDED_PLACEHOLDER,
    });
    expect(responseAt(2)).toEqual({
      output: SNAPSHOT_SUPERSEDED_PLACEHOLDER,
    });

    // The most recent snapshot keeps its full output.
    expect(responseAt(4)).toEqual({
      output: '<tree>snapshot 3 (latest)</tree>',
    });
  });

  it('should leave non-snapshot tool responses untouched', () => {
    runSuperseder(
      userTurn(snapshotResponse('<tree>snapshot A</tree>')),
      userTurn(otherToolResponse('click', 'Clicked button')),
      userTurn(snapshotResponse('<tree>snapshot B (latest)</tree>')),
    );

    // The click response sits between two snapshots and must be unchanged.
    expect(responseAt(1)).toEqual({
      output: 'Clicked button',
    });
  });

  it('should no-op when all stale snapshots are already superseded', () => {
    runSuperseder(
      userTurn(snapshotResponse(SNAPSHOT_SUPERSEDED_PLACEHOLDER)),
      userTurn(snapshotResponse('<tree>current snapshot</tree>')),
    );

    // Nothing changed, so the history must not be rewritten.
    expect(chat.setHistory).not.toHaveBeenCalled();
  });

  it('should handle snapshots in Content entries with multiple parts', () => {
    runSuperseder(
      userTurn(
        otherToolResponse('click', 'Clicked'),
        snapshotResponse('<tree>snapshot in multi-part</tree>'),
      ),
      userTurn(snapshotResponse('<tree>latest snapshot</tree>')),
    );

    expect(chat.setHistory).toHaveBeenCalledTimes(1);

    // Part 0 of the first entry is the click response: unchanged.
    expect(responseAt(0, 0)).toEqual({
      output: 'Clicked',
    });

    // Part 1 of the first entry is the stale snapshot: replaced.
    expect(responseAt(0, 1)).toEqual({
      output: SNAPSHOT_SUPERSEDED_PLACEHOLDER,
    });

    // The latest snapshot keeps its full output.
    expect(responseAt(1, 0)).toEqual({
      output: '<tree>latest snapshot</tree>',
    });
  });
});

View File

@@ -0,0 +1,149 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview Supersedes stale `take_snapshot` outputs in the browser
* subagent's conversation history. Each snapshot contains the full
* accessibility tree and is only meaningful as the "current" page state;
* prior snapshots are stale and waste context-window tokens.
*
* Called via the {@link LocalAgentDefinition.onBeforeTurn} hook before each
* model call so the model only ever sees the most recent snapshot in full.
*/
import type { GeminiChat } from '../../core/geminiChat.js';
import type { Content, Part } from '@google/genai';
import { debugLogger } from '../../utils/debugLogger.js';
/** Tool name matched against `functionResponse.name` to identify snapshot outputs. */
const TAKE_SNAPSHOT_TOOL_NAME = 'take_snapshot';
/**
 * Compact text written in place of a superseded snapshot's output.
 * Deliberately brief to keep its token cost low, while still saying that a
 * newer snapshot exists and how to obtain the current page state.
 */
export const SNAPSHOT_SUPERSEDED_PLACEHOLDER = [
  '[Snapshot superseded — a newer snapshot exists later in this conversation.',
  'Call take_snapshot for current page state.]',
].join(' ');
/**
 * Scans the chat history and replaces all but the most recent
 * `take_snapshot` `functionResponse` with a compact placeholder.
 *
 * No-ops (without calling {@link GeminiChat.setHistory}) when:
 * - There are fewer than 2 snapshots (nothing to supersede).
 * - All prior snapshots have already been superseded.
 *
 * Otherwise a shallow copy of the history with the stale outputs replaced is
 * applied via {@link GeminiChat.setHistory}. Neither the array returned by
 * `getHistory()` nor the Content/Part objects inside it are mutated.
 *
 * @param chat The chat whose history is inspected and, if needed, rewritten.
 */
export function supersedeStaleSnapshots(chat: GeminiChat): void {
  const history = chat.getHistory();

  // Collect every take_snapshot response in conversation order, remembering
  // its slot so the replacement can be written to the same position.
  const snapshots: Array<{
    contentIdx: number;
    partIdx: number;
    part: Part;
    functionResponse: NonNullable<Part['functionResponse']>;
  }> = [];
  history.forEach((content, contentIdx) => {
    content.parts?.forEach((part, partIdx) => {
      const { functionResponse } = part;
      if (functionResponse?.name === TAKE_SNAPSHOT_TOOL_NAME) {
        snapshots.push({ contentIdx, partIdx, part, functionResponse });
      }
    });
  });

  // The last snapshot is the one kept in full. Of the earlier ones, only those
  // not yet carrying the placeholder need rewriting. With fewer than two
  // snapshots this list is empty, which covers both no-op cases.
  const pending = snapshots
    .slice(0, -1)
    .filter(
      ({ functionResponse }) =>
        !getResponseOutput(functionResponse.response).includes(
          SNAPSHOT_SUPERSEDED_PLACEHOLDER,
        ),
    );
  if (pending.length === 0) {
    return;
  }

  // Copy each Content and its parts array so the writes below never touch the
  // arrays handed out by getHistory().
  const newHistory: Content[] = history.map((content) => ({
    ...content,
    parts: content.parts ? [...content.parts] : undefined,
  }));

  for (const { contentIdx, partIdx, part, functionResponse } of pending) {
    const replacementPart: Part = {
      // Carry over any sibling fields set on the original part.
      ...part,
      functionResponse: {
        // Keep the response's other fields (e.g. name); only the payload changes.
        // eslint-disable-next-line @typescript-eslint/no-misused-spread
        ...functionResponse,
        response: { output: SNAPSHOT_SUPERSEDED_PLACEHOLDER },
      },
    };
    newHistory[contentIdx].parts?.splice(partIdx, 1, replacementPart);
  }

  chat.setHistory(newHistory);
  debugLogger.log(
    `[SnapshotSuperseder] Replaced ${pending.length} stale take_snapshot output(s).`,
  );
}
/**
 * A `functionResponse.response` payload whose `output` field is a string.
 */
interface ResponseWithOutput {
  output: string;
}

/**
 * Type guard: true only for a non-nullish object that has an `output`
 * property holding a string.
 */
function isResponseWithOutput(
  response: object | undefined,
): response is ResponseWithOutput {
  if (response === null || response === undefined) {
    return false;
  }
  if (!('output' in response)) {
    return false;
  }
  return typeof response.output === 'string';
}
/**
 * Reads the `output` string off a functionResponse.response payload.
 * The parameter is typed `object | undefined` (per the original note, that is
 * how the GenAI SDK types `response`), so the field is only read after a
 * runtime check. Any payload without a string `output` yields the empty
 * string.
 */
function getResponseOutput(response: object | undefined): string {
  return isResponseWithOutput(response) ? response.output : '';
}

View File

@@ -317,6 +317,10 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
await this.tryCompressChat(chat, promptId, combinedSignal);
// Allow the agent definition to modify history before the model call
// (e.g., superseding stale tool outputs to reclaim context tokens).
await this.definition.onBeforeTurn?.(chat, combinedSignal);
const { functionCalls, modelToUse } = await promptIdContext.run(
promptId,
async () =>

View File

@@ -16,6 +16,7 @@ import type { AnySchema } from 'ajv';
import type { AgentCard } from '@a2a-js/sdk';
import type { A2AAuthConfig } from './auth-provider/types.js';
import type { MCPServerConfig } from '../config/config.js';
import type { GeminiChat } from '../core/geminiChat.js';
/**
* Describes the possible termination modes for an agent.
@@ -227,6 +228,18 @@ export interface LocalAgentDefinition<
* @returns A string representation of the final output.
*/
processOutput?: (output: z.infer<TOutput>) => string;
/**
* Optional hook invoked before each model call. Receives the active
* {@link GeminiChat} instance and may modify chat history (e.g., to
* supersede stale tool outputs and reclaim context-window tokens).
*
* Runs immediately after chat compression in the agent loop. The hook may be
* synchronous or return a promise; the executor awaits the result before
* making the model call.
*
* @param chat The active chat whose history the hook may inspect or rewrite.
* @param signal Abort signal passed along by the executor. It is optional,
* so a hook may ignore it.
*/
onBeforeTurn?: (
chat: GeminiChat,
signal?: AbortSignal,
) => Promise<void> | void;
}
export interface BaseRemoteAgentDefinition<