feat(agent): implement tool-controlled display protocol (Steps 2-3) (#25134)

This commit is contained in:
Michael Bleigh
2026-04-13 12:09:02 -07:00
committed by GitHub
parent ea36ccb567
commit 95944ec5af
13 changed files with 318 additions and 75 deletions
@@ -8,7 +8,6 @@ import { describe, expect, it } from 'vitest';
import {
geminiPartsToContentParts,
contentPartsToGeminiParts,
toolResultDisplayToContentParts,
buildToolResponseData,
} from './content-utils.js';
import type { Part } from '@google/genai';
@@ -200,27 +199,6 @@ describe('contentPartsToGeminiParts', () => {
});
});
// Suite for toolResultDisplayToContentParts: absent values produce no
// display parts, strings pass through, and objects are JSON-serialized.
describe('toolResultDisplayToContentParts', () => {
  // Both "absent" inputs share one parameterized case; the generated titles
  // are 'returns undefined for undefined' and 'returns undefined for null'.
  it.each([undefined, null])('returns undefined for %s', (absent) => {
    expect(toolResultDisplayToContentParts(absent)).toBeUndefined();
  });

  it('handles string resultDisplay as-is', () => {
    const expected = [{ type: 'text', text: 'File written' }];
    expect(toolResultDisplayToContentParts('File written')).toEqual(expected);
  });

  it('stringifies object resultDisplay', () => {
    const display = { type: 'FileDiff', oldPath: 'a.ts', newPath: 'b.ts' };
    const expected = [{ type: 'text', text: JSON.stringify(display) }];
    expect(toolResultDisplayToContentParts(display)).toEqual(expected);
  });
});
describe('buildToolResponseData', () => {
it('preserves outputFile and contentLength', () => {
const result = buildToolResponseData({
-18
View File
@@ -101,24 +101,6 @@ export function contentPartsToGeminiParts(content: ContentPart[]): Part[] {
return result;
}
/**
 * Converts a ToolCallResponseInfo.resultDisplay value into ContentPart[].
 *
 * - `undefined` / `null` produce `undefined` (no display content).
 * - Strings are passed through unchanged as a single text part.
 * - Any other value is JSON-serialized into a single text part. Values that
 *   JSON cannot represent (functions, symbols, objects whose `toJSON()`
 *   returns `undefined`) fall back to `String(value)` so the emitted part
 *   always carries a real string in `text`.
 *
 * @param resultDisplay The raw display value reported by a tool call.
 * @returns A one-element text ContentPart array, or `undefined` when there
 *   is nothing to display.
 */
export function toolResultDisplayToContentParts(
  resultDisplay: unknown,
): ContentPart[] | undefined {
  if (resultDisplay === undefined || resultDisplay === null) {
    return undefined;
  }
  if (typeof resultDisplay === 'string') {
    return [{ type: 'text', text: resultDisplay }];
  }
  // The lib typings declare JSON.stringify as returning `string`, but at
  // runtime it yields `undefined` for non-serializable inputs. Widen the type
  // so the fallback below is visible to the checker.
  const serialized: string | undefined = JSON.stringify(resultDisplay);
  return [{ type: 'text', text: serialized ?? String(resultDisplay) }];
}
/**
* Builds the data record for a tool_response AgentEvent, preserving
* all available metadata from the ToolCallResponseInfo.
@@ -155,9 +155,10 @@ describe('translateEvent', () => {
expect(resp.content).toEqual([
{ type: 'text', text: 'Permission denied to write' },
]);
expect(resp.displayContent).toEqual([
{ type: 'text', text: 'Permission denied' },
]);
expect(resp.display?.result).toEqual({
type: 'text',
text: 'Permission denied',
});
expect(resp.data).toEqual({ errorType: 'permission_denied' });
});
@@ -200,9 +201,12 @@ describe('translateEvent', () => {
};
const result = translateEvent(event, state);
const resp = result[0] as AgentEvent<'tool_response'>;
expect(resp.displayContent).toEqual([
{ type: 'text', text: JSON.stringify(objectDisplay) },
]);
expect(resp.display?.result).toEqual({
type: 'diff',
path: '/tmp/test.txt',
beforeText: 'a',
afterText: 'b',
});
});
it('passes through string resultDisplay as-is', () => {
@@ -220,9 +224,10 @@ describe('translateEvent', () => {
};
const result = translateEvent(event, state);
const resp = result[0] as AgentEvent<'tool_response'>;
expect(resp.displayContent).toEqual([
{ type: 'text', text: 'Command output text' },
]);
expect(resp.display?.result).toEqual({
type: 'text',
text: 'Command output text',
});
});
it('preserves outputFile and contentLength in data', () => {
+10 -5
View File
@@ -25,12 +25,13 @@ import type {
ErrorData,
Usage,
AgentEventType,
ToolDisplay,
} from './types.js';
import {
geminiPartsToContentParts,
toolResultDisplayToContentParts,
buildToolResponseData,
} from './content-utils.js';
import { toolResultDisplayToDisplayContent } from './tool-display-utils.js';
// ---------------------------------------------------------------------------
// Translation State
@@ -241,10 +242,14 @@ export function translateEvent(
case GeminiEventType.ToolCallResponse: {
ensureStreamStart(state, out);
const displayContent = toolResultDisplayToContentParts(
event.value.resultDisplay,
);
const data = buildToolResponseData(event.value);
const display: ToolDisplay | undefined = event.value.resultDisplay
? {
result: toolResultDisplayToDisplayContent(
event.value.resultDisplay,
),
}
: undefined;
out.push(
makeEvent('tool_response', state, {
requestId: event.value.callId,
@@ -253,7 +258,7 @@ export function translateEvent(
? [{ type: 'text', text: event.value.error.message }]
: geminiPartsToContentParts(event.value.responseParts),
isError: event.value.error !== undefined,
...(displayContent ? { displayContent } : {}),
...(display ? { display } : {}),
...(data ? { data } : {}),
}),
);
@@ -489,9 +489,10 @@ describe('LegacyAgentSession', () => {
expect(toolResp?.content).toEqual([
{ type: 'text', text: 'Permission denied' },
]);
expect(toolResp?.displayContent).toEqual([
{ type: 'text', text: 'Error display' },
]);
expect(toolResp?.display?.result).toEqual({
type: 'text',
text: 'Error display',
});
});
it('stops on STOP_EXECUTION tool error', async () => {
@@ -23,8 +23,8 @@ import {
buildToolResponseData,
contentPartsToGeminiParts,
geminiPartsToContentParts,
toolResultDisplayToContentParts,
} from './content-utils.js';
import { populateToolDisplay } from './tool-display-utils.js';
import { AgentSession } from './agent-session.js';
import {
createTranslationState,
@@ -262,9 +262,12 @@ export class LegacyAgentProtocol implements AgentProtocol {
const content: ContentPart[] = response.error
? [{ type: 'text', text: response.error.message }]
: geminiPartsToContentParts(response.responseParts);
const displayContent = toolResultDisplayToContentParts(
response.resultDisplay,
);
const display = populateToolDisplay({
name: request.name,
invocation: 'invocation' in tc ? tc.invocation : undefined,
resultDisplay: response.resultDisplay,
displayName: 'tool' in tc ? tc.tool?.displayName : undefined,
});
const data = buildToolResponseData(response);
this._emit([
@@ -273,7 +276,7 @@ export class LegacyAgentProtocol implements AgentProtocol {
name: request.name,
content,
isError: response.error !== undefined,
...(displayContent ? { displayContent } : {}),
...(display ? { display } : {}),
...(data ? { data } : {}),
}),
]);
@@ -0,0 +1,124 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import type {
ToolInvocation,
ToolResult,
ToolResultDisplay,
} from '../tools/tools.js';
import type { DisplayContent } from './types.js';
import {
populateToolDisplay,
renderDisplayDiff,
displayContentToString,
} from './tool-display-utils.js';
// Unit tests for the tool-display helpers: populateToolDisplay,
// renderDisplayDiff and displayContentToString.
describe('tool-display-utils', () => {
// populateToolDisplay: name resolution, description lookup and result mapping.
describe('populateToolDisplay', () => {
it('uses displayName if provided', () => {
// Minimal stand-in for a ToolInvocation: only getDescription is implemented,
// hence the cast through `unknown`.
const mockInvocation = {
getDescription: () => 'Doing something...',
} as unknown as ToolInvocation<object, ToolResult>;
const display = populateToolDisplay({
name: 'raw-name',
invocation: mockInvocation,
displayName: 'Custom Display Name',
});
expect(display.name).toBe('Custom Display Name');
expect(display.description).toBe('Doing something...');
});
it('falls back to raw name if no displayName provided', () => {
const mockInvocation = {
getDescription: () => 'Doing something...',
} as unknown as ToolInvocation<object, ToolResult>;
const display = populateToolDisplay({
name: 'raw-name',
invocation: mockInvocation,
});
expect(display.name).toBe('raw-name');
});
it('populates result from resultDisplay', () => {
// A plain string resultDisplay becomes a text DisplayContent.
const display = populateToolDisplay({
name: 'test',
resultDisplay: 'hello world',
});
expect(display.result).toEqual({ type: 'text', text: 'hello world' });
});
it('translates FileDiff to DisplayDiff', () => {
// Object carrying both `fileDiff` and `newContent`, the two keys the
// converter checks for before producing a diff.
const fileDiff = {
fileDiff: '@@ ...',
fileName: 'test.ts',
filePath: 'src/test.ts',
originalContent: 'old',
newContent: 'new',
} as unknown as ToolResultDisplay;
const display = populateToolDisplay({
name: 'test',
resultDisplay: fileDiff,
});
// path comes from filePath (preferred over fileName); the raw `fileDiff`
// patch text is not part of the expected result.
expect(display.result).toEqual({
type: 'diff',
path: 'src/test.ts',
beforeText: 'old',
afterText: 'new',
});
});
});
// renderDisplayDiff: checks the header and changed lines of the rendered patch.
describe('renderDisplayDiff', () => {
it('renders a universal diff', () => {
const diff = {
type: 'diff' as const,
path: 'test.ts',
beforeText: 'line 1\nline 2',
afterText: 'line 1\nline 2 modified',
};
const rendered = renderDisplayDiff(diff);
// Header lines carry the path plus the tab-separated 'Original'/'Modified' labels.
expect(rendered).toContain('--- test.ts\tOriginal');
expect(rendered).toContain('+++ test.ts\tModified');
expect(rendered).toContain('-line 2');
expect(rendered).toContain('+line 2 modified');
});
});
// displayContentToString: one case per DisplayContent variant plus fallbacks.
describe('displayContentToString', () => {
it('returns undefined for undefined input', () => {
expect(displayContentToString(undefined)).toBeUndefined();
});
it('returns text for text input', () => {
expect(displayContentToString({ type: 'text', text: 'hello' })).toBe(
'hello',
);
});
it('renders a diff for diff input', () => {
const diff = {
type: 'diff' as const,
path: 'test.ts',
beforeText: 'old',
afterText: 'new',
};
const rendered = displayContentToString(diff);
expect(rendered).toContain('--- test.ts\tOriginal');
expect(rendered).toContain('+++ test.ts\tModified');
});
it('stringifies unknown structured objects', () => {
// Deliberately outside the DisplayContent union (forced via cast) to
// exercise the JSON.stringify fallback.
const unknown = {
type: 'something_else',
data: 123,
} as unknown as DisplayContent;
expect(displayContentToString(unknown)).toBe(JSON.stringify(unknown));
});
});
});
@@ -0,0 +1,106 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as Diff from 'diff';
import type {
ToolInvocation,
ToolResult,
ToolResultDisplay,
} from '../tools/tools.js';
import type { ToolDisplay, DisplayContent, DisplayDiff } from './types.js';
/**
 * Builds the ToolDisplay record for a tool call.
 *
 * The display name prefers `displayName` and falls back to the raw tool
 * `name` when `displayName` is missing or empty. The description is taken
 * from the invocation's `getDescription()` when available. A truthy
 * `resultDisplay` is converted and attached as `result`.
 *
 * Acts as the single bridge point while tools migrate to tool-controlled
 * display.
 *
 * @returns A ToolDisplay that always has `name` and `description` keys
 *   (`description` may be `undefined`), plus `result` when there is a
 *   result to show.
 */
export function populateToolDisplay(options: {
  name: string;
  invocation?: ToolInvocation<object, ToolResult>;
  resultDisplay?: ToolResultDisplay;
  displayName?: string;
}): ToolDisplay {
  const { name, invocation, resultDisplay, displayName } = options;

  const label = displayName || name;
  const description = invocation?.getDescription?.();
  const toolDisplay: ToolDisplay = { name: label, description };

  // Nothing to render for a missing (or empty) result display.
  if (!resultDisplay) {
    return toolDisplay;
  }

  toolDisplay.result = toolResultDisplayToDisplayContent(resultDisplay);
  return toolDisplay;
}
/**
 * Maps a legacy ToolResultDisplay value onto the DisplayContent format.
 *
 * - Strings become a text display unchanged.
 * - Objects carrying both `fileDiff` and `newContent` become a diff display:
 *   `path` is `filePath` (or `fileName` when `filePath` is empty),
 *   `beforeText` is `originalContent` (empty string when null/undefined) and
 *   `afterText` is `newContent`.
 * - Every other structured value is serialized to JSON text.
 */
export function toolResultDisplayToDisplayContent(
  resultDisplay: ToolResultDisplay,
): DisplayContent {
  if (typeof resultDisplay === 'string') {
    return { type: 'text', text: resultDisplay };
  }

  // FileDiff -> DisplayDiff
  if (
    resultDisplay !== null &&
    typeof resultDisplay === 'object' &&
    'fileDiff' in resultDisplay &&
    'newContent' in resultDisplay
  ) {
    const { filePath, fileName, originalContent, newContent } = resultDisplay;
    return {
      type: 'diff',
      path: filePath || fileName,
      beforeText: originalContent ?? '',
      afterText: newContent,
    };
  }

  // Remaining structured displays (LsTool, GrepTool, etc.) are emitted as
  // JSON text for now; they will be fully migrated in Step 5.
  const serialized = JSON.stringify(resultDisplay);
  return { type: 'text', text: serialized };
}
/**
 * Produces patch text for a DisplayDiff via `Diff.createPatch`.
 *
 * The patch is labelled with the diff's `path` (or the placeholder `'file'`
 * when no path is set), uses the headers `Original` / `Modified`, and is
 * requested with 3 lines of context.
 */
export function renderDisplayDiff(diff: DisplayDiff): string {
  const { path, beforeText, afterText } = diff;
  const fileLabel = path || 'file';
  const patchOptions = { context: 3 };

  return Diff.createPatch(
    fileLabel,
    beforeText,
    afterText,
    'Original',
    'Modified',
    patchOptions,
  );
}
/**
 * Flattens a DisplayContent value into a plain string, for fallback displays
 * or non-interactive environments.
 *
 * @returns `undefined` when no display is given; the raw text for text
 *   content; rendered patch text for diff content; and the JSON
 *   serialization for any other shape.
 */
export function displayContentToString(
  display: DisplayContent | undefined,
): string | undefined {
  if (!display) {
    return undefined;
  }

  switch (display.type) {
    case 'text':
      return display.text;
    case 'diff':
      return renderDisplayDiff(display);
    default:
      // Shapes outside the known union are still surfaced rather than dropped.
      return JSON.stringify(display);
  }
}
+24 -5
View File
@@ -106,7 +106,7 @@ export interface AgentEvents {
/** Updates configuration about the current session/agent. */
session_update: SessionUpdate;
/** Message content provided by user, agent, or developer. */
message: Message;
message: AgentMessage;
/** Event indicating the start of agent activity on a stream. */
agent_start: AgentStart;
/** Event indicating the end of agent activity on a stream. */
@@ -170,17 +170,35 @@ export type ContentPart =
) &
WithMeta;
export interface Message {
export interface AgentMessage {
role: 'user' | 'agent' | 'developer';
content: ContentPart[];
}
/** Display payload consisting of a single string of text. */
export type DisplayText = { type: 'text'; text: string };
/**
 * Display payload describing a text change as full before/after contents
 * rather than as a pre-rendered patch.
 */
export type DisplayDiff = {
type: 'diff';
/** Optional path of the file the change applies to. */
path?: string;
/** Text before the change. */
beforeText: string;
/** Text after the change. */
afterText: string;
};
/** Any display payload; discriminate on the `type` field. */
export type DisplayContent = DisplayText | DisplayDiff;
/**
 * Tool-controlled display information attached to tool events via their
 * `display` field. Every field is optional.
 */
export interface ToolDisplay {
/** Name to show for the tool. */
name?: string;
/** Description of the tool call. */
description?: string;
/** NOTE(review): presumably a short summary of the result — no producer is visible here; confirm. */
resultSummary?: string;
/** Structured result content (text or diff). */
result?: DisplayContent;
}
export interface ToolRequest {
/** A unique identifier for this tool request to be correlated by the response. */
requestId: string;
/** The name of the tool being requested. */
name: string;
/** Tool-controlled display information. */
display?: ToolDisplay;
/** The arguments for the tool. */
args: Record<string, unknown>;
/** UI specific metadata */
_meta?: {
@@ -201,7 +219,8 @@ export interface ToolRequest {
*/
export interface ToolUpdate {
requestId: string;
displayContent?: ContentPart[];
/** Tool-controlled display information. */
display?: ToolDisplay;
content?: ContentPart[];
data?: Record<string, unknown>;
/** UI specific metadata */
@@ -221,8 +240,8 @@ export interface ToolUpdate {
export interface ToolResponse {
requestId: string;
name: string;
/** Content representing the tool call's outcome to be presented to the user. */
displayContent?: ContentPart[];
/** Tool-controlled display information. */
display?: ToolDisplay;
/** Multi-part content to be sent to the model. */
content?: ContentPart[];
/** Structured data to be sent to the model. */
+9
View File
@@ -198,6 +198,7 @@ export * from './agent/agent-session.js';
export * from './agent/legacy-agent-session.js';
export * from './agent/event-translator.js';
export * from './agent/content-utils.js';
export * from './agent/tool-display-utils.js';
// Agent event types — namespaced to avoid collisions with existing exports
export type {
AgentEvent,
@@ -209,6 +210,7 @@ export type {
AgentProtocol,
AgentSend,
AgentStart,
AgentMessage,
ContentPart,
ErrorData,
StreamEndReason,
@@ -216,6 +218,13 @@ export type {
Unsubscribe,
Usage as AgentUsage,
WithMeta,
ToolRequest,
ToolResponse,
ToolUpdate,
ToolDisplay,
DisplayText,
DisplayDiff,
DisplayContent,
} from './agent/types.js';
// Export specific tool logic