Fix tool output fragmentation by encapsulating content in functionResponse (#13082)

This commit is contained in:
Abhi
2025-12-15 15:36:34 -05:00
committed by GitHub
parent 13944b9bb1
commit d236df5b21
8 changed files with 276 additions and 83 deletions

View File

@@ -11,6 +11,7 @@ import type {
} from '@a2a-js/sdk';
import {
ApprovalMode,
DEFAULT_GEMINI_MODEL,
DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
GeminiClient,
@@ -46,6 +47,7 @@ export function createMockConfig(
getTruncateToolOutputThreshold: () =>
DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
getDebugMode: vi.fn().mockReturnValue(false),
getContentGeneratorConfig: vi.fn().mockReturnValue({ model: 'gemini-pro' }),
getModel: vi.fn().mockReturnValue('gemini-pro'),

View File

@@ -33,6 +33,7 @@ import {
ApprovalMode,
MockTool,
HookSystem,
PREVIEW_GEMINI_MODEL,
} from '@google/gemini-cli-core';
import { createMockMessageBus } from '@google/gemini-cli-core/src/test-utils/mock-message-bus.js';
import { ToolCallStatus } from '../types.js';
@@ -71,6 +72,7 @@ const mockConfig = {
getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
getAllowedTools: vi.fn(() => []),
getActiveModel: () => PREVIEW_GEMINI_MODEL,
getContentGeneratorConfig: () => ({
model: 'test-model',
authType: 'oauth-personal',

View File

@@ -497,7 +497,12 @@ export class Session {
),
);
return convertToFunctionResponse(fc.name, callId, toolResult.llmContent);
return convertToFunctionResponse(
fc.name,
callId,
toolResult.llmContent,
this.config.getActiveModel(),
);
} catch (e) {
const error = e instanceof Error ? e : new Error(String(e));

View File

@@ -14,8 +14,25 @@ import {
GEMINI_MODEL_ALIAS_PRO,
GEMINI_MODEL_ALIAS_FLASH,
GEMINI_MODEL_ALIAS_FLASH_LITE,
supportsMultimodalFunctionResponse,
} from './models.js';
describe('supportsMultimodalFunctionResponse', () => {
it('should return true for gemini-3 model', () => {
expect(supportsMultimodalFunctionResponse('gemini-3-pro')).toBe(true);
});
it('should return false for gemini-2 models', () => {
expect(supportsMultimodalFunctionResponse('gemini-2.5-pro')).toBe(false);
expect(supportsMultimodalFunctionResponse('gemini-2.5-flash')).toBe(false);
});
it('should return false for other models', () => {
expect(supportsMultimodalFunctionResponse('some-other-model')).toBe(false);
expect(supportsMultimodalFunctionResponse('')).toBe(false);
});
});
describe('getEffectiveModel', () => {
describe('When NOT in fallback mode', () => {
const isInFallbackMode = false;

View File

@@ -99,8 +99,19 @@ export function getEffectiveModel(
* Checks if the model is a Gemini 2.x model.
*
* @param model The model name to check.
* @returns True if the model is a Gemini 2.x model.
* @returns True if the model is a Gemini-2.x model.
*/
export function isGemini2Model(model: string): boolean {
return /^gemini-2(\.|$)/.test(model);
}
/**
* Checks if the model supports multimodal function responses (multimodal data nested within function response).
* This is supported in Gemini 3.
*
* @param model The model name to check.
* @returns True if the model supports multimodal function responses.
*/
export function supportsMultimodalFunctionResponse(model: string): boolean {
return model.startsWith('gemini-3-');
}

View File

@@ -46,6 +46,10 @@ import * as modifiableToolModule from '../tools/modifiable-tool.js';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import { isShellInvocationAllowlisted } from '../utils/shell-permissions.js';
import {
DEFAULT_GEMINI_MODEL,
PREVIEW_GEMINI_MODEL,
} from '../config/models.js';
vi.mock('fs/promises', () => ({
writeFile: vi.fn(),
@@ -255,6 +259,7 @@ function createMockConfig(overrides: Partial<Config> = {}): Config {
DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
getToolRegistry: () => defaultToolRegistry,
getActiveModel: () => DEFAULT_GEMINI_MODEL,
getUseSmartEdit: () => false,
getGeminiClient: () => null,
getEnableMessageBusIntegration: () => false,
@@ -767,7 +772,12 @@ describe('convertToFunctionResponse', () => {
it('should handle simple string llmContent', () => {
const llmContent = 'Simple text output';
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
DEFAULT_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
@@ -781,7 +791,12 @@ describe('convertToFunctionResponse', () => {
it('should handle llmContent as a single Part with text', () => {
const llmContent: Part = { text: 'Text from Part object' };
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
DEFAULT_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
@@ -795,7 +810,12 @@ describe('convertToFunctionResponse', () => {
it('should handle llmContent as a PartListUnion array with a single text Part', () => {
const llmContent: PartListUnion = [{ text: 'Text from array' }];
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
DEFAULT_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
@@ -807,60 +827,147 @@ describe('convertToFunctionResponse', () => {
]);
});
it('should handle llmContent with inlineData', () => {
const llmContent: Part = {
inlineData: { mimeType: 'image/png', data: 'base64...' },
};
const result = convertToFunctionResponse(toolName, callId, llmContent);
it('should handle llmContent as a PartListUnion array with multiple Parts', () => {
const llmContent: PartListUnion = [{ text: 'part1' }, { text: 'part2' }];
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
DEFAULT_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: {
output: 'Binary content of type image/png was processed.',
},
response: { output: 'part1\npart2' },
},
},
]);
});
it('should handle llmContent with fileData for Gemini 3 model (should be siblings)', () => {
const llmContent: Part = {
fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
};
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: { output: 'Binary content provided (1 item(s)).' },
},
},
llmContent,
]);
});
it('should handle llmContent with fileData', () => {
it('should handle llmContent with inlineData for Gemini 3 model (should be nested)', () => {
const llmContent: Part = {
fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
inlineData: { mimeType: 'image/png', data: 'base64...' },
};
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: {
output: 'Binary content of type application/pdf was processed.',
},
response: { output: 'Binary content provided (1 item(s)).' },
parts: [llmContent],
},
},
]);
});
it('should handle llmContent with fileData for non-Gemini 3 models', () => {
const llmContent: Part = {
fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
};
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
DEFAULT_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: { output: 'Binary content provided (1 item(s)).' },
},
},
llmContent,
]);
});
it('should preserve existing functionResponse metadata', () => {
const innerId = 'inner-call-id';
const innerName = 'inner-tool-name';
const responseMetadata = {
flags: ['flag1'],
isError: false,
customData: { key: 'value' },
};
const input: Part = {
functionResponse: {
id: innerId,
name: innerName,
response: responseMetadata,
},
};
const result = convertToFunctionResponse(
toolName,
callId,
input,
DEFAULT_GEMINI_MODEL,
);
expect(result).toHaveLength(1);
expect(result[0].functionResponse).toEqual({
id: callId,
name: toolName,
response: responseMetadata,
});
});
it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => {
const llmContent: PartListUnion = [
{ text: 'Some textual description' },
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
{ text: 'Another text part' },
];
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: { output: 'Tool execution succeeded.' },
response: {
output: 'Some textual description\nAnother text part',
},
parts: [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
],
},
},
...llmContent,
]);
});
@@ -868,30 +975,38 @@ describe('convertToFunctionResponse', () => {
const llmContent: PartListUnion = [
{ inlineData: { mimeType: 'image/gif', data: 'gifdata...' } },
];
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: {
output: 'Binary content of type image/gif was processed.',
},
response: { output: 'Binary content provided (1 item(s)).' },
parts: llmContent,
},
},
...llmContent,
]);
});
it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => {
const llmContent: Part = { functionCall: { name: 'test', args: {} } };
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: { output: 'Tool execution succeeded.' },
response: {},
},
},
]);
@@ -899,7 +1014,12 @@ describe('convertToFunctionResponse', () => {
it('should handle empty string llmContent', () => {
const llmContent = '';
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
@@ -913,13 +1033,18 @@ describe('convertToFunctionResponse', () => {
it('should handle llmContent as an empty array', () => {
const llmContent: PartListUnion = [];
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: { output: 'Tool execution succeeded.' },
response: {},
},
},
]);
@@ -927,13 +1052,18 @@ describe('convertToFunctionResponse', () => {
it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => {
const llmContent: Part = {}; // An empty part object
const result = convertToFunctionResponse(toolName, callId, llmContent);
const result = convertToFunctionResponse(
toolName,
callId,
llmContent,
PREVIEW_GEMINI_MODEL,
);
expect(result).toEqual([
{
functionResponse: {
name: toolName,
id: callId,
response: { output: 'Tool execution succeeded.' },
response: {},
},
},
]);

View File

@@ -29,7 +29,7 @@ import {
} from '../index.js';
import { READ_FILE_TOOL_NAME, SHELL_TOOL_NAME } from '../tools/tool-names.js';
import type { Part, PartListUnion } from '@google/genai';
import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js';
import { supportsMultimodalFunctionResponse } from '../config/models.js';
import type { ModifyContext } from '../tools/modifiable-tool.js';
import {
isModifiableDeclarativeTool,
@@ -50,6 +50,7 @@ import {
fireToolNotificationHook,
executeToolWithHooks,
} from './coreToolHookTriggers.js';
import { debugLogger } from '../utils/debugLogger.js';
export type ValidatingToolCall = {
status: 'validating';
@@ -171,61 +172,85 @@ export function convertToFunctionResponse(
toolName: string,
callId: string,
llmContent: PartListUnion,
model: string,
): Part[] {
const contentToProcess =
Array.isArray(llmContent) && llmContent.length === 1
? llmContent[0]
: llmContent;
if (typeof contentToProcess === 'string') {
return [createFunctionResponsePart(callId, toolName, contentToProcess)];
if (typeof llmContent === 'string') {
return [createFunctionResponsePart(callId, toolName, llmContent)];
}
if (Array.isArray(contentToProcess)) {
const functionResponse = createFunctionResponsePart(
callId,
toolName,
'Tool execution succeeded.',
);
return [functionResponse, ...toParts(contentToProcess)];
}
const parts = toParts(llmContent);
// After this point, contentToProcess is a single Part object.
if (contentToProcess.functionResponse) {
if (contentToProcess.functionResponse.response?.['content']) {
const stringifiedOutput =
getResponseTextFromParts(
contentToProcess.functionResponse.response['content'] as Part[],
) || '';
return [createFunctionResponsePart(callId, toolName, stringifiedOutput)];
// Separate text from binary types
const textParts: string[] = [];
const inlineDataParts: Part[] = [];
const fileDataParts: Part[] = [];
for (const part of parts) {
if (part.text !== undefined) {
textParts.push(part.text);
} else if (part.inlineData) {
inlineDataParts.push(part);
} else if (part.fileData) {
fileDataParts.push(part);
} else if (part.functionResponse) {
if (parts.length > 1) {
debugLogger.warn(
'convertToFunctionResponse received multiple parts with a functionResponse. Only the functionResponse will be used, other parts will be ignored',
);
}
// Handle passthrough case
return [
{
functionResponse: {
id: callId,
name: toolName,
response: part.functionResponse.response,
},
},
];
}
// It's a functionResponse that we should pass through as is.
return [contentToProcess];
// Ignore other part types
}
if (contentToProcess.inlineData || contentToProcess.fileData) {
const mimeType =
contentToProcess.inlineData?.mimeType ||
contentToProcess.fileData?.mimeType ||
'unknown';
const functionResponse = createFunctionResponsePart(
callId,
toolName,
`Binary content of type ${mimeType} was processed.`,
);
return [functionResponse, contentToProcess];
// Build the primary response part
const part: Part = {
functionResponse: {
id: callId,
name: toolName,
response: textParts.length > 0 ? { output: textParts.join('\n') } : {},
},
};
const isMultimodalFRSupported = supportsMultimodalFunctionResponse(model);
const siblingParts: Part[] = [...fileDataParts];
if (inlineDataParts.length > 0) {
if (isMultimodalFRSupported) {
// Nest inlineData if supported by the model
(part.functionResponse as unknown as { parts: Part[] }).parts =
inlineDataParts;
} else {
// Otherwise treat as siblings
siblingParts.push(...inlineDataParts);
}
}
if (contentToProcess.text !== undefined) {
return [
createFunctionResponsePart(callId, toolName, contentToProcess.text),
];
// Add descriptive text if the response object is empty but we have binary content
if (
textParts.length === 0 &&
(inlineDataParts.length > 0 || fileDataParts.length > 0)
) {
const totalBinaryItems = inlineDataParts.length + fileDataParts.length;
part.functionResponse!.response = {
output: `Binary content provided (${totalBinaryItems} item(s)).`,
};
}
// Default case for other kinds of parts.
return [
createFunctionResponsePart(callId, toolName, 'Tool execution succeeded.'),
];
if (siblingParts.length > 0) {
return [part, ...siblingParts];
}
return [part];
}
function toParts(input: PartListUnion): Part[] {
@@ -1228,6 +1253,7 @@ export class CoreToolScheduler {
toolName,
callId,
content,
this.config.getActiveModel(),
);
const successResponse: ToolCallResponseInfo = {
callId,

View File

@@ -19,6 +19,7 @@ import {
ToolErrorType,
ApprovalMode,
HookSystem,
PREVIEW_GEMINI_MODEL,
} from '../index.js';
import type { Part } from '@google/genai';
import { MockTool } from '../test-utils/mock-tool.js';
@@ -61,6 +62,7 @@ describe('executeToolCall', () => {
getTruncateToolOutputThreshold: () =>
DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
getActiveModel: () => PREVIEW_GEMINI_MODEL,
getUseSmartEdit: () => false,
getGeminiClient: () => null, // No client needed for these tests
getEnableMessageBusIntegration: () => false,
@@ -321,12 +323,10 @@ describe('executeToolCall', () => {
functionResponse: {
name: 'testTool',
id: 'call6',
response: {
output: 'Binary content of type image/png was processed.',
},
response: { output: 'Binary content provided (1 item(s)).' },
parts: [imageDataPart],
},
},
imageDataPart,
],
});
});