diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts index d8a85a172b..2f5a884753 100644 --- a/packages/a2a-server/src/utils/testing_utils.ts +++ b/packages/a2a-server/src/utils/testing_utils.ts @@ -11,6 +11,7 @@ import type { } from '@a2a-js/sdk'; import { ApprovalMode, + DEFAULT_GEMINI_MODEL, DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, GeminiClient, @@ -46,6 +47,7 @@ export function createMockConfig( getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, + getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL), getDebugMode: vi.fn().mockReturnValue(false), getContentGeneratorConfig: vi.fn().mockReturnValue({ model: 'gemini-pro' }), getModel: vi.fn().mockReturnValue('gemini-pro'), diff --git a/packages/cli/src/ui/hooks/useToolScheduler.test.ts b/packages/cli/src/ui/hooks/useToolScheduler.test.ts index c61e608a1a..b6c1ebd4fa 100644 --- a/packages/cli/src/ui/hooks/useToolScheduler.test.ts +++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts @@ -33,6 +33,7 @@ import { ApprovalMode, MockTool, HookSystem, + PREVIEW_GEMINI_MODEL, } from '@google/gemini-cli-core'; import { createMockMessageBus } from '@google/gemini-cli-core/src/test-utils/mock-message-bus.js'; import { ToolCallStatus } from '../types.js'; @@ -71,6 +72,7 @@ const mockConfig = { getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, getAllowedTools: vi.fn(() => []), + getActiveModel: () => PREVIEW_GEMINI_MODEL, getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'oauth-personal', diff --git a/packages/cli/src/zed-integration/zedIntegration.ts b/packages/cli/src/zed-integration/zedIntegration.ts index 0694a79f80..abcfb62ccc 100644 --- a/packages/cli/src/zed-integration/zedIntegration.ts +++ b/packages/cli/src/zed-integration/zedIntegration.ts @@ -497,7 +497,12 @@ export class Session { ), ); - return convertToFunctionResponse(fc.name, callId, toolResult.llmContent); + return convertToFunctionResponse( + fc.name, + callId, + toolResult.llmContent, + this.config.getActiveModel(), + ); } catch (e) { const error = e instanceof Error ? e : new Error(String(e)); diff --git a/packages/core/src/config/models.test.ts b/packages/core/src/config/models.test.ts index 48a6f80030..0cd285ee79 100644 --- a/packages/core/src/config/models.test.ts +++ b/packages/core/src/config/models.test.ts @@ -14,8 +14,25 @@ import { GEMINI_MODEL_ALIAS_PRO, GEMINI_MODEL_ALIAS_FLASH, GEMINI_MODEL_ALIAS_FLASH_LITE, + supportsMultimodalFunctionResponse, } from './models.js'; +describe('supportsMultimodalFunctionResponse', () => { + it('should return true for gemini-3 model', () => { + expect(supportsMultimodalFunctionResponse('gemini-3-pro')).toBe(true); + }); + + it('should return false for gemini-2 models', () => { + expect(supportsMultimodalFunctionResponse('gemini-2.5-pro')).toBe(false); + expect(supportsMultimodalFunctionResponse('gemini-2.5-flash')).toBe(false); + }); + + it('should return false for other models', () => { + expect(supportsMultimodalFunctionResponse('some-other-model')).toBe(false); + expect(supportsMultimodalFunctionResponse('')).toBe(false); + }); +}); + describe('getEffectiveModel', () => { describe('When NOT in fallback mode', () => { const isInFallbackMode = false; diff --git a/packages/core/src/config/models.ts b/packages/core/src/config/models.ts index 215019ef03..9840af42b1 100644 --- a/packages/core/src/config/models.ts +++ b/packages/core/src/config/models.ts @@ -99,8 +99,19 @@ export function getEffectiveModel( * Checks if the model is a Gemini 2.x model. * * @param model The model name to check. - * @returns True if the model is a Gemini 2.x model. + * @returns True if the model is a Gemini-2.x model. */ export function isGemini2Model(model: string): boolean { return /^gemini-2(\.|$)/.test(model); } + +/** + * Checks if the model supports multimodal function responses (multimodal data nested within function response). + * This is supported in Gemini 3. + * + * @param model The model name to check. + * @returns True if the model supports multimodal function responses. + */ +export function supportsMultimodalFunctionResponse(model: string): boolean { + return model.startsWith('gemini-3-'); +} diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 8c64f9c2db..3a45f73e2f 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -46,6 +46,10 @@ import * as modifiableToolModule from '../tools/modifiable-tool.js'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import { isShellInvocationAllowlisted } from '../utils/shell-permissions.js'; +import { + DEFAULT_GEMINI_MODEL, + PREVIEW_GEMINI_MODEL, +} from '../config/models.js'; vi.mock('fs/promises', () => ({ writeFile: vi.fn(), @@ -255,6 +259,7 @@ function createMockConfig(overrides: Partial = {}): Config { DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, getToolRegistry: () => defaultToolRegistry, + getActiveModel: () => DEFAULT_GEMINI_MODEL, getUseSmartEdit: () => false, getGeminiClient: () => null, getEnableMessageBusIntegration: () => false, @@ -767,7 +772,12 @@ describe('convertToFunctionResponse', () => { it('should handle simple string llmContent', () => { const llmContent = 'Simple text output'; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + DEFAULT_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { @@ -781,7 +791,12 @@ describe('convertToFunctionResponse', () => { it('should handle llmContent as a single Part with text', () => { const llmContent: Part = { text: 'Text from Part object' }; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + DEFAULT_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { @@ -795,7 +810,12 @@ describe('convertToFunctionResponse', () => { it('should handle llmContent as a PartListUnion array with a single text Part', () => { const llmContent: PartListUnion = [{ text: 'Text from array' }]; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + DEFAULT_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { @@ -807,60 +827,147 @@ describe('convertToFunctionResponse', () => { ]); }); - it('should handle llmContent with inlineData', () => { - const llmContent: Part = { - inlineData: { mimeType: 'image/png', data: 'base64...' }, - }; - const result = convertToFunctionResponse(toolName, callId, llmContent); + it('should handle llmContent as a PartListUnion array with multiple Parts', () => { + const llmContent: PartListUnion = [{ text: 'part1' }, { text: 'part2' }]; + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + DEFAULT_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { - output: 'Binary content of type image/png was processed.', - }, + response: { output: 'part1\npart2' }, + }, + }, + ]); + }); + + it('should handle llmContent with fileData for Gemini 3 model (should be siblings)', () => { + const llmContent: Part = { + fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, + }; + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); + expect(result).toEqual([ + { + functionResponse: { + name: toolName, + id: callId, + response: { output: 'Binary content provided (1 item(s)).' }, }, }, llmContent, ]); }); - it('should handle llmContent with fileData', () => { + it('should handle llmContent with inlineData for Gemini 3 model (should be nested)', () => { const llmContent: Part = { - fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, + inlineData: { mimeType: 'image/png', data: 'base64...' }, }; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { - output: 'Binary content of type application/pdf was processed.', - }, + response: { output: 'Binary content provided (1 item(s)).' }, + parts: [llmContent], + }, + }, + ]); + }); + + it('should handle llmContent with fileData for non-Gemini 3 models', () => { + const llmContent: Part = { + fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, + }; + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + DEFAULT_GEMINI_MODEL, + ); + expect(result).toEqual([ + { + functionResponse: { + name: toolName, + id: callId, + response: { output: 'Binary content provided (1 item(s)).' }, }, }, llmContent, ]); }); + it('should preserve existing functionResponse metadata', () => { + const innerId = 'inner-call-id'; + const innerName = 'inner-tool-name'; + const responseMetadata = { + flags: ['flag1'], + isError: false, + customData: { key: 'value' }, + }; + const input: Part = { + functionResponse: { + id: innerId, + name: innerName, + response: responseMetadata, + }, + }; + + const result = convertToFunctionResponse( + toolName, + callId, + input, + DEFAULT_GEMINI_MODEL, + ); + + expect(result).toHaveLength(1); + expect(result[0].functionResponse).toEqual({ + id: callId, + name: toolName, + response: responseMetadata, + }); + }); + it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => { const llmContent: PartListUnion = [ { text: 'Some textual description' }, { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } }, { text: 'Another text part' }, ]; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { output: 'Tool execution succeeded.' }, + response: { + output: 'Some textual description\nAnother text part', + }, + parts: [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } }, + ], }, }, - ...llmContent, ]); }); @@ -868,30 +975,38 @@ describe('convertToFunctionResponse', () => { const llmContent: PartListUnion = [ { inlineData: { mimeType: 'image/gif', data: 'gifdata...' } }, ]; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { - output: 'Binary content of type image/gif was processed.', - }, + response: { output: 'Binary content provided (1 item(s)).' }, + parts: llmContent, }, }, - ...llmContent, ]); }); it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => { const llmContent: Part = { functionCall: { name: 'test', args: {} } }; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { output: 'Tool execution succeeded.' }, + response: {}, }, }, ]); @@ -899,7 +1014,12 @@ describe('convertToFunctionResponse', () => { it('should handle empty string llmContent', () => { const llmContent = ''; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { @@ -913,13 +1033,18 @@ describe('convertToFunctionResponse', () => { it('should handle llmContent as an empty array', () => { const llmContent: PartListUnion = []; - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { output: 'Tool execution succeeded.' }, + response: {}, }, }, ]); @@ -927,13 +1052,18 @@ describe('convertToFunctionResponse', () => { it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => { const llmContent: Part = {}; // An empty part object - const result = convertToFunctionResponse(toolName, callId, llmContent); + const result = convertToFunctionResponse( + toolName, + callId, + llmContent, + PREVIEW_GEMINI_MODEL, + ); expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { output: 'Tool execution succeeded.' }, + response: {}, }, }, ]); diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 81487e96cc..85632265d4 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -29,7 +29,7 @@ import { } from '../index.js'; import { READ_FILE_TOOL_NAME, SHELL_TOOL_NAME } from '../tools/tool-names.js'; import type { Part, PartListUnion } from '@google/genai'; -import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js'; +import { supportsMultimodalFunctionResponse } from '../config/models.js'; import type { ModifyContext } from '../tools/modifiable-tool.js'; import { isModifiableDeclarativeTool, @@ -50,6 +50,7 @@ import { fireToolNotificationHook, executeToolWithHooks, } from './coreToolHookTriggers.js'; +import { debugLogger } from '../utils/debugLogger.js'; export type ValidatingToolCall = { status: 'validating'; @@ -171,61 +172,85 @@ export function convertToFunctionResponse( toolName: string, callId: string, llmContent: PartListUnion, + model: string, ): Part[] { - const contentToProcess = - Array.isArray(llmContent) && llmContent.length === 1 - ? llmContent[0] - : llmContent; - - if (typeof contentToProcess === 'string') { - return [createFunctionResponsePart(callId, toolName, contentToProcess)]; + if (typeof llmContent === 'string') { + return [createFunctionResponsePart(callId, toolName, llmContent)]; } - if (Array.isArray(contentToProcess)) { - const functionResponse = createFunctionResponsePart( - callId, - toolName, - 'Tool execution succeeded.', - ); - return [functionResponse, ...toParts(contentToProcess)]; - } + const parts = toParts(llmContent); - // After this point, contentToProcess is a single Part object. - if (contentToProcess.functionResponse) { - if (contentToProcess.functionResponse.response?.['content']) { - const stringifiedOutput = - getResponseTextFromParts( - contentToProcess.functionResponse.response['content'] as Part[], - ) || ''; - return [createFunctionResponsePart(callId, toolName, stringifiedOutput)]; + // Separate text from binary types + const textParts: string[] = []; + const inlineDataParts: Part[] = []; + const fileDataParts: Part[] = []; + + for (const part of parts) { + if (part.text !== undefined) { + textParts.push(part.text); + } else if (part.inlineData) { + inlineDataParts.push(part); + } else if (part.fileData) { + fileDataParts.push(part); + } else if (part.functionResponse) { + if (parts.length > 1) { + debugLogger.warn( + 'convertToFunctionResponse received multiple parts with a functionResponse. Only the functionResponse will be used, other parts will be ignored', + ); + } + // Handle passthrough case + return [ + { + functionResponse: { + id: callId, + name: toolName, + response: part.functionResponse.response, + }, + }, + ]; } - // It's a functionResponse that we should pass through as is. - return [contentToProcess]; + // Ignore other part types } - if (contentToProcess.inlineData || contentToProcess.fileData) { - const mimeType = - contentToProcess.inlineData?.mimeType || - contentToProcess.fileData?.mimeType || - 'unknown'; - const functionResponse = createFunctionResponsePart( - callId, - toolName, - `Binary content of type ${mimeType} was processed.`, - ); - return [functionResponse, contentToProcess]; + // Build the primary response part + const part: Part = { + functionResponse: { + id: callId, + name: toolName, + response: textParts.length > 0 ? { output: textParts.join('\n') } : {}, + }, + }; + + const isMultimodalFRSupported = supportsMultimodalFunctionResponse(model); + const siblingParts: Part[] = [...fileDataParts]; + + if (inlineDataParts.length > 0) { + if (isMultimodalFRSupported) { + // Nest inlineData if supported by the model + (part.functionResponse as unknown as { parts: Part[] }).parts = + inlineDataParts; + } else { + // Otherwise treat as siblings + siblingParts.push(...inlineDataParts); + } } - if (contentToProcess.text !== undefined) { - return [ - createFunctionResponsePart(callId, toolName, contentToProcess.text), - ]; + // Add descriptive text if the response object is empty but we have binary content + if ( + textParts.length === 0 && + (inlineDataParts.length > 0 || fileDataParts.length > 0) + ) { + const totalBinaryItems = inlineDataParts.length + fileDataParts.length; + part.functionResponse!.response = { + output: `Binary content provided (${totalBinaryItems} item(s)).`, + }; } - // Default case for other kinds of parts. - return [ - createFunctionResponsePart(callId, toolName, 'Tool execution succeeded.'), - ]; + if (siblingParts.length > 0) { + return [part, ...siblingParts]; + } + + return [part]; } function toParts(input: PartListUnion): Part[] { @@ -1228,6 +1253,7 @@ export class CoreToolScheduler { toolName, callId, content, + this.config.getActiveModel(), ); const successResponse: ToolCallResponseInfo = { callId, diff --git a/packages/core/src/core/nonInteractiveToolExecutor.test.ts b/packages/core/src/core/nonInteractiveToolExecutor.test.ts index 27e1f774d5..42bd93c6b1 100644 --- a/packages/core/src/core/nonInteractiveToolExecutor.test.ts +++ b/packages/core/src/core/nonInteractiveToolExecutor.test.ts @@ -19,6 +19,7 @@ import { ToolErrorType, ApprovalMode, HookSystem, + PREVIEW_GEMINI_MODEL, } from '../index.js'; import type { Part } from '@google/genai'; import { MockTool } from '../test-utils/mock-tool.js'; @@ -61,6 +62,7 @@ describe('executeToolCall', () => { getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, + getActiveModel: () => PREVIEW_GEMINI_MODEL, getUseSmartEdit: () => false, getGeminiClient: () => null, // No client needed for these tests getEnableMessageBusIntegration: () => false, @@ -321,12 +323,10 @@ describe('executeToolCall', () => { functionResponse: { name: 'testTool', id: 'call6', - response: { - output: 'Binary content of type image/png was processed.', - }, + response: { output: 'Binary content provided (1 item(s)).' }, + parts: [imageDataPart], }, }, - imageDataPart, ], }); });