Fix tool output fragmentation by encapsulating content in functionResponse (#13082)

2026-05-13 05:12:55 -07:00 · 2025-12-15 15:36:34 -05:00
parent 13944b9bb1
commit d236df5b21
8 changed files with 276 additions and 83 deletions
@@ -11,6 +11,7 @@ import type {
 } from '@a2a-js/sdk';
 import {
  ApprovalMode,
+  DEFAULT_GEMINI_MODEL,
  DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
  DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
  GeminiClient,
@@ -46,6 +47,7 @@ export function createMockConfig(
    getTruncateToolOutputThreshold: () =>
      DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
    getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
+    getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
    getDebugMode: vi.fn().mockReturnValue(false),
    getContentGeneratorConfig: vi.fn().mockReturnValue({ model: 'gemini-pro' }),
    getModel: vi.fn().mockReturnValue('gemini-pro'),
@@ -33,6 +33,7 @@ import {
  ApprovalMode,
  MockTool,
  HookSystem,
+  PREVIEW_GEMINI_MODEL,
 } from '@google/gemini-cli-core';
 import { createMockMessageBus } from '@google/gemini-cli-core/src/test-utils/mock-message-bus.js';
 import { ToolCallStatus } from '../types.js';
@@ -71,6 +72,7 @@ const mockConfig = {
  getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
  getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
  getAllowedTools: vi.fn(() => []),
+  getActiveModel: () => PREVIEW_GEMINI_MODEL,
  getContentGeneratorConfig: () => ({
    model: 'test-model',
    authType: 'oauth-personal',
@@ -497,7 +497,12 @@ export class Session {
        ),
      );

-      return convertToFunctionResponse(fc.name, callId, toolResult.llmContent);
+      return convertToFunctionResponse(
+        fc.name,
+        callId,
+        toolResult.llmContent,
+        this.config.getActiveModel(),
+      );
    } catch (e) {
      const error = e instanceof Error ? e : new Error(String(e));

@@ -14,8 +14,25 @@ import {
  GEMINI_MODEL_ALIAS_PRO,
  GEMINI_MODEL_ALIAS_FLASH,
  GEMINI_MODEL_ALIAS_FLASH_LITE,
+  supportsMultimodalFunctionResponse,
 } from './models.js';

+describe('supportsMultimodalFunctionResponse', () => {
+  it('should return true for gemini-3 model', () => {
+    expect(supportsMultimodalFunctionResponse('gemini-3-pro')).toBe(true);
+  });
+
+  it('should return false for gemini-2 models', () => {
+    expect(supportsMultimodalFunctionResponse('gemini-2.5-pro')).toBe(false);
+    expect(supportsMultimodalFunctionResponse('gemini-2.5-flash')).toBe(false);
+  });
+
+  it('should return false for other models', () => {
+    expect(supportsMultimodalFunctionResponse('some-other-model')).toBe(false);
+    expect(supportsMultimodalFunctionResponse('')).toBe(false);
+  });
+});
+
 describe('getEffectiveModel', () => {
  describe('When NOT in fallback mode', () => {
    const isInFallbackMode = false;
@@ -99,8 +99,19 @@ export function getEffectiveModel(
 * Checks if the model is a Gemini 2.x model.
 *
 * @param model The model name to check.
- * @returns True if the model is a Gemini 2.x model.
+ * @returns True if the model name is exactly `gemini-2` or starts with `gemini-2.`.
 */
 export function isGemini2Model(model: string): boolean {
  return /^gemini-2(\.|$)/.test(model);
 }
+
+/**
+ * Checks if the model supports multimodal function responses (multimodal data nested within function response).
+ * Detection is by name only: the model name must start with the literal prefix `gemini-3-`.
+ *
+ * @param model The model name to check.
+ * @returns True if the name starts with `gemini-3-`; false otherwise (including '' and a bare `gemini-3`).
+ */
+export function supportsMultimodalFunctionResponse(model: string): boolean {
+  return model.startsWith('gemini-3-');
+}
@@ -46,6 +46,10 @@ import * as modifiableToolModule from '../tools/modifiable-tool.js';
 import * as fs from 'node:fs/promises';
 import * as path from 'node:path';
 import { isShellInvocationAllowlisted } from '../utils/shell-permissions.js';
+import {
+  DEFAULT_GEMINI_MODEL,
+  PREVIEW_GEMINI_MODEL,
+} from '../config/models.js';

 vi.mock('fs/promises', () => ({
  writeFile: vi.fn(),
@@ -255,6 +259,7 @@ function createMockConfig(overrides: Partial<Config> = {}): Config {
      DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
    getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
    getToolRegistry: () => defaultToolRegistry,
+    getActiveModel: () => DEFAULT_GEMINI_MODEL,
    getUseSmartEdit: () => false,
    getGeminiClient: () => null,
    getEnableMessageBusIntegration: () => false,
@@ -767,7 +772,12 @@ describe('convertToFunctionResponse', () => {

  it('should handle simple string llmContent', () => {
    const llmContent = 'Simple text output';
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      DEFAULT_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
@@ -781,7 +791,12 @@ describe('convertToFunctionResponse', () => {

  it('should handle llmContent as a single Part with text', () => {
    const llmContent: Part = { text: 'Text from Part object' };
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      DEFAULT_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
@@ -795,7 +810,12 @@ describe('convertToFunctionResponse', () => {

  it('should handle llmContent as a PartListUnion array with a single text Part', () => {
    const llmContent: PartListUnion = [{ text: 'Text from array' }];
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      DEFAULT_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
@@ -807,60 +827,147 @@ describe('convertToFunctionResponse', () => {
    ]);
  });

-  it('should handle llmContent with inlineData', () => {
-    const llmContent: Part = {
-      inlineData: { mimeType: 'image/png', data: 'base64...' },
-    };
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+  it('should handle llmContent as a PartListUnion array with multiple Parts', () => {
+    const llmContent: PartListUnion = [{ text: 'part1' }, { text: 'part2' }];
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      DEFAULT_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: {
-            output: 'Binary content of type image/png was processed.',
-          },
+          response: { output: 'part1\npart2' },
+        },
+      },
+    ]);
+  });
+
+  it('should handle llmContent with fileData for Gemini 3 model (should be siblings)', () => {
+    const llmContent: Part = {
+      fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
+    };
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
+    expect(result).toEqual([
+      {
+        functionResponse: {
+          name: toolName,
+          id: callId,
+          response: { output: 'Binary content provided (1 item(s)).' },
        },
      },
      llmContent,
    ]);
  });

-  it('should handle llmContent with fileData', () => {
+  it('should handle llmContent with inlineData for Gemini 3 model (should be nested)', () => {
    const llmContent: Part = {
-      fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
+      inlineData: { mimeType: 'image/png', data: 'base64...' },
    };
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: {
-            output: 'Binary content of type application/pdf was processed.',
-          },
+          response: { output: 'Binary content provided (1 item(s)).' },
+          parts: [llmContent],
+        },
+      },
+    ]);
+  });
+
+  it('should handle llmContent with fileData for non-Gemini 3 models', () => {
+    const llmContent: Part = {
+      fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
+    };
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      DEFAULT_GEMINI_MODEL,
+    );
+    expect(result).toEqual([
+      {
+        functionResponse: {
+          name: toolName,
+          id: callId,
+          response: { output: 'Binary content provided (1 item(s)).' },
        },
      },
      llmContent,
    ]);
  });

+  it('should preserve existing functionResponse metadata', () => {
+    const innerId = 'inner-call-id';
+    const innerName = 'inner-tool-name';
+    const responseMetadata = {
+      flags: ['flag1'],
+      isError: false,
+      customData: { key: 'value' },
+    };
+    const input: Part = {
+      functionResponse: {
+        id: innerId,
+        name: innerName,
+        response: responseMetadata,
+      },
+    };
+
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      input,
+      DEFAULT_GEMINI_MODEL,
+    );
+
+    expect(result).toHaveLength(1);
+    expect(result[0].functionResponse).toEqual({
+      id: callId,
+      name: toolName,
+      response: responseMetadata,
+    });
+  });
+
  it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => {
    const llmContent: PartListUnion = [
      { text: 'Some textual description' },
      { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
      { text: 'Another text part' },
    ];
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: { output: 'Tool execution succeeded.' },
+          response: {
+            output: 'Some textual description\nAnother text part',
+          },
+          parts: [
+            { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
+          ],
        },
      },
-      ...llmContent,
    ]);
  });

@@ -868,30 +975,38 @@ describe('convertToFunctionResponse', () => {
    const llmContent: PartListUnion = [
      { inlineData: { mimeType: 'image/gif', data: 'gifdata...' } },
    ];
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: {
-            output: 'Binary content of type image/gif was processed.',
-          },
+          response: { output: 'Binary content provided (1 item(s)).' },
+          parts: llmContent,
        },
      },
-      ...llmContent,
    ]);
  });

  it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => {
    const llmContent: Part = { functionCall: { name: 'test', args: {} } };
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: { output: 'Tool execution succeeded.' },
+          response: {},
        },
      },
    ]);
@@ -899,7 +1014,12 @@ describe('convertToFunctionResponse', () => {

  it('should handle empty string llmContent', () => {
    const llmContent = '';
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
@@ -913,13 +1033,18 @@ describe('convertToFunctionResponse', () => {

  it('should handle llmContent as an empty array', () => {
    const llmContent: PartListUnion = [];
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: { output: 'Tool execution succeeded.' },
+          response: {},
        },
      },
    ]);
@@ -927,13 +1052,18 @@ describe('convertToFunctionResponse', () => {

  it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => {
    const llmContent: Part = {}; // An empty part object
-    const result = convertToFunctionResponse(toolName, callId, llmContent);
+    const result = convertToFunctionResponse(
+      toolName,
+      callId,
+      llmContent,
+      PREVIEW_GEMINI_MODEL,
+    );
    expect(result).toEqual([
      {
        functionResponse: {
          name: toolName,
          id: callId,
-          response: { output: 'Tool execution succeeded.' },
+          response: {},
        },
      },
    ]);
@@ -29,7 +29,7 @@ import {
 } from '../index.js';
 import { READ_FILE_TOOL_NAME, SHELL_TOOL_NAME } from '../tools/tool-names.js';
 import type { Part, PartListUnion } from '@google/genai';
-import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js';
+import { supportsMultimodalFunctionResponse } from '../config/models.js';
 import type { ModifyContext } from '../tools/modifiable-tool.js';
 import {
  isModifiableDeclarativeTool,
@@ -50,6 +50,7 @@ import {
  fireToolNotificationHook,
  executeToolWithHooks,
 } from './coreToolHookTriggers.js';
+import { debugLogger } from '../utils/debugLogger.js';

 export type ValidatingToolCall = {
  status: 'validating';
@@ -171,61 +172,85 @@ export function convertToFunctionResponse(
  toolName: string,
  callId: string,
  llmContent: PartListUnion,
+  model: string,
 ): Part[] {
-  const contentToProcess =
-    Array.isArray(llmContent) && llmContent.length === 1
-      ? llmContent[0]
-      : llmContent;
-
-  if (typeof contentToProcess === 'string') {
-    return [createFunctionResponsePart(callId, toolName, contentToProcess)];
+  if (typeof llmContent === 'string') {
+    return [createFunctionResponsePart(callId, toolName, llmContent)];
  }

-  if (Array.isArray(contentToProcess)) {
-    const functionResponse = createFunctionResponsePart(
-      callId,
-      toolName,
-      'Tool execution succeeded.',
-    );
-    return [functionResponse, ...toParts(contentToProcess)];
-  }
+  const parts = toParts(llmContent);

-  // After this point, contentToProcess is a single Part object.
-  if (contentToProcess.functionResponse) {
-    if (contentToProcess.functionResponse.response?.['content']) {
-      const stringifiedOutput =
-        getResponseTextFromParts(
-          contentToProcess.functionResponse.response['content'] as Part[],
-        ) || '';
-      return [createFunctionResponsePart(callId, toolName, stringifiedOutput)];
+  // Separate text from binary types
+  const textParts: string[] = [];
+  const inlineDataParts: Part[] = [];
+  const fileDataParts: Part[] = [];
+
+  for (const part of parts) {
+    if (part.text !== undefined) {
+      textParts.push(part.text);
+    } else if (part.inlineData) {
+      inlineDataParts.push(part);
+    } else if (part.fileData) {
+      fileDataParts.push(part);
+    } else if (part.functionResponse) {
+      if (parts.length > 1) {
+        debugLogger.warn(
+          'convertToFunctionResponse received multiple parts with a functionResponse. Only the functionResponse will be used, other parts will be ignored',
+        );
+      }
+      // Passthrough case: keep only the tool-supplied response payload, re-keyed to this call's id and tool name
+      return [
+        {
+          functionResponse: {
+            id: callId,
+            name: toolName,
+            response: part.functionResponse.response,
+          },
+        },
+      ];
    }
-    // It's a functionResponse that we should pass through as is.
-    return [contentToProcess];
+    // Ignore other part types
  }

-  if (contentToProcess.inlineData || contentToProcess.fileData) {
-    const mimeType =
-      contentToProcess.inlineData?.mimeType ||
-      contentToProcess.fileData?.mimeType ||
-      'unknown';
-    const functionResponse = createFunctionResponsePart(
-      callId,
-      toolName,
-      `Binary content of type ${mimeType} was processed.`,
-    );
-    return [functionResponse, contentToProcess];
+  // Build the primary response part
+  const part: Part = {
+    functionResponse: {
+      id: callId,
+      name: toolName,
+      response: textParts.length > 0 ? { output: textParts.join('\n') } : {},
+    },
+  };
+
+  const isMultimodalFRSupported = supportsMultimodalFunctionResponse(model);
+  const siblingParts: Part[] = [...fileDataParts];
+
+  if (inlineDataParts.length > 0) {
+    if (isMultimodalFRSupported) {
+      // Nest inlineData if supported by the model
+      (part.functionResponse as unknown as { parts: Part[] }).parts =
+        inlineDataParts;
+    } else {
+      // Otherwise treat as siblings
+      siblingParts.push(...inlineDataParts);
+    }
  }

-  if (contentToProcess.text !== undefined) {
-    return [
-      createFunctionResponsePart(callId, toolName, contentToProcess.text),
-    ];
+  // Add descriptive text if the response object is empty but we have binary content
+  if (
+    textParts.length === 0 &&
+    (inlineDataParts.length > 0 || fileDataParts.length > 0)
+  ) {
+    const totalBinaryItems = inlineDataParts.length + fileDataParts.length;
+    part.functionResponse!.response = {
+      output: `Binary content provided (${totalBinaryItems} item(s)).`,
+    };
  }

-  // Default case for other kinds of parts.
-  return [
-    createFunctionResponsePart(callId, toolName, 'Tool execution succeeded.'),
-  ];
+  if (siblingParts.length > 0) {
+    return [part, ...siblingParts];
+  }
+
+  return [part];
 }

 function toParts(input: PartListUnion): Part[] {
@@ -1228,6 +1253,7 @@ export class CoreToolScheduler {
                  toolName,
                  callId,
                  content,
+                  this.config.getActiveModel(),
                );
                const successResponse: ToolCallResponseInfo = {
                  callId,
@@ -19,6 +19,7 @@ import {
  ToolErrorType,
  ApprovalMode,
  HookSystem,
+  PREVIEW_GEMINI_MODEL,
 } from '../index.js';
 import type { Part } from '@google/genai';
 import { MockTool } from '../test-utils/mock-tool.js';
@@ -61,6 +62,7 @@ describe('executeToolCall', () => {
      getTruncateToolOutputThreshold: () =>
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
+      getActiveModel: () => PREVIEW_GEMINI_MODEL,
      getUseSmartEdit: () => false,
      getGeminiClient: () => null, // No client needed for these tests
      getEnableMessageBusIntegration: () => false,
@@ -321,12 +323,10 @@ describe('executeToolCall', () => {
          functionResponse: {
            name: 'testTool',
            id: 'call6',
-            response: {
-              output: 'Binary content of type image/png was processed.',
-            },
+            response: { output: 'Binary content provided (1 item(s)).' },
+            parts: [imageDataPart],
          },
        },
-        imageDataPart,
      ],
    });
  });