fix(hooks): preserve non-text parts in fromHookLLMRequest (#26275)

This commit is contained in:
Sandy Tao
2026-05-04 10:45:52 -07:00
committed by GitHub
parent 790f2cf815
commit 165efa8a38
2 changed files with 329 additions and 17 deletions
@@ -173,6 +173,215 @@ describe('HookTranslator', () => {
});
});
// Regression tests for https://github.com/google-gemini/gemini-cli/issues/25558
// A BeforeModel hook that edited text in a conversation containing tool calls
// used to destroy functionCall/functionResponse parts, because
// fromHookLLMRequest rebuilt contents text-only. The fix merges the hook's
// text edits back into baseRequest.contents in place, so non-text parts
// survive.
describe('fromHookLLMRequest with baseRequest (non-text part preservation)', () => {
  // Shape the assertions below need when inspecting individual parts.
  type TestContents = Array<{
    role: string;
    parts: Array<Record<string, unknown>>;
  }>;
  // Narrow result.contents once, instead of repeating the cast per test.
  const contentsOf = (result: GenerateContentParameters): TestContents =>
    result.contents as TestContents;

  it('should preserve functionCall parts when merging hook text back', () => {
    const baseRequest = {
      model: 'gemini-2.0-flash',
      contents: [
        { role: 'user', parts: [{ text: 'Hello' }] },
        {
          role: 'model',
          parts: [
            { text: 'Let me check that.' },
            { functionCall: { name: 'search', args: { q: 'test' } } },
          ],
        },
        {
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: 'search',
                response: { results: [] },
              },
            },
          ],
        },
        { role: 'model', parts: [{ text: 'No results found.' }] },
      ],
    } as unknown as GenerateContentParameters;
    const hookRequest: LLMRequest = {
      model: 'gemini-2.0-flash',
      messages: [
        { role: 'user', content: 'Hello [MODIFIED]' },
        { role: 'model', content: 'Let me check that.' },
        // contents[2] (functionResponse only) was skipped by toHookLLMRequest
        { role: 'model', content: 'No results found.' },
      ],
    };
    const contents = contentsOf(
      translator.fromHookLLMRequest(hookRequest, baseRequest),
    );
    expect(contents).toHaveLength(4);
    const [first, second, third, fourth] = contents;
    // First content: text updated
    expect(first.parts[0]['text']).toBe('Hello [MODIFIED]');
    // Second content: text updated AND functionCall preserved
    expect(second.parts).toHaveLength(2);
    expect(second.parts[0]['text']).toBe('Let me check that.');
    expect(second.parts[1]['functionCall']).toBeDefined();
    // Third content: functionResponse preserved as-is (was skipped)
    expect(third.parts[0]['functionResponse']).toBeDefined();
    expect(third.parts).toHaveLength(1);
    // Fourth content: text updated
    expect(fourth.parts[0]['text']).toBe('No results found.');
  });

  it('should handle text-only entries interleaved with function-only entries', () => {
    const baseRequest = {
      model: 'gemini-2.0-flash',
      contents: [
        { role: 'user', parts: [{ text: 'Q1' }] },
        {
          role: 'model',
          parts: [{ functionCall: { name: 'tool1', args: {} } }],
        },
        {
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: 'tool1',
                response: { ok: true },
              },
            },
          ],
        },
        { role: 'model', parts: [{ text: 'Answer' }] },
      ],
    } as unknown as GenerateContentParameters;
    const hookRequest: LLMRequest = {
      model: 'gemini-2.0-flash',
      messages: [
        { role: 'user', content: 'Q1-modified' },
        // contents[1] and [2] skipped (no text)
        { role: 'model', content: 'Answer-modified' },
      ],
    };
    const contents = contentsOf(
      translator.fromHookLLMRequest(hookRequest, baseRequest),
    );
    expect(contents).toHaveLength(4);
    expect(contents[0].parts[0]['text']).toBe('Q1-modified');
    expect(contents[1].parts[0]['functionCall']).toBeDefined();
    expect(contents[2].parts[0]['functionResponse']).toBeDefined();
    expect(contents[3].parts[0]['text']).toBe('Answer-modified');
  });

  it('should collapse multiple text parts and preserve non-text parts', () => {
    const baseRequest = {
      model: 'gemini-2.0-flash',
      contents: [
        {
          role: 'model',
          parts: [
            { text: 'I will search' },
            { text: ' for you.' },
            { functionCall: { name: 'search', args: {} } },
          ],
        },
      ],
    } as unknown as GenerateContentParameters;
    const hookRequest: LLMRequest = {
      model: 'gemini-2.0-flash',
      messages: [
        { role: 'model', content: 'I will search for you. [BLINDED]' },
      ],
    };
    const contents = contentsOf(
      translator.fromHookLLMRequest(hookRequest, baseRequest),
    );
    expect(contents).toHaveLength(1);
    const { parts } = contents[0];
    // The two text parts collapse into one; the functionCall survives.
    expect(parts[0]['text']).toBe('I will search for you. [BLINDED]');
    expect(parts[1]['functionCall']).toBeDefined();
    expect(parts).toHaveLength(2);
  });

  it('should fall back to text-only when baseRequest is undefined', () => {
    const hookRequest: LLMRequest = {
      model: 'gemini-2.0-flash',
      messages: [{ role: 'user', content: 'Hello' }],
    };
    // No baseRequest at all — nothing to merge against.
    expect(translator.fromHookLLMRequest(hookRequest).contents).toEqual([
      { role: 'user', parts: [{ text: 'Hello' }] },
    ]);
  });

  it('should fall back to text-only when baseRequest has no contents', () => {
    const hookRequest: LLMRequest = {
      model: 'gemini-2.0-flash',
      messages: [{ role: 'user', content: 'Hello' }],
    };
    const baseRequest = {
      model: 'gemini-2.0-flash',
    } as GenerateContentParameters;
    // baseRequest present but without contents — same text-only fallback.
    expect(
      translator.fromHookLLMRequest(hookRequest, baseRequest).contents,
    ).toEqual([{ role: 'user', parts: [{ text: 'Hello' }] }]);
  });

  it('should append extra hook messages beyond base contents', () => {
    const baseRequest = {
      model: 'gemini-2.0-flash',
      contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
    } as unknown as GenerateContentParameters;
    const hookRequest: LLMRequest = {
      model: 'gemini-2.0-flash',
      messages: [
        { role: 'user', content: 'Hello' },
        { role: 'model', content: 'Extra message added by hook' },
      ],
    };
    const contents = contentsOf(
      translator.fromHookLLMRequest(hookRequest, baseRequest),
    );
    expect(contents).toHaveLength(2);
    expect(contents[1].parts[0]['text']).toBe('Extra message added by hook');
  });
});
describe('LLM Response Translation', () => {
it('should convert SDK response to hook format', () => {
const sdkResponse: GenerateContentResponse = {
+120 -17
View File
@@ -5,8 +5,10 @@
*/
import type {
Content,
GenerateContentResponse,
GenerateContentParameters,
Part,
ToolConfig,
FinishReason,
FunctionCallingConfig,
@@ -100,11 +102,10 @@ function hasTextProperty(value: unknown): value is { text: string } {
}
/**
* Type guard to check if content has role and parts properties
* Type guard to check if a value is a Content object (i.e. has role and parts
* properties). Narrows to Content so callers can access `parts` as Part[].
*/
function isContentWithParts(
content: unknown,
): content is { role: string; parts: unknown } {
function isContentWithParts(content: unknown): content is Content {
return (
typeof content === 'object' &&
content !== null &&
@@ -226,22 +227,124 @@ export class HookTranslatorGenAIv1 extends HookTranslator {
baseRequest?: GenerateContentParameters,
): GenerateContentParameters {
// Convert hook messages back to SDK Content format.
//
// When both hookRequest.messages and baseRequest.contents are present, we
// merge the hook's text edits back into the original contents in place,
// preserving non-text parts (functionCall, functionResponse, inlineData,
// thought, etc.) that toHookLLMRequest filtered out for the simplified
// hook API. Without this merge, a BeforeModel hook that modifies text
// would destroy tool call/response history and cause the model to loop
// (see https://github.com/google-gemini/gemini-cli/issues/25558).
//
// If the hook returned a partial request without messages (e.g. only
// overriding `model`), fall back to the base request's contents so the
// conversation is preserved.
const contents = hookRequest.messages
? hookRequest.messages.map((message) => ({
role: message.role === 'model' ? 'model' : message.role,
parts: [
{
text:
typeof message.content === 'string'
? message.content
: String(message.content),
},
],
}))
: (baseRequest?.contents ?? []);
let contents: GenerateContentParameters['contents'];
if (!hookRequest.messages) {
contents = baseRequest?.contents ?? [];
} else if (baseRequest?.contents) {
// Merge hook messages back into base contents, preserving non-text parts.
const baseContents = Array.isArray(baseRequest.contents)
? baseRequest.contents
: [baseRequest.contents];
// The merged result is uniformly Content[] — ContentListUnion does not
// allow mixing strings (PartUnion) and Content objects in the same
// array, so any string entries from baseContents are normalized to
// Content here.
const merged: Content[] = [];
let messageIndex = 0;
const messageToContent = (
message: LLMRequest['messages'][number],
): Content => ({
role: message.role === 'model' ? 'model' : message.role,
parts: [
{
text:
typeof message.content === 'string'
? message.content
: String(message.content),
},
],
});
for (const content of baseContents) {
// Normalize each baseContents entry into a Content object so the
// merged array is homogeneous.
if (typeof content === 'string') {
// String entries always contributed one message to the hook view.
if (messageIndex < hookRequest.messages.length) {
merged.push(messageToContent(hookRequest.messages[messageIndex++]));
} else {
merged.push({ role: 'user', parts: [{ text: content }] });
}
continue;
}
if (!isContentWithParts(content)) {
// Bare Part object (PartUnion expansion: Content | Part | string).
// toHookLLMRequest does not emit a message for these, so preserve
// them as a single-part Content with a default role.
merged.push({ role: 'user', parts: [content] });
continue;
}
const parts: Part[] = content.parts ?? [];
const hasText = parts.some(hasTextProperty);
const baseContent: Content = { ...content, parts };
if (!hasText) {
// toHookLLMRequest skipped this entry — preserve it untouched so
// tool-call/response history is not lost.
merged.push(baseContent);
continue;
}
// This entry contributed a message — merge the hook's text back in
// and keep any non-text parts in their original order.
if (messageIndex < hookRequest.messages.length) {
const message = hookRequest.messages[messageIndex++];
const newText =
typeof message.content === 'string'
? message.content
: String(message.content);
const nonTextParts = parts.filter(
(p): p is Part => !hasTextProperty(p),
);
merged.push({
...baseContent,
role: message.role === 'model' ? 'model' : message.role,
parts: [{ text: newText }, ...nonTextParts],
});
} else {
merged.push(baseContent);
}
}
// Append any remaining hook messages beyond baseContents (the hook may
// have added new turns).
while (messageIndex < hookRequest.messages.length) {
merged.push(messageToContent(hookRequest.messages[messageIndex++]));
}
contents = merged;
} else {
// No baseRequest contents to merge against — fall back to text-only.
contents = hookRequest.messages.map((message) => ({
role: message.role === 'model' ? 'model' : message.role,
parts: [
{
text:
typeof message.content === 'string'
? message.content
: String(message.content),
},
],
}));
}
// Build the result with proper typing.
// Use nullish coalescing so a hook that only sets `model` still works --