fix(cli): use byte length instead of string length for readStdin size limits (#26224)

2026-04-30 15:04:16 -07:00 · 2026-04-30 10:12:44 -04:00
parent d743c6fae6
commit 487fb219cc
2 changed files with 65 additions and 4 deletions
@@ -140,6 +140,49 @@ describe('readStdin', () => {
    expect(mockStdin.destroy).toHaveBeenCalled();
  });

+  it('should truncate multi-byte characters at byte boundary', async () => {
+    const MAX_STDIN_SIZE = 8 * 1024 * 1024;
+    // '한' is 3 bytes in UTF-8: 2,796,202 chars = 8,388,606 bytes (fits),
+    // while 2,796,203 chars = 8,388,609 bytes (one byte over the limit).
+    const charCount = Math.floor(MAX_STDIN_SIZE / 3) + 1;
+    const multiByteChunk = '한'.repeat(charCount);
+
+    mockStdin.read
+      .mockReturnValueOnce(multiByteChunk)
+      .mockReturnValueOnce(null);
+
+    const promise = readStdin();
+    onReadableHandler();
+
+    const result = await promise;
+    const resultBytes = Buffer.byteLength(result, 'utf8');
+
+    expect(resultBytes).toBeLessThanOrEqual(MAX_STDIN_SIZE);
+    expect(resultBytes).toBe(Math.floor(MAX_STDIN_SIZE / 3) * 3);
+    expect(result).not.toContain('\uFFFD'); // No U+FFFD: no character was cut mid-sequence
+  });
+
+  it('should use byte length instead of string length for limit', async () => {
+    const MAX_STDIN_SIZE = 8 * 1024 * 1024;
+    // '한' is 3 bytes in UTF-8. A limit counted via string length would accept
+    // 8M characters = 24MB; the result must instead stop at 8MB of bytes.
+    const charCount = MAX_STDIN_SIZE; // 8M characters = 24MB of UTF-8
+    const multiByteChunk = '한'.repeat(charCount);
+
+    mockStdin.read
+      .mockReturnValueOnce(multiByteChunk)
+      .mockReturnValueOnce(null);
+
+    const promise = readStdin();
+    onReadableHandler();
+
+    const result = await promise;
+    expect(Buffer.byteLength(result, 'utf8')).toBeLessThanOrEqual(
+      MAX_STDIN_SIZE,
+    );
+    expect(result.length).toBeLessThan(charCount);
+  });
+
  it('should handle stdin error', async () => {
    const promise = readStdin();
    const error = new Error('stdin error');
@@ -6,6 +6,23 @@

 import { debugLogger } from '@google/gemini-cli-core';

+/**
+ * Truncates `str` so its UTF-8 encoding is at most `maxBytes` long, never
+ * splitting a multi-byte character. Returns `str` unchanged when it already
+ * fits; a character that straddles the limit is dropped whole.
+ */
+function truncateUtf8Bytes(str: string, maxBytes: number): string {
+  const buf = Buffer.from(str, 'utf8');
+  if (buf.length <= maxBytes) return str;
+  let end = maxBytes;
+  // buf[end] is the first dropped byte; back up while it is a continuation (10xxxxxx)
+  while (end > 0 && (buf[end] & 0xc0) === 0x80) {
+    end--;
+  }
+  // end is now 0 or the index of an ASCII/lead byte, so buf[0..end) holds only whole characters
+  return buf.subarray(0, end).toString('utf8');
+}
+
 export async function readStdin(): Promise<string> {
  const MAX_STDIN_SIZE = 8 * 1024 * 1024; // 8MB
  return new Promise((resolve, reject) => {
@@ -30,9 +47,10 @@ export async function readStdin(): Promise<string> {
          pipedInputTimerId = null;
        }

-        if (totalSize + chunk.length > MAX_STDIN_SIZE) {
-          const remainingSize = MAX_STDIN_SIZE - totalSize;
-          data += chunk.slice(0, remainingSize);
+        const chunkByteLength = Buffer.byteLength(chunk, 'utf8');
+        if (totalSize + chunkByteLength > MAX_STDIN_SIZE) {
+          const remainingBytes = MAX_STDIN_SIZE - totalSize;
+          data += truncateUtf8Bytes(chunk, remainingBytes);
          debugLogger.warn(
            `Warning: stdin input truncated to ${MAX_STDIN_SIZE} bytes.`,
          );
@@ -41,7 +59,7 @@ export async function readStdin(): Promise<string> {
          break;
        }
        data += chunk;
-        totalSize += chunk.length;
+        totalSize += chunkByteLength;
      }
    };