diff --git a/packages/cli/src/utils/readStdin.test.ts b/packages/cli/src/utils/readStdin.test.ts index 1a5202173c..4a0d4d42ea 100644 --- a/packages/cli/src/utils/readStdin.test.ts +++ b/packages/cli/src/utils/readStdin.test.ts @@ -140,6 +140,49 @@ describe('readStdin', () => { expect(mockStdin.destroy).toHaveBeenCalled(); }); + it('should truncate multi-byte characters at byte boundary', async () => { + const MAX_STDIN_SIZE = 8 * 1024 * 1024; + // '한' is 3 bytes. 2,796,202 * 3 = 8,388,606 bytes. + // 2,796,203 * 3 = 8,388,609 bytes. + const charCount = Math.floor(MAX_STDIN_SIZE / 3) + 1; + const multiByteChunk = '한'.repeat(charCount); + + mockStdin.read + .mockReturnValueOnce(multiByteChunk) + .mockReturnValueOnce(null); + + const promise = readStdin(); + onReadableHandler(); + + const result = await promise; + const resultBytes = Buffer.byteLength(result, 'utf8'); + + expect(resultBytes).toBeLessThanOrEqual(MAX_STDIN_SIZE); + expect(resultBytes).toBe(Math.floor(MAX_STDIN_SIZE / 3) * 3); + expect(result).not.toContain('\uFFFD'); // No replacement characters + }); + + it('should use byte length instead of string length for limit', async () => { + const MAX_STDIN_SIZE = 8 * 1024 * 1024; + // '한' is 3 bytes. If we use string length, we'd allow 8M characters = 24MB. + // We want to ensure it stops at 8MB. 
+ const charCount = MAX_STDIN_SIZE; // 8M characters = 24MB + const multiByteChunk = '한'.repeat(charCount); + + mockStdin.read + .mockReturnValueOnce(multiByteChunk) + .mockReturnValueOnce(null); + + const promise = readStdin(); + onReadableHandler(); + + const result = await promise; + expect(Buffer.byteLength(result, 'utf8')).toBeLessThanOrEqual( + MAX_STDIN_SIZE, + ); + expect(result.length).toBeLessThan(charCount); + }); + it('should handle stdin error', async () => { const promise = readStdin(); const error = new Error('stdin error'); diff --git a/packages/cli/src/utils/readStdin.ts b/packages/cli/src/utils/readStdin.ts index 3b5b17fe04..f828bca88c 100644 --- a/packages/cli/src/utils/readStdin.ts +++ b/packages/cli/src/utils/readStdin.ts @@ -6,6 +6,23 @@ import { debugLogger } from '@google/gemini-cli-core'; +/** + * Truncates a string to fit within a UTF-8 byte limit without splitting + * multi-byte characters. Walks back from the cut point to find the last + * complete character boundary. 
/**
 * Truncates a string so that its UTF-8 encoding fits within `maxBytes`
 * without ever splitting a multi-byte character.
 *
 * @param str - Text to truncate.
 * @param maxBytes - Maximum size of the result in UTF-8 bytes. Fractional
 *   values are rounded down; zero, negative, or NaN values yield `''`.
 * @returns `str` itself when its encoding already fits; otherwise the longest
 *   prefix made of whole characters whose encoding is at most `maxBytes` bytes.
 */
function truncateUtf8Bytes(str: string, maxBytes: number): string {
  // Reject non-positive (and NaN) limits up front: a negative `end` handed to
  // subarray() is interpreted relative to the END of the buffer, which would
  // return almost the whole input instead of nothing.
  if (!(maxBytes > 0)) return '';

  const buf = Buffer.from(str, 'utf8');
  if (buf.length <= maxBytes) return str;

  // Use an integer cut point; a fractional index would read `undefined` from
  // the buffer and silently skip the boundary check below.
  let end = Math.floor(maxBytes);

  // buf[end] is the first byte that would be dropped. If it is a UTF-8
  // continuation byte (10xxxxxx) the cut lands inside a character, so walk
  // back to that character's lead byte and drop the whole character.
  while (end > 0 && (buf[end] & 0xc0) === 0x80) {
    end--;
  }

  // `end` is now a character boundary: bytes [0, end) hold only complete
  // sequences, so decoding cannot produce replacement characters.
  return buf.subarray(0, end).toString('utf8');
}