fix(cli): use byte length instead of string length for readStdin size limits (#26224)

This commit is contained in:
Adib234
2026-04-30 10:12:44 -04:00
committed by GitHub
parent d743c6fae6
commit 487fb219cc
2 changed files with 65 additions and 4 deletions
+43
View File
@@ -140,6 +140,49 @@ describe('readStdin', () => {
expect(mockStdin.destroy).toHaveBeenCalled();
});
it('should truncate multi-byte characters at byte boundary', async () => {
  const byteLimit = 8 * 1024 * 1024;
  // '한' encodes to 3 UTF-8 bytes. 2,796,202 of them (8,388,606 bytes) is the
  // most that fits under the limit; one more (8,388,609 bytes) pushes the
  // limit into the middle of the final character.
  const bytesPerChar = 3;
  const wholeCharsThatFit = Math.floor(byteLimit / bytesPerChar);
  const oversizedInput = '한'.repeat(wholeCharsThatFit + 1);

  mockStdin.read
    .mockReturnValueOnce(oversizedInput)
    .mockReturnValueOnce(null);

  const pending = readStdin();
  onReadableHandler();
  const output = await pending;

  const outputBytes = Buffer.byteLength(output, 'utf8');
  expect(outputBytes).toBeLessThanOrEqual(byteLimit);
  // Exactly the whole characters that fit survive; the partial one is dropped.
  expect(outputBytes).toBe(wholeCharsThatFit * bytesPerChar);
  // The output must not contain any U+FFFD replacement characters.
  expect(output).not.toContain('\uFFFD');
});
it('should use byte length instead of string length for limit', async () => {
  const byteLimit = 8 * 1024 * 1024;
  // Each '한' is 3 UTF-8 bytes, so 8M of them is 24MB of input even though
  // the string length equals the limit. A limit based on string length would
  // let all of it through; the byte-based limit has to cut it at 8MB.
  const inputLength = byteLimit;
  const oversizedInput = '한'.repeat(inputLength);

  mockStdin.read
    .mockReturnValueOnce(oversizedInput)
    .mockReturnValueOnce(null);

  const pending = readStdin();
  onReadableHandler();
  const output = await pending;

  const outputBytes = Buffer.byteLength(output, 'utf8');
  expect(outputBytes).toBeLessThanOrEqual(byteLimit);
  // Fewer characters came back than went in, i.e. truncation happened.
  expect(output.length).toBeLessThan(inputLength);
});
it('should handle stdin error', async () => {
const promise = readStdin();
const error = new Error('stdin error');
+22 -4
View File
@@ -6,6 +6,23 @@
import { debugLogger } from '@google/gemini-cli-core';
/**
 * Truncates a string so that its UTF-8 encoding fits within a byte limit,
 * without splitting a multi-byte character.
 *
 * @param str - Text to truncate.
 * @param maxBytes - Maximum size of the result in UTF-8 bytes. Negative and
 *   NaN limits are treated as 0; fractional limits are rounded down.
 * @returns `str` itself when its encoding already fits; otherwise the longest
 *   prefix made of whole characters whose encoding is at most `maxBytes`.
 */
function truncateUtf8Bytes(str: string, maxBytes: number): string {
  // Normalise the limit before using it as an index. Buffer.subarray() treats
  // a negative end as an offset from the END of the buffer, so an unclamped
  // negative limit would return almost the whole input. A fractional limit
  // would index `buf` at a non-integer position and skip the boundary check.
  const limit = Number.isNaN(maxBytes) ? 0 : Math.max(0, Math.floor(maxBytes));

  const buf = Buffer.from(str, 'utf8');
  if (buf.length <= limit) return str;

  let end = limit;
  // buf[end] is the first byte that would be dropped. If it is a UTF-8
  // continuation byte (10xxxxxx), the cut falls inside a character, so walk
  // back until `end` sits on that character's lead byte.
  while (end > 0 && (buf[end] & 0xc0) === 0x80) {
    end--;
  }
  // The slice end is exclusive, so a lead byte of an incomplete sequence at
  // `end` is left out together with its continuation bytes.
  return buf.subarray(0, end).toString('utf8');
}
export async function readStdin(): Promise<string> {
const MAX_STDIN_SIZE = 8 * 1024 * 1024; // 8MB
return new Promise((resolve, reject) => {
@@ -30,9 +47,10 @@ export async function readStdin(): Promise<string> {
pipedInputTimerId = null;
}
if (totalSize + chunk.length > MAX_STDIN_SIZE) {
const remainingSize = MAX_STDIN_SIZE - totalSize;
data += chunk.slice(0, remainingSize);
const chunkByteLength = Buffer.byteLength(chunk, 'utf8');
if (totalSize + chunkByteLength > MAX_STDIN_SIZE) {
const remainingBytes = MAX_STDIN_SIZE - totalSize;
data += truncateUtf8Bytes(chunk, remainingBytes);
debugLogger.warn(
`Warning: stdin input truncated to ${MAX_STDIN_SIZE} bytes.`,
);
@@ -41,7 +59,7 @@ export async function readStdin(): Promise<string> {
break;
}
data += chunk;
totalSize += chunk.length;
totalSize += chunkByteLength;
}
};