mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-30 15:04:16 -07:00
fix(cli): use byte length instead of string length for readStdin size limits (#26224)
This commit is contained in:
@@ -140,6 +140,49 @@ describe('readStdin', () => {
|
||||
expect(mockStdin.destroy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should truncate multi-byte characters at byte boundary', async () => {
  // Same 8MB cap that readStdin declares internally.
  const MAX_STDIN_SIZE = 8 * 1024 * 1024;
  // '한' encodes to 3 UTF-8 bytes, and the cap is not a multiple of 3:
  //   2,796,202 chars -> 8,388,606 bytes (fits)
  //   2,796,203 chars -> 8,388,609 bytes (one byte over)
  const bytesPerChar = 3;
  const wholeCharsThatFit = Math.floor(MAX_STDIN_SIZE / bytesPerChar);
  const oversizedChunk = '한'.repeat(wholeCharsThatFit + 1);

  // Deliver everything in a single read, then signal that stdin is drained.
  mockStdin.read.mockReturnValueOnce(oversizedChunk).mockReturnValueOnce(null);

  const pending = readStdin();
  onReadableHandler();
  const output = await pending;

  const outputByteLength = Buffer.byteLength(output, 'utf8');
  expect(outputByteLength).toBeLessThanOrEqual(MAX_STDIN_SIZE);
  // Only whole characters survive; the one straddling the cap is dropped.
  expect(outputByteLength).toBe(wholeCharsThatFit * bytesPerChar);
  // A split character would decode to U+FFFD.
  expect(output).not.toContain('\uFFFD');
});
|
||||
|
||||
it('should use byte length instead of string length for limit', async () => {
  const MAX_STDIN_SIZE = 8 * 1024 * 1024;
  // '한' is 3 bytes in UTF-8, so this many characters is 24MB of input.
  // A limit measured in string length would let all of it through; the
  // result must instead stop at 8MB of bytes.
  const totalChars = MAX_STDIN_SIZE;
  const hugeChunk = '한'.repeat(totalChars);

  // Deliver everything in a single read, then signal that stdin is drained.
  mockStdin.read.mockReturnValueOnce(hugeChunk).mockReturnValueOnce(null);

  const pending = readStdin();
  onReadableHandler();
  const output = await pending;

  const outputByteLength = Buffer.byteLength(output, 'utf8');
  expect(outputByteLength).toBeLessThanOrEqual(MAX_STDIN_SIZE);
  expect(output.length).toBeLessThan(totalChars);
});
|
||||
|
||||
it('should handle stdin error', async () => {
|
||||
const promise = readStdin();
|
||||
const error = new Error('stdin error');
|
||||
|
||||
@@ -6,6 +6,23 @@
|
||||
|
||||
import { debugLogger } from '@google/gemini-cli-core';
|
||||
|
||||
/**
 * Truncates a string to fit within a UTF-8 byte limit without splitting
 * multi-byte characters. Walks back from the cut point to find the last
 * complete character boundary.
 *
 * @param str - The text to truncate.
 * @param maxBytes - Maximum size of the result in UTF-8 bytes. Fractional
 *   values are rounded down; zero, negative and NaN limits yield `''`.
 * @returns `str` itself when it already fits, otherwise the longest prefix of
 *   whole characters whose UTF-8 encoding is at most `maxBytes` bytes.
 */
function truncateUtf8Bytes(str: string, maxBytes: number): string {
  const limit = Math.floor(maxBytes);
  // A negative end index passed to subarray() counts from the END of the
  // buffer, so non-positive limits must be rejected before slicing.
  if (Number.isNaN(limit) || limit <= 0) return '';

  // Every UTF-16 code unit encodes to at least one UTF-8 byte, so the first
  // `limit + 1` code units already produce more than `limit` bytes. Encoding
  // only that prefix avoids allocating a buffer for input that will be cut.
  // If the slice splits a surrogate pair, the stray half is encoded at byte
  // offset >= limit as a 3-byte sequence whose first byte is a lead byte, so
  // the boundary search below is unaffected.
  const candidate = str.length > limit + 1 ? str.slice(0, limit + 1) : str;
  const buf = Buffer.from(candidate, 'utf8');
  if (buf.length <= limit) return str;

  let end = limit;
  // Walk backward past any UTF-8 continuation bytes (10xxxxxx)
  while (end > 0 && (buf[end] & 0xc0) === 0x80) {
    end--;
  }
  // end now points to the first byte of the character that would not fit
  // completely — exclude it and everything after.
  return buf.subarray(0, end).toString('utf8');
}
|
||||
|
||||
export async function readStdin(): Promise<string> {
|
||||
const MAX_STDIN_SIZE = 8 * 1024 * 1024; // 8MB
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -30,9 +47,10 @@ export async function readStdin(): Promise<string> {
|
||||
pipedInputTimerId = null;
|
||||
}
|
||||
|
||||
if (totalSize + chunk.length > MAX_STDIN_SIZE) {
|
||||
const remainingSize = MAX_STDIN_SIZE - totalSize;
|
||||
data += chunk.slice(0, remainingSize);
|
||||
const chunkByteLength = Buffer.byteLength(chunk, 'utf8');
|
||||
if (totalSize + chunkByteLength > MAX_STDIN_SIZE) {
|
||||
const remainingBytes = MAX_STDIN_SIZE - totalSize;
|
||||
data += truncateUtf8Bytes(chunk, remainingBytes);
|
||||
debugLogger.warn(
|
||||
`Warning: stdin input truncated to ${MAX_STDIN_SIZE} bytes.`,
|
||||
);
|
||||
@@ -41,7 +59,7 @@ export async function readStdin(): Promise<string> {
|
||||
break;
|
||||
}
|
||||
data += chunk;
|
||||
totalSize += chunk.length;
|
||||
totalSize += chunkByteLength;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user