fix(core): mitigate data corruption during write_file on massive text blocks

This commit is contained in:
Coco Sheng
2026-05-20 13:36:48 -04:00
parent 29481a1562
commit 8047123225
4 changed files with 7 additions and 4 deletions
@@ -112,7 +112,7 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = {
name: WRITE_FILE_TOOL_NAME,
description: `Writes content to a specified file in the local filesystem.
The user has the ability to modify \`content\`. If modified, this will be stated in the response.`,
The user has the ability to modify \`content\`. If modified, this will be stated in the response. WARNING: Do NOT use this tool if the file contains massive literal text sequences (like a 6000+ character string, large arrays, or inline base64 images), as LLMs are prone to corrupting or truncating such sequences during a full file rewrite. Use the '${EDIT_TOOL_NAME}' tool instead.`,
parametersJsonSchema: {
type: 'object',
properties: {
@@ -122,7 +122,7 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = {
},
[WRITE_FILE_PARAM_CONTENT]: {
description:
"The content to write to the file. Do not use omission placeholders like '(rest of methods ...)', '...', or 'unchanged code'; provide complete literal content.",
"The content to write to the file. Do not use omission placeholders like '(rest of methods ...)', '...', or 'unchanged code'; provide complete literal content. WARNING: Do not use this tool to rewrite files containing massive literal text blocks (e.g., inline base64 images or >6000 character strings) because you may corrupt them. Use the replace tool instead.",
type: 'string',
},
},
@@ -119,7 +119,7 @@ export const GEMINI_3_SET: CoreToolSet = {
write_file: {
name: WRITE_FILE_TOOL_NAME,
description: `Writes the complete content to a file, automatically creating missing parent directories. Overwrites existing files. The user has the ability to modify 'content' before it is saved. Best for new or small files; use '${EDIT_TOOL_NAME}' for targeted edits to large files to minimize token usage and simplify reviews.`,
description: `Writes the complete content to a file, automatically creating missing parent directories. Overwrites existing files. The user has the ability to modify 'content' before it is saved. Best for new or small files; use '${EDIT_TOOL_NAME}' for targeted edits to large files to minimize token usage and simplify reviews. WARNING: Do NOT use this tool if the file contains massive literal text sequences (like a 6000+ character string, large arrays, or inline base64 images), as LLMs are prone to corrupting or truncating such sequences during a full file rewrite. Use the '${EDIT_TOOL_NAME}' tool instead.`,
parametersJsonSchema: {
type: 'object',
properties: {
@@ -129,7 +129,7 @@ export const GEMINI_3_SET: CoreToolSet = {
},
[WRITE_FILE_PARAM_CONTENT]: {
description:
"The complete content to write. Provide the full file; do not use placeholders like '// ... rest of code'.",
"The complete content to write. Provide the full file; do not use placeholders like '// ... rest of code'. WARNING: Do not use this tool to rewrite files containing massive literal text blocks (e.g., inline base64 images or >6000 character strings) because you may corrupt them. Use the replace tool instead.",
type: 'string',
},
},
@@ -9,6 +9,8 @@ import { detectOmissionPlaceholders } from './omissionPlaceholderDetector.js';
describe('detectOmissionPlaceholders', () => {
it('detects standalone placeholder lines', () => {
expect(detectOmissionPlaceholders('...')).toEqual([' ...']);
expect(detectOmissionPlaceholders('// ...')).toEqual([' ...']);
expect(detectOmissionPlaceholders('(rest of methods ...)')).toEqual([
'rest of methods ...',
]);
@@ -5,6 +5,7 @@
*/
const OMITTED_PREFIXES = new Set([
'',
'rest of',
'rest of method',
'rest of methods',