From 804712322579c9745254f4688940b0f32c8360e4 Mon Sep 17 00:00:00 2001 From: Coco Sheng Date: Wed, 20 May 2026 13:36:48 -0400 Subject: [PATCH] fix(core): mitigate data corruption during write_file on massive text blocks --- .../src/tools/definitions/model-family-sets/default-legacy.ts | 4 ++-- .../core/src/tools/definitions/model-family-sets/gemini-3.ts | 4 ++-- packages/core/src/tools/omissionPlaceholderDetector.test.ts | 2 ++ packages/core/src/tools/omissionPlaceholderDetector.ts | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 3dfe8dd40e..07bfc1189e 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -112,7 +112,7 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = { name: WRITE_FILE_TOOL_NAME, description: `Writes content to a specified file in the local filesystem. - The user has the ability to modify \`content\`. If modified, this will be stated in the response.`, + The user has the ability to modify \`content\`. If modified, this will be stated in the response. WARNING: Do NOT use this tool if the file contains massive literal text sequences (like a 6000+ character string, large arrays, or inline base64 images), as LLMs are prone to corrupting or truncating such sequences during a full file rewrite. Use the '${EDIT_TOOL_NAME}' tool instead.`, parametersJsonSchema: { type: 'object', properties: { @@ -122,7 +122,7 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = { }, [WRITE_FILE_PARAM_CONTENT]: { description: - "The content to write to the file. Do not use omission placeholders like '(rest of methods ...)', '...', or 'unchanged code'; provide complete literal content.", + "The content to write to the file. Do not use omission placeholders like '(rest of methods ...)', '...', or 'unchanged code'; provide complete literal content. WARNING: Do not use this tool to rewrite files containing massive literal text blocks (e.g., inline base64 images or >6000 character strings) because you may corrupt them. Use the replace tool instead.", type: 'string', }, }, diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index 57a897f9ee..b4a9268667 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -119,7 +119,7 @@ export const GEMINI_3_SET: CoreToolSet = { write_file: { name: WRITE_FILE_TOOL_NAME, - description: `Writes the complete content to a file, automatically creating missing parent directories. Overwrites existing files. The user has the ability to modify 'content' before it is saved. Best for new or small files; use '${EDIT_TOOL_NAME}' for targeted edits to large files to minimize token usage and simplify reviews.`, + description: `Writes the complete content to a file, automatically creating missing parent directories. Overwrites existing files. The user has the ability to modify 'content' before it is saved. Best for new or small files; use '${EDIT_TOOL_NAME}' for targeted edits to large files to minimize token usage and simplify reviews. WARNING: Do NOT use this tool if the file contains massive literal text sequences (like a 6000+ character string, large arrays, or inline base64 images), as LLMs are prone to corrupting or truncating such sequences during a full file rewrite. Use the '${EDIT_TOOL_NAME}' tool instead.`, parametersJsonSchema: { type: 'object', properties: { @@ -129,7 +129,7 @@ export const GEMINI_3_SET: CoreToolSet = { }, [WRITE_FILE_PARAM_CONTENT]: { description: - "The complete content to write. Provide the full file; do not use placeholders like '// ... rest of code'.", + "The complete content to write. Provide the full file; do not use placeholders like '// ... rest of code'. WARNING: Do not use this tool to rewrite files containing massive literal text blocks (e.g., inline base64 images or >6000 character strings) because you may corrupt them. Use the replace tool instead.", type: 'string', }, }, diff --git a/packages/core/src/tools/omissionPlaceholderDetector.test.ts b/packages/core/src/tools/omissionPlaceholderDetector.test.ts index 4e574d5e22..edee2d1a07 100644 --- a/packages/core/src/tools/omissionPlaceholderDetector.test.ts +++ b/packages/core/src/tools/omissionPlaceholderDetector.test.ts @@ -9,6 +9,8 @@ import { detectOmissionPlaceholders } from './omissionPlaceholderDetector.js'; describe('detectOmissionPlaceholders', () => { it('detects standalone placeholder lines', () => { + expect(detectOmissionPlaceholders('...')).toEqual([' ...']); + expect(detectOmissionPlaceholders('// ...')).toEqual([' ...']); expect(detectOmissionPlaceholders('(rest of methods ...)')).toEqual([ 'rest of methods ...', ]); diff --git a/packages/core/src/tools/omissionPlaceholderDetector.ts b/packages/core/src/tools/omissionPlaceholderDetector.ts index 7057a7f09d..be9a5b582b 100644 --- a/packages/core/src/tools/omissionPlaceholderDetector.ts +++ b/packages/core/src/tools/omissionPlaceholderDetector.ts @@ -5,6 +5,7 @@ */ const OMITTED_PREFIXES = new Set([ + '', 'rest of', 'rest of method', 'rest of methods',