mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-11 06:31:01 -07:00
refactor(core): foundational truncation refactoring and token estimation optimization (#16824)
This commit is contained in:
@@ -20,7 +20,8 @@ import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
|
||||
|
||||
// Mock file utils
|
||||
vi.mock('../utils/fileUtils.js', () => ({
|
||||
saveTruncatedContent: vi.fn(),
|
||||
saveTruncatedToolOutput: vi.fn(),
|
||||
formatTruncatedToolOutput: vi.fn(),
|
||||
}));
|
||||
|
||||
// Mock executeToolWithHooks
|
||||
@@ -40,12 +41,13 @@ describe('ToolExecutor', () => {
|
||||
// Reset mocks
|
||||
vi.resetAllMocks();
|
||||
|
||||
// Default mock implementation for saveTruncatedContent
|
||||
vi.mocked(fileUtils.saveTruncatedContent).mockImplementation(
|
||||
async (_content, _callId, _tempDir, _threshold, _lines) => ({
|
||||
content: 'TruncatedContent...',
|
||||
outputFile: '/tmp/truncated_output.txt',
|
||||
}),
|
||||
// Default mock implementation
|
||||
vi.mocked(fileUtils.saveTruncatedToolOutput).mockResolvedValue({
|
||||
outputFile: '/tmp/truncated_output.txt',
|
||||
totalLines: 100,
|
||||
});
|
||||
vi.mocked(fileUtils.formatTruncatedToolOutput).mockReturnValue(
|
||||
'TruncatedContent...',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -214,11 +216,16 @@ describe('ToolExecutor', () => {
|
||||
});
|
||||
|
||||
// 4. Verify Truncation Logic
|
||||
expect(fileUtils.saveTruncatedContent).toHaveBeenCalledWith(
|
||||
expect(fileUtils.saveTruncatedToolOutput).toHaveBeenCalledWith(
|
||||
longOutput,
|
||||
SHELL_TOOL_NAME,
|
||||
'call-trunc',
|
||||
expect.any(String), // temp dir
|
||||
10, // threshold
|
||||
);
|
||||
|
||||
expect(fileUtils.formatTruncatedToolOutput).toHaveBeenCalledWith(
|
||||
longOutput,
|
||||
'/tmp/truncated_output.txt',
|
||||
5, // lines
|
||||
);
|
||||
|
||||
@@ -226,7 +233,7 @@ describe('ToolExecutor', () => {
|
||||
if (result.status === 'success') {
|
||||
const response = result.response.responseParts[0]?.functionResponse
|
||||
?.response as Record<string, unknown>;
|
||||
// The content should be the *truncated* version returned by the mock saveTruncatedContent
|
||||
// The content should be the *truncated* version returned by the mock formatTruncatedToolOutput
|
||||
expect(response).toEqual({ output: 'TruncatedContent...' });
|
||||
expect(result.response.outputFile).toBe('/tmp/truncated_output.txt');
|
||||
}
|
||||
|
||||
@@ -20,7 +20,10 @@ import {
|
||||
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
|
||||
import { ShellToolInvocation } from '../tools/shell.js';
|
||||
import { executeToolWithHooks } from '../core/coreToolHookTriggers.js';
|
||||
import { saveTruncatedContent } from '../utils/fileUtils.js';
|
||||
import {
|
||||
saveTruncatedToolOutput,
|
||||
formatTruncatedToolOutput,
|
||||
} from '../utils/fileUtils.js';
|
||||
import { convertToFunctionResponse } from '../utils/generateContentResponseUtilities.js';
|
||||
import type {
|
||||
CompletedToolCall,
|
||||
@@ -212,17 +215,17 @@ export class ToolExecutor {
|
||||
const originalContentLength = content.length;
|
||||
const threshold = this.config.getTruncateToolOutputThreshold();
|
||||
const lines = this.config.getTruncateToolOutputLines();
|
||||
const truncatedResult = await saveTruncatedContent(
|
||||
content,
|
||||
callId,
|
||||
this.config.storage.getProjectTempDir(),
|
||||
threshold,
|
||||
lines,
|
||||
);
|
||||
content = truncatedResult.content;
|
||||
outputFile = truncatedResult.outputFile;
|
||||
|
||||
if (outputFile) {
|
||||
if (content.length > threshold) {
|
||||
const { outputFile: savedPath } = await saveTruncatedToolOutput(
|
||||
content,
|
||||
toolName,
|
||||
callId,
|
||||
this.config.storage.getProjectTempDir(),
|
||||
);
|
||||
outputFile = savedPath;
|
||||
content = formatTruncatedToolOutput(content, outputFile, lines);
|
||||
|
||||
logToolOutputTruncated(
|
||||
this.config,
|
||||
new ToolOutputTruncatedEvent(call.request.prompt_id, {
|
||||
|
||||
@@ -32,7 +32,8 @@ import {
|
||||
readFileWithEncoding,
|
||||
fileExists,
|
||||
readWasmBinaryFromDisk,
|
||||
saveTruncatedContent,
|
||||
saveTruncatedToolOutput,
|
||||
formatTruncatedToolOutput,
|
||||
} from './fileUtils.js';
|
||||
import { StandardFileSystemService } from '../services/fileSystemService.js';
|
||||
|
||||
@@ -1024,212 +1025,107 @@ describe('fileUtils', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('saveTruncatedContent', () => {
|
||||
const THRESHOLD = 40_000;
|
||||
const TRUNCATE_LINES = 1000;
|
||||
describe('saveTruncatedToolOutput & formatTruncatedToolOutput', () => {
|
||||
it('should save content to a file with safe name', async () => {
|
||||
const content = 'some content';
|
||||
const toolName = 'shell';
|
||||
const id = '123';
|
||||
|
||||
it('should return content unchanged if below threshold', async () => {
|
||||
const content = 'Short content';
|
||||
const callId = 'test-call-id';
|
||||
|
||||
const result = await saveTruncatedContent(
|
||||
const result = await saveTruncatedToolOutput(
|
||||
content,
|
||||
callId,
|
||||
toolName,
|
||||
id,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
);
|
||||
|
||||
expect(result).toEqual({ content });
|
||||
const outputFile = path.join(tempRootDir, `${callId}.output`);
|
||||
expect(await fileExists(outputFile)).toBe(false);
|
||||
});
|
||||
|
||||
it('should truncate content by lines when content has many lines', async () => {
|
||||
// Create content that exceeds 100,000 character threshold with many lines
|
||||
const lines = Array(2000).fill('x'.repeat(100));
|
||||
const content = lines.join('\n');
|
||||
const callId = 'test-call-id';
|
||||
|
||||
const result = await saveTruncatedContent(
|
||||
content,
|
||||
callId,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
);
|
||||
|
||||
const expectedOutputFile = path.join(tempRootDir, `${callId}.output`);
|
||||
const expectedOutputFile = path.join(tempRootDir, 'shell_123.txt');
|
||||
expect(result.outputFile).toBe(expectedOutputFile);
|
||||
expect(result.totalLines).toBe(1);
|
||||
|
||||
const savedContent = await fsPromises.readFile(
|
||||
expectedOutputFile,
|
||||
'utf-8',
|
||||
);
|
||||
expect(savedContent).toBe(content);
|
||||
|
||||
// Should contain the first and last lines with 1/5 head and 4/5 tail
|
||||
const head = Math.floor(TRUNCATE_LINES / 5);
|
||||
const beginning = lines.slice(0, head);
|
||||
const end = lines.slice(-(TRUNCATE_LINES - head));
|
||||
const expectedTruncated =
|
||||
beginning.join('\n') +
|
||||
'\n... [CONTENT TRUNCATED] ...\n' +
|
||||
end.join('\n');
|
||||
|
||||
expect(result.content).toContain(
|
||||
'Tool output was too large and has been truncated',
|
||||
);
|
||||
expect(result.content).toContain('Truncated part of the output:');
|
||||
expect(result.content).toContain(expectedTruncated);
|
||||
});
|
||||
|
||||
it('should wrap and truncate content when content has few but long lines', async () => {
|
||||
const content = 'a'.repeat(200_000); // A single very long line
|
||||
const callId = 'test-call-id';
|
||||
const wrapWidth = 120;
|
||||
it('should sanitize tool name in filename', async () => {
|
||||
const content = 'content';
|
||||
const toolName = '../../dangerous/tool';
|
||||
const id = 1;
|
||||
|
||||
// Manually wrap the content to generate the expected file content
|
||||
const wrappedLines: string[] = [];
|
||||
for (let i = 0; i < content.length; i += wrapWidth) {
|
||||
wrappedLines.push(content.substring(i, i + wrapWidth));
|
||||
}
|
||||
const expectedFileContent = wrappedLines.join('\n');
|
||||
|
||||
const result = await saveTruncatedContent(
|
||||
const result = await saveTruncatedToolOutput(
|
||||
content,
|
||||
callId,
|
||||
toolName,
|
||||
id,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
);
|
||||
|
||||
const expectedOutputFile = path.join(tempRootDir, `${callId}.output`);
|
||||
// ../../dangerous/tool -> ______dangerous_tool
|
||||
const expectedOutputFile = path.join(
|
||||
tempRootDir,
|
||||
'______dangerous_tool_1.txt',
|
||||
);
|
||||
expect(result.outputFile).toBe(expectedOutputFile);
|
||||
|
||||
const savedContent = await fsPromises.readFile(
|
||||
expectedOutputFile,
|
||||
'utf-8',
|
||||
);
|
||||
expect(savedContent).toBe(expectedFileContent);
|
||||
|
||||
// Should contain the first and last lines with 1/5 head and 4/5 tail of the wrapped content
|
||||
const head = Math.floor(TRUNCATE_LINES / 5);
|
||||
const beginning = wrappedLines.slice(0, head);
|
||||
const end = wrappedLines.slice(-(TRUNCATE_LINES - head));
|
||||
const expectedTruncated =
|
||||
beginning.join('\n') +
|
||||
'\n... [CONTENT TRUNCATED] ...\n' +
|
||||
end.join('\n');
|
||||
expect(result.content).toContain(
|
||||
'Tool output was too large and has been truncated',
|
||||
);
|
||||
expect(result.content).toContain('Truncated part of the output:');
|
||||
expect(result.content).toContain(expectedTruncated);
|
||||
});
|
||||
|
||||
it('should save to correct file path with call ID', async () => {
|
||||
const content = 'a'.repeat(200_000);
|
||||
const callId = 'unique-call-123';
|
||||
const wrapWidth = 120;
|
||||
it('should sanitize id in filename', async () => {
|
||||
const content = 'content';
|
||||
const toolName = 'shell';
|
||||
const id = '../../etc/passwd';
|
||||
|
||||
// Manually wrap the content to generate the expected file content
|
||||
const wrappedLines: string[] = [];
|
||||
for (let i = 0; i < content.length; i += wrapWidth) {
|
||||
wrappedLines.push(content.substring(i, i + wrapWidth));
|
||||
}
|
||||
const expectedFileContent = wrappedLines.join('\n');
|
||||
|
||||
const result = await saveTruncatedContent(
|
||||
const result = await saveTruncatedToolOutput(
|
||||
content,
|
||||
callId,
|
||||
toolName,
|
||||
id,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
);
|
||||
|
||||
const expectedPath = path.join(tempRootDir, `${callId}.output`);
|
||||
expect(result.outputFile).toBe(expectedPath);
|
||||
|
||||
const savedContent = await fsPromises.readFile(expectedPath, 'utf-8');
|
||||
expect(savedContent).toBe(expectedFileContent);
|
||||
// ../../etc/passwd -> ______etc_passwd
|
||||
const expectedOutputFile = path.join(
|
||||
tempRootDir,
|
||||
'shell_______etc_passwd.txt',
|
||||
);
|
||||
expect(result.outputFile).toBe(expectedOutputFile);
|
||||
});
|
||||
|
||||
it('should include helpful instructions in truncated message', async () => {
|
||||
const content = 'a'.repeat(200_000);
|
||||
const callId = 'test-call-id';
|
||||
it('should format multi-line output correctly', () => {
|
||||
const lines = Array.from({ length: 50 }, (_, i) => `line ${i}`);
|
||||
const content = lines.join('\n');
|
||||
const outputFile = '/tmp/out.txt';
|
||||
|
||||
const result = await saveTruncatedContent(
|
||||
content,
|
||||
callId,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
);
|
||||
const formatted = formatTruncatedToolOutput(content, outputFile, 10);
|
||||
|
||||
expect(result.content).toContain(
|
||||
'read_file tool with the absolute file path above',
|
||||
);
|
||||
expect(result.content).toContain(
|
||||
'read_file tool with offset=0, limit=100',
|
||||
);
|
||||
expect(result.content).toContain(
|
||||
'read_file tool with offset=N to skip N lines',
|
||||
);
|
||||
expect(result.content).toContain(
|
||||
'read_file tool with limit=M to read only M lines',
|
||||
expect(formatted).toContain(
|
||||
'Output too large. Showing the last 10 of 50 lines.',
|
||||
);
|
||||
expect(formatted).toContain('For full output see: /tmp/out.txt');
|
||||
expect(formatted).toContain('line 49');
|
||||
expect(formatted).not.toContain('line 0');
|
||||
});
|
||||
|
||||
it('should sanitize callId to prevent path traversal', async () => {
|
||||
const content = 'a'.repeat(200_000);
|
||||
const callId = '../../../../../etc/passwd';
|
||||
const wrapWidth = 120;
|
||||
it('should truncate "elephant lines" (long single line in multi-line output)', () => {
|
||||
const longLine = 'a'.repeat(2000);
|
||||
const content = `line 1\n${longLine}\nline 3`;
|
||||
const outputFile = '/tmp/out.txt';
|
||||
|
||||
// Manually wrap the content to generate the expected file content
|
||||
const wrappedLines: string[] = [];
|
||||
for (let i = 0; i < content.length; i += wrapWidth) {
|
||||
wrappedLines.push(content.substring(i, i + wrapWidth));
|
||||
}
|
||||
const expectedFileContent = wrappedLines.join('\n');
|
||||
const formatted = formatTruncatedToolOutput(content, outputFile, 3);
|
||||
|
||||
await saveTruncatedContent(
|
||||
content,
|
||||
callId,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
);
|
||||
|
||||
const expectedPath = path.join(tempRootDir, 'passwd.output');
|
||||
|
||||
const savedContent = await fsPromises.readFile(expectedPath, 'utf-8');
|
||||
expect(savedContent).toBe(expectedFileContent);
|
||||
expect(formatted).toContain('(some long lines truncated)');
|
||||
expect(formatted).toContain('... [LINE WIDTH TRUNCATED]');
|
||||
expect(formatted.length).toBeLessThan(longLine.length);
|
||||
});
|
||||
|
||||
it('should handle file write errors gracefully', async () => {
|
||||
const content = 'a'.repeat(50_000);
|
||||
const callId = 'test-call-id-fail';
|
||||
it('should handle massive single-line string with character-based truncation', () => {
|
||||
const content = 'a'.repeat(50000);
|
||||
const outputFile = '/tmp/out.txt';
|
||||
|
||||
const writeFileSpy = vi
|
||||
.spyOn(fsPromises, 'writeFile')
|
||||
.mockRejectedValue(new Error('File write failed'));
|
||||
const formatted = formatTruncatedToolOutput(content, outputFile);
|
||||
|
||||
const result = await saveTruncatedContent(
|
||||
content,
|
||||
callId,
|
||||
tempRootDir,
|
||||
THRESHOLD,
|
||||
TRUNCATE_LINES,
|
||||
expect(formatted).toContain(
|
||||
'Output too large. Showing the last 10,000 characters',
|
||||
);
|
||||
|
||||
expect(result.outputFile).toBeUndefined();
|
||||
expect(result.content).toContain(
|
||||
'[Note: Could not save full output to file]',
|
||||
);
|
||||
expect(writeFileSpy).toHaveBeenCalled();
|
||||
|
||||
writeFileSpy.mockRestore();
|
||||
expect(formatted.endsWith(content.slice(-10000))).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -15,7 +15,6 @@ import { ToolErrorType } from '../tools/tool-error.js';
|
||||
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
|
||||
import { createRequire as createModuleRequire } from 'node:module';
|
||||
import { debugLogger } from './debugLogger.js';
|
||||
import { READ_FILE_TOOL_NAME } from '../tools/tool-names.js';
|
||||
|
||||
const requireModule = createModuleRequire(import.meta.url);
|
||||
|
||||
@@ -517,66 +516,71 @@ export async function fileExists(filePath: string): Promise<boolean> {
|
||||
}
|
||||
}
|
||||
|
||||
export async function saveTruncatedContent(
|
||||
content: string,
|
||||
callId: string,
|
||||
projectTempDir: string,
|
||||
threshold: number,
|
||||
truncateLines: number,
|
||||
): Promise<{ content: string; outputFile?: string }> {
|
||||
if (content.length <= threshold) {
|
||||
return { content };
|
||||
}
|
||||
const MAX_TRUNCATED_LINE_WIDTH = 1000;
|
||||
const MAX_TRUNCATED_CHARS = 10000;
|
||||
|
||||
let lines = content.split('\n');
|
||||
let fileContent = content;
|
||||
/**
|
||||
* Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases.
|
||||
*/
|
||||
export function formatTruncatedToolOutput(
|
||||
contentStr: string,
|
||||
outputFile: string,
|
||||
truncateLines: number = 30,
|
||||
): string {
|
||||
const physicalLines = contentStr.split('\n');
|
||||
const totalPhysicalLines = physicalLines.length;
|
||||
|
||||
// If the content is long but has few lines, wrap it to enable line-based truncation.
|
||||
if (lines.length <= truncateLines) {
|
||||
const wrapWidth = 120; // A reasonable width for wrapping.
|
||||
const wrappedLines: string[] = [];
|
||||
for (const line of lines) {
|
||||
if (line.length > wrapWidth) {
|
||||
for (let i = 0; i < line.length; i += wrapWidth) {
|
||||
wrappedLines.push(line.substring(i, i + wrapWidth));
|
||||
}
|
||||
} else {
|
||||
wrappedLines.push(line);
|
||||
if (totalPhysicalLines > 1) {
|
||||
// Multi-line case: show last N lines, but protect against "elephant" lines.
|
||||
const lastLines = physicalLines.slice(-truncateLines);
|
||||
let someLinesTruncatedInWidth = false;
|
||||
const processedLines = lastLines.map((line) => {
|
||||
if (line.length > MAX_TRUNCATED_LINE_WIDTH) {
|
||||
someLinesTruncatedInWidth = true;
|
||||
return (
|
||||
line.substring(0, MAX_TRUNCATED_LINE_WIDTH) +
|
||||
'... [LINE WIDTH TRUNCATED]'
|
||||
);
|
||||
}
|
||||
}
|
||||
lines = wrappedLines;
|
||||
fileContent = lines.join('\n');
|
||||
}
|
||||
return line;
|
||||
});
|
||||
|
||||
const head = Math.floor(truncateLines / 5);
|
||||
const beginning = lines.slice(0, head);
|
||||
const end = lines.slice(-(truncateLines - head));
|
||||
const truncatedContent =
|
||||
beginning.join('\n') + '\n... [CONTENT TRUNCATED] ...\n' + end.join('\n');
|
||||
|
||||
// Sanitize callId to prevent path traversal.
|
||||
const safeFileName = `${path.basename(callId)}.output`;
|
||||
const outputFile = path.join(projectTempDir, safeFileName);
|
||||
try {
|
||||
await fsPromises.writeFile(outputFile, fileContent);
|
||||
|
||||
return {
|
||||
content: `Tool output was too large and has been truncated.
|
||||
The full output has been saved to: ${outputFile}
|
||||
To read the complete output, use the ${READ_FILE_TOOL_NAME} tool with the absolute file path above. For large files, you can use the offset and limit parameters to read specific sections:
|
||||
- ${READ_FILE_TOOL_NAME} tool with offset=0, limit=100 to see the first 100 lines
|
||||
- ${READ_FILE_TOOL_NAME} tool with offset=N to skip N lines from the beginning
|
||||
- ${READ_FILE_TOOL_NAME} tool with limit=M to read only M lines at a time
|
||||
The truncated output below shows the beginning and end of the content. The marker '... [CONTENT TRUNCATED] ...' indicates where content was removed.
|
||||
This allows you to efficiently examine different parts of the output without loading the entire file.
|
||||
Truncated part of the output:
|
||||
${truncatedContent}`,
|
||||
outputFile,
|
||||
};
|
||||
} catch (_error) {
|
||||
return {
|
||||
content:
|
||||
truncatedContent + `\n[Note: Could not save full output to file]`,
|
||||
};
|
||||
const widthWarning = someLinesTruncatedInWidth
|
||||
? ' (some long lines truncated)'
|
||||
: '';
|
||||
return `Output too large. Showing the last ${processedLines.length} of ${totalPhysicalLines} lines${widthWarning}. For full output see: ${outputFile}
|
||||
...
|
||||
${processedLines.join('\n')}`;
|
||||
} else {
|
||||
// Single massive line case: use character-based truncation description.
|
||||
const snippet = contentStr.slice(-MAX_TRUNCATED_CHARS);
|
||||
return `Output too large. Showing the last ${MAX_TRUNCATED_CHARS.toLocaleString()} characters of the output. For full output see: ${outputFile}
|
||||
...${snippet}`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Saves tool output to a temporary file for later retrieval.
 *
 * The file is named `<toolName>_<id>.txt`. Both parts are lower-cased and every
 * character that is not an ASCII letter or digit is replaced with `_`, so
 * neither value can contribute path separators or `..` segments to the final
 * path.
 *
 * @param content The full tool output to persist.
 * @param toolName Name of the tool that produced the output.
 * @param id Identifier distinguishing this output (a string callId or a
 *   numeric truncationId).
 * @param projectTempDir Directory the file is written into. It is created
 *   (recursively) when it does not exist yet.
 * @returns The path of the written file and the number of `\n`-separated
 *   lines in `content`.
 * @throws Rejects when the directory cannot be created or the file cannot be
 *   written.
 */
export async function saveTruncatedToolOutput(
  content: string,
  toolName: string,
  id: string | number, // Accept string (callId) or number (truncationId)
  projectTempDir: string,
): Promise<{ outputFile: string; totalLines: number }> {
  // One sanitiser for both file name parts keeps the two rules in sync.
  const toSafePart = (part: string): string =>
    part.replace(/[^a-z0-9]/gi, '_').toLowerCase();

  const safeToolName = toSafePart(toolName);
  const safeId = toSafePart(id.toString());
  const fileName = `${safeToolName}_${safeId}.txt`;
  const outputFile = path.join(projectTempDir, fileName);

  // The temp directory may not have been created yet; without this the write
  // below rejects with ENOENT. A no-op when the directory already exists.
  await fsPromises.mkdir(projectTempDir, { recursive: true });
  await fsPromises.writeFile(outputFile, content);

  return {
    outputFile,
    totalLines: content.split('\n').length,
  };
}
|
||||
|
||||
@@ -20,20 +20,28 @@ const IMAGE_TOKEN_ESTIMATE = 3000;
|
||||
// See: https://ai.google.dev/gemini-api/docs/document-processing
|
||||
const PDF_TOKEN_ESTIMATE = 25800;
|
||||
|
||||
// Maximum number of characters to process with the full character-by-character heuristic.
|
||||
// Above this, we use a faster approximation to avoid performance bottlenecks.
|
||||
const MAX_CHARS_FOR_FULL_HEURISTIC = 100_000;
|
||||
|
||||
/**
|
||||
* Estimates token count for parts synchronously using a heuristic.
|
||||
* - Text: character-based heuristic (ASCII vs CJK).
|
||||
* - Text: character-based heuristic (ASCII vs CJK) for small strings, length/4 for massive ones.
|
||||
* - Non-text (Tools, etc): JSON string length / 4.
|
||||
*/
|
||||
export function estimateTokenCountSync(parts: Part[]): number {
|
||||
let totalTokens = 0;
|
||||
for (const part of parts) {
|
||||
if (typeof part.text === 'string') {
|
||||
for (const char of part.text) {
|
||||
if (char.codePointAt(0)! <= 127) {
|
||||
totalTokens += ASCII_TOKENS_PER_CHAR;
|
||||
} else {
|
||||
totalTokens += NON_ASCII_TOKENS_PER_CHAR;
|
||||
if (part.text.length > MAX_CHARS_FOR_FULL_HEURISTIC) {
|
||||
totalTokens += part.text.length / 4;
|
||||
} else {
|
||||
for (const char of part.text) {
|
||||
if (char.codePointAt(0)! <= 127) {
|
||||
totalTokens += ASCII_TOKENS_PER_CHAR;
|
||||
} else {
|
||||
totalTokens += NON_ASCII_TOKENS_PER_CHAR;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user