diff --git a/docs/admin/enterprise-controls.md b/docs/admin/enterprise-controls.md index 8c9ba60a13..5792a6c5bc 100644 --- a/docs/admin/enterprise-controls.md +++ b/docs/admin/enterprise-controls.md @@ -106,6 +106,67 @@ organization. ensures users maintain final control over which permitted servers are actually active in their environment. +#### Required MCP Servers (preview) + +**Default**: empty + +Allows administrators to define MCP servers that are **always injected** into +the user's environment. Unlike the allowlist (which filters user-configured +servers), required servers are automatically added regardless of the user's +local configuration. + +**Required Servers Format:** + +```json +{ + "requiredMcpServers": { + "corp-compliance-tool": { + "url": "https://mcp.corp/compliance", + "type": "http", + "trust": true, + "description": "Corporate compliance tool" + }, + "internal-registry": { + "url": "https://registry.corp/mcp", + "type": "sse", + "authProviderType": "google_credentials", + "oauth": { + "scopes": ["https://www.googleapis.com/auth/scope"] + } + } + } +} +``` + +**Supported Fields:** + +- `url`: (Required) The full URL of the MCP server endpoint. +- `type`: (Required) The connection type (`sse` or `http`). +- `trust`: (Optional) If set to `true`, tool execution will not require user + approval. Defaults to `true` for required servers. +- `description`: (Optional) Human-readable description of the server. +- `authProviderType`: (Optional) Authentication provider (`dynamic_discovery`, + `google_credentials`, or `service_account_impersonation`). +- `oauth`: (Optional) OAuth configuration including `scopes`, `clientId`, and + `clientSecret`. +- `targetAudience`: (Optional) OAuth target audience for service-to-service + auth. +- `targetServiceAccount`: (Optional) Service account email to impersonate. +- `headers`: (Optional) Additional HTTP headers to send with requests. +- `includeTools` / `excludeTools`: (Optional) Tool filtering lists. +- `timeout`: (Optional) Timeout in milliseconds for MCP requests. + +**Client Enforcement Logic:** + +- Required servers are injected **after** allowlist filtering, so they are + always available even if the allowlist is active. +- If a required server has the **same name** as a locally configured server, the + admin configuration **completely overrides** the local one. +- Required servers only support remote transports (`sse`, `http`). Local + execution fields (`command`, `args`, `env`, `cwd`) are not supported. +- Required servers can coexist with allowlisted servers β€” both features work + independently. + ### Unmanaged Capabilities **Enabled/Disabled** | Default: disabled diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 81a05bf51c..d3b08d565a 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1728,7 +1728,11 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `true` - **`admin.mcp.config`** (object): - - **Description:** Admin-configured MCP servers. + - **Description:** Admin-configured MCP servers (allowlist). + - **Default:** `{}` + +- **`admin.mcp.requiredConfig`** (object): + - **Description:** Admin-required MCP servers that are always injected. - **Default:** `{}` - **`admin.skills.enabled`** (boolean): diff --git a/packages/cli/src/acp/fileSystemService.ts b/packages/cli/src/acp/fileSystemService.ts index 02b9d68195..1d3c8ad0b8 100644 --- a/packages/cli/src/acp/fileSystemService.ts +++ b/packages/cli/src/acp/fileSystemService.ts @@ -14,7 +14,7 @@ export class AcpFileSystemService implements FileSystemService { constructor( private readonly connection: acp.AgentSideConnection, private readonly sessionId: string, - private readonly capabilities: acp.FileSystemCapabilities, + private readonly capabilities: acp.FileSystemCapability, private readonly fallback: FileSystemService, ) {} diff --git a/packages/cli/src/commands/mcp/list.test.ts b/packages/cli/src/commands/mcp/list.test.ts index 54534961dd..578894845e 100644 --- a/packages/cli/src/commands/mcp/list.test.ts +++ b/packages/cli/src/commands/mcp/list.test.ts @@ -264,6 +264,7 @@ describe('mcp list command', () => { config: { 'allowed-server': { url: 'http://allowed' }, }, + requiredConfig: {}, }, }; diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 3c74fd05bd..d5e4851e97 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -36,6 +36,7 @@ import { Config, resolveToRealPath, applyAdminAllowlist, + applyRequiredServers, getAdminBlockedMcpServersMessage, type HookDefinition, type HookEventName, @@ -750,6 +751,25 @@ export async function loadCliConfig( } } + // Apply admin-required MCP servers (injected regardless of allowlist) + if (mcpEnabled) { + const requiredMcpConfig = settings.admin?.mcp?.requiredConfig; + if (requiredMcpConfig && Object.keys(requiredMcpConfig).length > 0) { + const requiredResult = applyRequiredServers( + mcpServers ?? {}, + requiredMcpConfig, + ); + mcpServers = requiredResult.mcpServers; + + if (requiredResult.requiredServerNames.length > 0) { + coreEvents.emitConsoleLog( + 'info', + `Admin-required MCP servers injected: ${requiredResult.requiredServerNames.join(', ')}`, + ); + } + } + } + const isAcpMode = !!argv.acp || !!argv.experimentalAcp; let clientName: string | undefined = undefined; if (isAcpMode) { diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts index 06129a4760..a58b9889a2 100644 --- a/packages/cli/src/config/settings.test.ts +++ b/packages/cli/src/config/settings.test.ts @@ -2751,6 +2751,28 @@ describe('Settings Loading and Merging', () => { expect(loadedSettings.merged.admin?.mcp?.config).toEqual(mcpServers); }); + it('should map requiredMcpConfig from remote settings', () => { + const loadedSettings = loadSettings(MOCK_WORKSPACE_DIR); + const requiredMcpConfig = { + 'corp-tool': { + url: 'https://mcp.corp/tool', + type: 'http' as const, + trust: true, + }, + }; + + loadedSettings.setRemoteAdminSettings({ + mcpSetting: { + mcpEnabled: true, + requiredMcpConfig, + }, + }); + + expect(loadedSettings.merged.admin?.mcp?.requiredConfig).toEqual( + requiredMcpConfig, + ); + }); + it('should set skills based on unmanagedCapabilitiesEnabled', () => { const loadedSettings = loadSettings(); loadedSettings.setRemoteAdminSettings({ diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index 711ff93271..beecd6a017 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -480,6 +480,7 @@ export class LoadedSettings { admin.mcp = { enabled: mcpSetting?.mcpEnabled, config: mcpSetting?.mcpConfig?.mcpServers, + requiredConfig: mcpSetting?.requiredMcpConfig, }; admin.extensions = { enabled: cliFeatureSetting?.extensionsSetting?.extensionsEnabled, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index de8fe65c46..f1711f3b92 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -12,7 +12,9 @@ import { DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, DEFAULT_MODEL_CONFIGS, + AuthProviderType, type MCPServerConfig, + type RequiredMcpServerConfig, type BugCommandSettings, type TelemetrySettings, type AuthType, @@ -2435,7 +2437,7 @@ const SETTINGS_SCHEMA = { category: 'Admin', requiresRestart: false, default: {} as Record, - description: 'Admin-configured MCP servers.', + description: 'Admin-configured MCP servers (allowlist).', showInDialog: false, mergeStrategy: MergeStrategy.REPLACE, additionalProperties: { @@ -2443,6 +2445,20 @@ const SETTINGS_SCHEMA = { ref: 'MCPServerConfig', }, }, + requiredConfig: { + type: 'object', + label: 'Required MCP Config', + category: 'Admin', + requiresRestart: false, + default: {} as Record, + description: 'Admin-required MCP servers that are always injected.', + showInDialog: false, + mergeStrategy: MergeStrategy.REPLACE, + additionalProperties: { + type: 'object', + ref: 'RequiredMcpServerConfig', + }, + }, }, }, skills: { @@ -2567,11 +2583,72 @@ export const SETTINGS_SCHEMA_DEFINITIONS: Record< type: 'string', description: 'Authentication provider used for acquiring credentials (for example `dynamic_discovery`).', - enum: [ - 'dynamic_discovery', - 'google_credentials', - 'service_account_impersonation', - ], + enum: Object.values(AuthProviderType), + }, + targetAudience: { + type: 'string', + description: + 'OAuth target audience (CLIENT_ID.apps.googleusercontent.com).', + }, + targetServiceAccount: { + type: 'string', + description: + 'Service account email to impersonate (name@project.iam.gserviceaccount.com).', + }, + }, + }, + RequiredMcpServerConfig: { + type: 'object', + description: + 'Admin-required MCP server configuration (remote transports only).', + additionalProperties: false, + properties: { + url: { + type: 'string', + description: 'URL for the required MCP server.', + }, + type: { + type: 'string', + description: 'Transport type for the required server.', + enum: ['sse', 'http'], + }, + headers: { + type: 'object', + description: 'Additional HTTP headers sent to the server.', + additionalProperties: { type: 'string' }, + }, + timeout: { + type: 'number', + description: 'Timeout in milliseconds for MCP requests.', + }, + trust: { + type: 'boolean', + description: + 'Marks the server as trusted. Defaults to true for admin-required servers.', + }, + description: { + type: 'string', + description: 'Human-readable description of the server.', + }, + includeTools: { + type: 'array', + description: 'Subset of tools enabled for this server.', + items: { type: 'string' }, + }, + excludeTools: { + type: 'array', + description: 'Tools disabled for this server.', + items: { type: 'string' }, + }, + oauth: { + type: 'object', + description: 'OAuth configuration for authenticating with the server.', + additionalProperties: true, + }, + authProviderType: { + type: 'string', + description: 'Authentication provider used for acquiring credentials.', + enum: Object.values(AuthProviderType), }, targetAudience: { type: 'string', diff --git a/packages/core/src/code_assist/admin/admin_controls.test.ts b/packages/core/src/code_assist/admin/admin_controls.test.ts index d676a59a92..afd80ad758 100644 --- a/packages/core/src/code_assist/admin/admin_controls.test.ts +++ b/packages/core/src/code_assist/admin/admin_controls.test.ts @@ -224,6 +224,89 @@ describe('Admin Controls', () => { const result = sanitizeAdminSettings(input); expect(result.strictModeDisabled).toBe(true); }); + + it('should parse requiredMcpServers from mcpConfigJson', () => { + const mcpConfig = { + mcpServers: { + 'allowed-server': { + url: 'http://allowed.com', + type: 'sse' as const, + }, + }, + requiredMcpServers: { + 'corp-tool': { + url: 'https://mcp.corp/tool', + type: 'http' as const, + trust: true, + description: 'Corp compliance tool', + }, + }, + }; + + const input: FetchAdminControlsResponse = { + mcpSetting: { + mcpEnabled: true, + mcpConfigJson: JSON.stringify(mcpConfig), + }, + }; + + const result = sanitizeAdminSettings(input); + expect(result.mcpSetting?.mcpConfig?.mcpServers).toEqual( + mcpConfig.mcpServers, + ); + expect(result.mcpSetting?.requiredMcpConfig).toEqual( + mcpConfig.requiredMcpServers, + ); + }); + + it('should sort requiredMcpServers tool lists for stable comparison', () => { + const mcpConfig = { + requiredMcpServers: { + 'corp-tool': { + url: 'https://mcp.corp/tool', + type: 'http' as const, + includeTools: ['toolC', 'toolA', 'toolB'], + excludeTools: ['toolZ', 'toolX'], + }, + }, + }; + + const input: FetchAdminControlsResponse = { + mcpSetting: { + mcpEnabled: true, + mcpConfigJson: JSON.stringify(mcpConfig), + }, + }; + + const result = sanitizeAdminSettings(input); + const corpTool = result.mcpSetting?.requiredMcpConfig?.['corp-tool']; + expect(corpTool?.includeTools).toEqual(['toolA', 'toolB', 'toolC']); + expect(corpTool?.excludeTools).toEqual(['toolX', 'toolZ']); + }); + + it('should handle mcpConfigJson with only requiredMcpServers and no mcpServers', () => { + const mcpConfig = { + requiredMcpServers: { + 'required-only': { + url: 'https://required.corp/tool', + type: 'http' as const, + }, + }, + }; + + const input: FetchAdminControlsResponse = { + mcpSetting: { + mcpEnabled: true, + mcpConfigJson: JSON.stringify(mcpConfig), + }, + }; + + const result = sanitizeAdminSettings(input); + expect(result.mcpSetting?.mcpConfig?.mcpServers).toBeUndefined(); + expect(result.mcpSetting?.requiredMcpConfig).toEqual( + mcpConfig.requiredMcpServers, + ); + }); }); describe('isDeepStrictEqual verification', () => { diff --git a/packages/core/src/code_assist/admin/admin_controls.ts b/packages/core/src/code_assist/admin/admin_controls.ts index d18fcf3d66..4812ce013e 100644 --- a/packages/core/src/code_assist/admin/admin_controls.ts +++ b/packages/core/src/code_assist/admin/admin_controls.ts @@ -48,6 +48,16 @@ export function sanitizeAdminSettings( } } } + if (mcpConfig.requiredMcpServers) { + for (const server of Object.values(mcpConfig.requiredMcpServers)) { + if (server.includeTools) { + server.includeTools.sort(); + } + if (server.excludeTools) { + server.excludeTools.sort(); + } + } + } } } catch (_e) { // Ignore parsing errors @@ -77,6 +87,7 @@ export function sanitizeAdminSettings( mcpSetting: { mcpEnabled: sanitized.mcpSetting?.mcpEnabled ?? false, mcpConfig: mcpConfig ?? {}, + requiredMcpConfig: mcpConfig?.requiredMcpServers, }, }; } diff --git a/packages/core/src/code_assist/admin/mcpUtils.test.ts b/packages/core/src/code_assist/admin/mcpUtils.test.ts index 313e654d7d..fadfa59331 100644 --- a/packages/core/src/code_assist/admin/mcpUtils.test.ts +++ b/packages/core/src/code_assist/admin/mcpUtils.test.ts @@ -5,8 +5,10 @@ */ import { describe, it, expect } from 'vitest'; -import { applyAdminAllowlist } from './mcpUtils.js'; +import { applyAdminAllowlist, applyRequiredServers } from './mcpUtils.js'; import type { MCPServerConfig } from '../../config/config.js'; +import { AuthProviderType } from '../../config/config.js'; +import type { RequiredMcpServerConfig } from '../types.js'; describe('applyAdminAllowlist', () => { it('should return original servers if no allowlist provided', () => { @@ -111,3 +113,147 @@ describe('applyAdminAllowlist', () => { expect(result.mcpServers['server1']?.includeTools).toEqual(['local-tool']); }); }); + +describe('applyRequiredServers', () => { + it('should return original servers if no required servers provided', () => { + const mcpServers: Record = { + server1: { command: 'cmd1' }, + }; + const result = applyRequiredServers(mcpServers, undefined); + expect(result.mcpServers).toEqual(mcpServers); + expect(result.requiredServerNames).toEqual([]); + }); + + it('should return original servers if required servers is empty', () => { + const mcpServers: Record = { + server1: { command: 'cmd1' }, + }; + const result = applyRequiredServers(mcpServers, {}); + expect(result.mcpServers).toEqual(mcpServers); + expect(result.requiredServerNames).toEqual([]); + }); + + it('should inject required servers when no local config exists', () => { + const mcpServers: Record = { + 'local-server': { command: 'cmd1' }, + }; + const required: Record = { + 'corp-tool': { + url: 'https://mcp.corp.internal/tool', + type: 'http', + description: 'Corp compliance tool', + }, + }; + + const result = applyRequiredServers(mcpServers, required); + expect(Object.keys(result.mcpServers)).toContain('local-server'); + expect(Object.keys(result.mcpServers)).toContain('corp-tool'); + expect(result.requiredServerNames).toEqual(['corp-tool']); + + const corpTool = result.mcpServers['corp-tool']; + expect(corpTool).toBeDefined(); + expect(corpTool?.url).toBe('https://mcp.corp.internal/tool'); + expect(corpTool?.type).toBe('http'); + expect(corpTool?.description).toBe('Corp compliance tool'); + // trust defaults to true for admin-forced servers + expect(corpTool?.trust).toBe(true); + // stdio fields should not be set + expect(corpTool?.command).toBeUndefined(); + expect(corpTool?.args).toBeUndefined(); + }); + + it('should override local server with same name', () => { + const mcpServers: Record = { + 'shared-server': { + command: 'local-cmd', + args: ['local-arg'], + description: 'Local version', + }, + }; + const required: Record = { + 'shared-server': { + url: 'https://admin.corp/shared', + type: 'sse', + trust: false, + description: 'Admin-mandated version', + }, + }; + + const result = applyRequiredServers(mcpServers, required); + const server = result.mcpServers['shared-server']; + + // Admin config should completely override local + expect(server?.url).toBe('https://admin.corp/shared'); + expect(server?.type).toBe('sse'); + expect(server?.trust).toBe(false); + expect(server?.description).toBe('Admin-mandated version'); + // Local fields should NOT be preserved + expect(server?.command).toBeUndefined(); + expect(server?.args).toBeUndefined(); + }); + + it('should preserve auth configuration', () => { + const required: Record = { + 'auth-server': { + url: 'https://auth.corp/tool', + type: 'http', + authProviderType: AuthProviderType.GOOGLE_CREDENTIALS, + oauth: { + scopes: ['https://www.googleapis.com/auth/scope1'], + }, + targetAudience: 'client-id.apps.googleusercontent.com', + headers: { 'X-Custom': 'value' }, + }, + }; + + const result = applyRequiredServers({}, required); + const server = result.mcpServers['auth-server']; + + expect(server?.authProviderType).toBe(AuthProviderType.GOOGLE_CREDENTIALS); + expect(server?.oauth).toEqual({ + scopes: ['https://www.googleapis.com/auth/scope1'], + }); + expect(server?.targetAudience).toBe('client-id.apps.googleusercontent.com'); + expect(server?.headers).toEqual({ 'X-Custom': 'value' }); + }); + + it('should preserve tool filtering', () => { + const required: Record = { + 'filtered-server': { + url: 'https://corp/tool', + type: 'http', + includeTools: ['toolA', 'toolB'], + excludeTools: ['toolC'], + }, + }; + + const result = applyRequiredServers({}, required); + const server = result.mcpServers['filtered-server']; + + expect(server?.includeTools).toEqual(['toolA', 'toolB']); + expect(server?.excludeTools).toEqual(['toolC']); + }); + + it('should coexist with allowlisted servers', () => { + // Simulate post-allowlist filtering + const afterAllowlist: Record = { + 'allowed-server': { + url: 'http://allowed', + type: 'sse', + trust: true, + }, + }; + const required: Record = { + 'required-server': { + url: 'https://required.corp/tool', + type: 'http', + }, + }; + + const result = applyRequiredServers(afterAllowlist, required); + expect(Object.keys(result.mcpServers)).toHaveLength(2); + expect(result.mcpServers['allowed-server']).toBeDefined(); + expect(result.mcpServers['required-server']).toBeDefined(); + expect(result.requiredServerNames).toEqual(['required-server']); + }); +}); diff --git a/packages/core/src/code_assist/admin/mcpUtils.ts b/packages/core/src/code_assist/admin/mcpUtils.ts index 12c5845d5b..768a40847e 100644 --- a/packages/core/src/code_assist/admin/mcpUtils.ts +++ b/packages/core/src/code_assist/admin/mcpUtils.ts @@ -4,7 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { MCPServerConfig } from '../../config/config.js'; +import { MCPServerConfig } from '../../config/config.js'; +import type { RequiredMcpServerConfig } from '../types.js'; /** * Applies the admin allowlist to the local MCP servers. @@ -65,3 +66,58 @@ export function applyAdminAllowlist( } return { mcpServers: filteredMcpServers, blockedServerNames }; } + +/** + * Applies admin-required MCP servers by injecting them into the MCP server + * list. Required servers always take precedence over locally configured servers + * with the same name and cannot be disabled by the user. + * + * @param mcpServers The current MCP servers (after allowlist filtering). + * @param requiredServers The admin-required MCP server configurations. + * @returns The MCP servers with required servers injected, and the list of + * required server names for informational purposes. + */ +export function applyRequiredServers( + mcpServers: Record, + requiredServers: Record | undefined, +): { + mcpServers: Record; + requiredServerNames: string[]; +} { + if (!requiredServers || Object.keys(requiredServers).length === 0) { + return { mcpServers, requiredServerNames: [] }; + } + + const result: Record = { ...mcpServers }; + const requiredServerNames: string[] = []; + + for (const [serverId, requiredConfig] of Object.entries(requiredServers)) { + requiredServerNames.push(serverId); + + // Convert RequiredMcpServerConfig to MCPServerConfig. + // Required servers completely override any local config with the same name. + result[serverId] = new MCPServerConfig( + undefined, // command (stdio not supported for required servers) + undefined, // args + undefined, // env + undefined, // cwd + requiredConfig.url, // url + undefined, // httpUrl (use url + type instead) + requiredConfig.headers, // headers + undefined, // tcp + requiredConfig.type, // type + requiredConfig.timeout, // timeout + requiredConfig.trust ?? true, // trust defaults to true for admin-forced + requiredConfig.description, // description + requiredConfig.includeTools, // includeTools + requiredConfig.excludeTools, // excludeTools + undefined, // extension + requiredConfig.oauth, // oauth + requiredConfig.authProviderType, // authProviderType + requiredConfig.targetAudience, // targetAudience + requiredConfig.targetServiceAccount, // targetServiceAccount + ); + } + + return { mcpServers: result, requiredServerNames }; +} diff --git a/packages/core/src/code_assist/types.ts b/packages/core/src/code_assist/types.ts index d238d1a75e..d2aa4c3c1d 100644 --- a/packages/core/src/code_assist/types.ts +++ b/packages/core/src/code_assist/types.ts @@ -5,6 +5,7 @@ */ import { z } from 'zod'; +import { AuthProviderType } from '../config/config.js'; export interface ClientMetadata { ideType?: ClientMetadataIdeType; @@ -359,8 +360,41 @@ const McpServerConfigSchema = z.object({ excludeTools: z.array(z.string()).optional(), }); +const RequiredMcpServerOAuthSchema = z.object({ + scopes: z.array(z.string()).optional(), + clientId: z.string().optional(), + clientSecret: z.string().optional(), +}); + +export const RequiredMcpServerConfigSchema = z.object({ + // Connection (required for forced servers) + url: z.string(), + type: z.enum(['sse', 'http']), + + // Auth + authProviderType: z.nativeEnum(AuthProviderType).optional(), + oauth: RequiredMcpServerOAuthSchema.optional(), + targetAudience: z.string().optional(), + targetServiceAccount: z.string().optional(), + headers: z.record(z.string()).optional(), + + // Common + trust: z.boolean().optional(), + timeout: z.number().optional(), + description: z.string().optional(), + + // Tool filtering + includeTools: z.array(z.string()).optional(), + excludeTools: z.array(z.string()).optional(), +}); + +export type RequiredMcpServerConfig = z.infer< + typeof RequiredMcpServerConfigSchema +>; + export const McpConfigDefinitionSchema = z.object({ mcpServers: z.record(McpServerConfigSchema).optional(), + requiredMcpServers: z.record(RequiredMcpServerConfigSchema).optional(), }); export type McpConfigDefinition = z.infer; @@ -377,6 +411,7 @@ export const AdminControlsSettingsSchema = z.object({ .object({ mcpEnabled: z.boolean().optional(), mcpConfig: McpConfigDefinitionSchema.optional(), + requiredMcpConfig: z.record(RequiredMcpServerConfigSchema).optional(), }) .optional(), cliFeatureSetting: CliFeatureSettingSchema.optional(), diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index d5ff8714b0..378277d934 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -25,6 +25,7 @@ import { GREP_PARAM_AFTER, READ_FILE_PARAM_START_LINE, READ_FILE_PARAM_END_LINE, + READ_FILE_PARAM_FULL, SHELL_PARAM_IS_BACKGROUND, EDIT_PARAM_OLD_STRING, TRACKER_CREATE_TASK_TOOL_NAME, @@ -215,7 +216,7 @@ Use the following guidelines to optimize your search and read patterns. - **Searching:** utilize search tools like ${GREP_TOOL_NAME} and ${GLOB_TOOL_NAME} with a conservative result count (\`${GREP_PARAM_TOTAL_MAX_MATCHES}\`) and a narrow scope (\`${GREP_PARAM_INCLUDE_PATTERN}\` and \`${GREP_PARAM_EXCLUDE_PATTERN}\` parameters). - **Searching and editing:** utilize search tools like ${GREP_TOOL_NAME} with a conservative result count and a narrow scope. Use \`${GREP_PARAM_CONTEXT}\`, \`${GREP_PARAM_BEFORE}\`, and/or \`${GREP_PARAM_AFTER}\` to request enough context to avoid the need to read the file before editing matches. - **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. -- **Large files:** utilize search tools like ${GREP_TOOL_NAME} and/or ${READ_FILE_TOOL_NAME} called in parallel with '${READ_FILE_PARAM_START_LINE}' and '${READ_FILE_PARAM_END_LINE}' to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Large files:** utilize ${READ_FILE_TOOL_NAME} to get a summary/outline of the file, then use '${READ_FILE_PARAM_START_LINE}' and '${READ_FILE_PARAM_END_LINE}' for targeted reads of relevant sections, or use '${READ_FILE_PARAM_FULL}: true' for small to medium files when the complete context is required. - **Navigating:** read the minimum required to not require additional turns spent reading the file. diff --git a/packages/core/src/tools/__snapshots__/read-file.test.ts.snap b/packages/core/src/tools/__snapshots__/read-file.test.ts.snap index 36dbcf1572..ad24519dee 100644 --- a/packages/core/src/tools/__snapshots__/read-file.test.ts.snap +++ b/packages/core/src/tools/__snapshots__/read-file.test.ts.snap @@ -1,7 +1,7 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`ReadFileTool > getSchema > should return the Gemini 3 schema when a Gemini 3 modelId is provided 1`] = `"Reads and returns the content of a specified file. To maintain context efficiency, you MUST use 'start_line' and 'end_line' for targeted, surgical reads of specific sections. For your safety, the tool will automatically truncate output exceeding 2000 lines, 2000 characters per line, or 20MB in size; however, triggering these limits is considered token-inefficient. Always retrieve only the minimum content necessary for your next step. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files."`; +exports[`ReadFileTool > getSchema > should return the Gemini 3 schema when a Gemini 3 modelId is provided 1`] = `"Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. For your safety, the tool will automatically truncate output exceeding 2000 lines, 2000 characters per line, or 20MB in size; however, triggering these limits is considered token-inefficient. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files."`; -exports[`ReadFileTool > getSchema > should return the base schema when no modelId is provided 1`] = `"Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'start_line' and 'end_line' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges."`; +exports[`ReadFileTool > getSchema > should return the base schema when no modelId is provided 1`] = `"Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files."`; -exports[`ReadFileTool > getSchema > should return the schema from the resolver when modelId is provided 1`] = `"Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'start_line' and 'end_line' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges."`; +exports[`ReadFileTool > getSchema > should return the schema from the resolver when modelId is provided 1`] = `"Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files."`; diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index e2bab4d050..709c137236 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -410,7 +410,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: read_file 1`] = ` { - "description": "Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'start_line' and 'end_line' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.", + "description": "Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.", "name": "read_file", "parametersJsonSchema": { "properties": { @@ -422,6 +422,10 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps "description": "The path to the file to read.", "type": "string", }, + "full": { + "description": "Optional: If true, returns the full file contents. If false (default), large files may be summarized for efficiency.", + "type": "boolean", + }, "start_line": { "description": "Optional: The 1-based line number to start reading from.", "type": "number", @@ -1199,7 +1203,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: read_file 1`] = ` { - "description": "Reads and returns the content of a specified file. To maintain context efficiency, you MUST use 'start_line' and 'end_line' for targeted, surgical reads of specific sections. For your safety, the tool will automatically truncate output exceeding 2000 lines, 2000 characters per line, or 20MB in size; however, triggering these limits is considered token-inefficient. Always retrieve only the minimum content necessary for your next step. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.", + "description": "Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. For your safety, the tool will automatically truncate output exceeding 2000 lines, 2000 characters per line, or 20MB in size; however, triggering these limits is considered token-inefficient. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.", "name": "read_file", "parametersJsonSchema": { "properties": { @@ -1211,6 +1215,10 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > "description": "The path to the file to read.", "type": "string", }, + "full": { + "description": "Optional: If true, returns the full file contents. If false (default), large files may be summarized for efficiency.", + "type": "boolean", + }, "start_line": { "description": "Optional: The 1-based line number to start reading from.", "type": "number", diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index b39dc42286..f894fb3003 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -51,6 +51,7 @@ export const LS_PARAM_IGNORE = 'ignore'; export const READ_FILE_TOOL_NAME = 'read_file'; export const READ_FILE_PARAM_START_LINE = 'start_line'; export const READ_FILE_PARAM_END_LINE = 'end_line'; +export const READ_FILE_PARAM_FULL = 'full'; // -- run_shell_command -- export const SHELL_TOOL_NAME = 'run_shell_command'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index b5121ca5d2..da3be480a5 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -50,6 +50,7 @@ export { // Tool-specific parameter names READ_FILE_PARAM_START_LINE, READ_FILE_PARAM_END_LINE, + READ_FILE_PARAM_FULL, WRITE_FILE_PARAM_CONTENT, GREP_PARAM_INCLUDE_PATTERN, GREP_PARAM_EXCLUDE_PATTERN, diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 5c219f4685..66f5d549be 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -36,6 +36,7 @@ import { // Tool-specific parameter names READ_FILE_PARAM_START_LINE, READ_FILE_PARAM_END_LINE, + READ_FILE_PARAM_FULL, WRITE_FILE_PARAM_CONTENT, GREP_PARAM_INCLUDE_PATTERN, GREP_PARAM_EXCLUDE_PATTERN, @@ -83,7 +84,7 @@ import { export const DEFAULT_LEGACY_SET: CoreToolSet = { read_file: { name: READ_FILE_TOOL_NAME, - description: `Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'start_line' and 'end_line' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.`, + description: `Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.`, parametersJsonSchema: { type: 'object', properties: { @@ -101,6 +102,11 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = { 'Optional: The 1-based line number to end reading at (inclusive).', type: 'number', }, + [READ_FILE_PARAM_FULL]: { + description: + 'Optional: If true, returns the full file contents. If false (default), large files may be summarized for efficiency.', + type: 'boolean', + }, }, required: [PARAM_FILE_PATH], }, diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index cac98a90b3..eb9fbc63f8 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -36,6 +36,7 @@ import { // Tool-specific parameter names READ_FILE_PARAM_START_LINE, READ_FILE_PARAM_END_LINE, + READ_FILE_PARAM_FULL, WRITE_FILE_PARAM_CONTENT, GREP_PARAM_INCLUDE_PATTERN, GREP_PARAM_EXCLUDE_PATTERN, @@ -91,7 +92,7 @@ import { export const GEMINI_3_SET: CoreToolSet = { read_file: { name: READ_FILE_TOOL_NAME, - description: `Reads and returns the content of a specified file. To maintain context efficiency, you MUST use 'start_line' and 'end_line' for targeted, surgical reads of specific sections. For your safety, the tool will automatically truncate output exceeding ${DEFAULT_MAX_LINES_TEXT_FILE} lines, ${MAX_LINE_LENGTH_TEXT_FILE} characters per line, or ${MAX_FILE_SIZE_MB}MB in size; however, triggering these limits is considered token-inefficient. Always retrieve only the minimum content necessary for your next step. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.`, + description: `Reads and returns the content of a specified file. For large files, the tool will return a summary/outline to help you understand the file's structure. You can then use 'start_line' and 'end_line' for targeted reads of specific sections, or 'full: true' to retrieve the complete content. For your safety, the tool will automatically truncate output exceeding ${DEFAULT_MAX_LINES_TEXT_FILE} lines, ${MAX_LINE_LENGTH_TEXT_FILE} characters per line, or ${MAX_FILE_SIZE_MB}MB in size; however, triggering these limits is considered token-inefficient. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.`, parametersJsonSchema: { type: 'object', properties: { @@ -109,6 +110,11 @@ export const GEMINI_3_SET: CoreToolSet = { 'Optional: The 1-based line number to end reading at (inclusive).', type: 'number', }, + [READ_FILE_PARAM_FULL]: { + description: + 'Optional: If true, returns the full file contents. If false (default), large files may be summarized for efficiency.', + type: 'boolean', + }, }, required: [PARAM_FILE_PATH], }, diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts index fa7a0669d6..cd1678450d 100644 --- a/packages/core/src/tools/read-file.test.ts +++ b/packages/core/src/tools/read-file.test.ts @@ -8,6 +8,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import { ReadFileTool, type ReadFileToolParams } from './read-file.js'; import { ToolErrorType } from './tool-error.js'; import path from 'node:path'; +import { execSync } from 'node:child_process'; import { isSubpath } from '../utils/paths.js'; import os from 'node:os'; import fs from 'node:fs'; @@ -41,6 +42,14 @@ vi.mock('./jit-context.js', () => ({ JIT_CONTEXT_SUFFIX: '\n--- End Project Context ---', })); +vi.mock('node:child_process', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + execSync: vi.fn(), + }; +}); + describe('ReadFileTool', () => { let tempRootDir: string; let tool: ReadFileTool; @@ -610,7 +619,7 @@ describe('ReadFileTool', () => { const schema = tool.getSchema(modelId); expect(schema.name).toBe(ReadFileTool.Name); expect(schema.description).toMatchSnapshot(); - expect(schema.description).toContain('surgical reads'); + expect(schema.description).toContain('targeted reads'); }); }); @@ -685,4 +694,124 @@ describe('ReadFileTool', () => { ); }); }); + + describe('tilth and full parameter', () => { + it('should use tilth for large files when full is false', async () => { + const filePath = path.join(tempRootDir, 'large.txt'); + const content = 'A'.repeat(5000); // Exceeds default 4096 threshold + fs.writeFileSync(filePath, content); + + vi.mocked(execSync).mockReturnValue('tilth summary'); + + const params: ReadFileToolParams = { + file_path: 'large.txt', + full: false, + }; + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + expect(execSync).toHaveBeenCalledWith( + expect.stringContaining(`npx -y tilth --budget 1000 "${filePath}"`), + expect.any(Object), + ); + expect(result.llmContent).toContain('tilth summary'); + expect(result.returnDisplay).toContain('Summarized large file'); + }); + + it('should NOT use tilth when full is true', async () => { + const filePath = path.join(tempRootDir, 'large.txt'); + const content = 'A'.repeat(5000); + fs.writeFileSync(filePath, content); + + vi.mocked(execSync).mockClear(); + + const params: ReadFileToolParams = { + file_path: 'large.txt', + full: true, + }; + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + expect(execSync).not.toHaveBeenCalled(); + // Should return full content (clamped by MAX_LINE_LENGTH_TEXT_FILE per line, but not summarized by tilth) + expect(result.llmContent).not.toContain('--- FILE SUMMARY ---'); + }); + + it('should use tilth when start_line is 1 and no end_line is provided', async () => { + const filePath = path.join(tempRootDir, 'large.txt'); + const content = 'A\n'.repeat(3000); // 6000 bytes + fs.writeFileSync(filePath, content); + + vi.mocked(execSync).mockClear(); + vi.mocked(execSync).mockReturnValue('tilth summary for line 1'); + + const params: ReadFileToolParams = { + file_path: 'large.txt', + start_line: 1, + }; + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + expect(execSync).toHaveBeenCalled(); + expect(result.llmContent).toContain('tilth summary for line 1'); + }); + + it('should NOT use tilth when custom start_line (not 1) is provided', async () => { + const filePath = path.join(tempRootDir, 'large.txt'); + const content = 'A\n'.repeat(3000); + fs.writeFileSync(filePath, content); + + vi.mocked(execSync).mockClear(); + + const params: ReadFileToolParams = { + file_path: 'large.txt', + start_line: 10, + }; + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + expect(execSync).not.toHaveBeenCalled(); + expect(result.llmContent).not.toContain('--- FILE SUMMARY ---'); + }); + + it('should read to end when start_line > 1 is provided without end_line', async () => { + const filePath = path.join(tempRootDir, 'large.txt'); + const lines = Array.from({ length: 3000 }, (_, i) => `Line ${i + 1}`); + fs.writeFileSync(filePath, lines.join('\n')); + + vi.mocked(execSync).mockClear(); + + const params: ReadFileToolParams = { + file_path: 'large.txt', + start_line: 10, + }; + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + expect(execSync).not.toHaveBeenCalled(); + expect(result.llmContent).toContain('Line 10'); + expect(result.llmContent).not.toContain('Line 1\n'); + expect(result.returnDisplay).toContain('Read lines 10-'); // Clamped by default max lines, but direct read + }); + + it('should respect custom GEMINI_CLI_FULL_READ_THRESHOLD', async () => { + const filePath = path.join(tempRootDir, 'medium.txt'); + const content = 'A'.repeat(1000); + fs.writeFileSync(filePath, content); + + vi.stubEnv('GEMINI_CLI_FULL_READ_THRESHOLD', '500'); + vi.mocked(execSync).mockReturnValue('tilth summary'); + + const params: ReadFileToolParams = { + file_path: 'medium.txt', + }; + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + expect(execSync).toHaveBeenCalled(); + expect(result.llmContent).toContain('tilth summary'); + + vi.unstubAllEnvs(); + }); + }); }); diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index 69f9e0274b..830027949f 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -6,6 +6,8 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js'; import path from 'node:path'; +import { execSync } from 'node:child_process'; +import fs from 'node:fs'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { BaseDeclarativeTool, @@ -24,6 +26,8 @@ import type { PartListUnion } from '@google/genai'; import { processSingleFileContent, getSpecificMimeType, + detectFileType, + type ProcessedFileReadResult, } from '../utils/fileUtils.js'; import type { Config } from '../config/config.js'; import { FileOperation } from '../telemetry/metrics.js'; @@ -34,6 +38,7 @@ import { READ_FILE_TOOL_NAME, READ_FILE_DISPLAY_NAME } from './tool-names.js'; import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; import { READ_FILE_DEFINITION } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; +import { MAX_FILE_SIZE_MB } from '../utils/constants.js'; import { discoverJitContext, appendJitContext, @@ -58,6 +63,12 @@ export interface ReadFileToolParams { * The line number to end reading at (optional, 1-based, inclusive) */ end_line?: number; + + /** + * If true, returns the full file contents. + * If false (default), large files may be summarized for efficiency. + */ + full?: boolean; } class ReadFileToolInvocation extends BaseToolInvocation< @@ -120,13 +131,81 @@ class ReadFileToolInvocation extends BaseToolInvocation< }; } - const result = await processSingleFileContent( - this.resolvedPath, - this.config.getTargetDir(), - this.config.getFileSystemService(), - this.params.start_line, - this.params.end_line, - ); + let result: ProcessedFileReadResult; + if (!fs.existsSync(this.resolvedPath)) { + result = await processSingleFileContent( + this.resolvedPath, + this.config.getTargetDir(), + this.config.getFileSystemService(), + this.params.start_line, + this.params.end_line, + this.params.full, + ); + } else { + const stats = await fs.promises.stat(this.resolvedPath); + const fileSizeInMB = stats.size / (1024 * 1024); + if (fileSizeInMB > MAX_FILE_SIZE_MB) { + result = await processSingleFileContent( + this.resolvedPath, + this.config.getTargetDir(), + this.config.getFileSystemService(), + this.params.start_line, + this.params.end_line, + this.params.full, + ); + } else { + const fileType = await detectFileType(this.resolvedPath); + const fullReadThreshold = parseInt( + process.env['GEMINI_CLI_FULL_READ_THRESHOLD'] ?? '4096', + 10, + ); + + const isExplicitLineRange = + (this.params.start_line !== undefined && + this.params.start_line !== 1) || + this.params.end_line !== undefined; + + const isDirectReadRequired = + this.params.full === true || + isExplicitLineRange || + stats.size < fullReadThreshold || + fileType !== 'text'; + if (isDirectReadRequired) { + result = await processSingleFileContent( + this.resolvedPath, + this.config.getTargetDir(), + this.config.getFileSystemService(), + this.params.start_line, + this.params.end_line, + this.params.full, + ); + } else { + try { + const summary = execSync( + `npx -y tilth --budget 1000 "${this.resolvedPath}"`, + { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'pipe'], + }, + ).toString(); + result = { + llmContent: summary, + returnDisplay: `Summarized large file: ${shortenPath(makeRelative(this.resolvedPath, this.config.getTargetDir()))}`, + }; + } catch (_error) { + // Fallback to normal read if tilth fails + result = await processSingleFileContent( + this.resolvedPath, + this.config.getTargetDir(), + this.config.getFileSystemService(), + this.params.start_line, + this.params.end_line, + this.params.full, + ); + } + } + } + } if (result.error) { return { @@ -256,6 +335,10 @@ export class ReadFileTool extends BaseDeclarativeTool< return 'start_line cannot be greater than end_line'; } + if (params.full !== undefined && typeof params.full !== 'boolean') { + return 'full must be a boolean'; + } + const fileFilteringOptions = this.config.getFileFilteringOptions(); if ( this.fileDiscoveryService.shouldIgnoreFile( diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index e818881662..daa26c7d71 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -34,6 +34,7 @@ import { // Tool-specific parameter names READ_FILE_PARAM_START_LINE, READ_FILE_PARAM_END_LINE, + READ_FILE_PARAM_FULL, WRITE_FILE_PARAM_CONTENT, GREP_PARAM_INCLUDE_PATTERN, GREP_PARAM_EXCLUDE_PATTERN, @@ -107,6 +108,7 @@ export { // Tool-specific parameter names READ_FILE_PARAM_START_LINE, READ_FILE_PARAM_END_LINE, + READ_FILE_PARAM_FULL, WRITE_FILE_PARAM_CONTENT, GREP_PARAM_INCLUDE_PATTERN, GREP_PARAM_EXCLUDE_PATTERN, diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index dcbf22c5a7..dee0fa3d5a 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -30,7 +30,6 @@ import { processSingleFileContent, detectBOM, readFileWithEncoding, - fileExists, readWasmBinaryFromDisk, saveTruncatedToolOutput, formatTruncatedToolOutput, @@ -38,6 +37,7 @@ import { isEmpty, } from './fileUtils.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; +import { ToolErrorType } from '../tools/tool-error.js'; vi.mock('mime/lite', () => ({ default: { getType: vi.fn() }, @@ -98,649 +98,239 @@ describe('fileUtils', () => { ); expect(result).toBeInstanceOf(Uint8Array); - expect(result).toStrictEqual(expectedBytes); + expect(result).toEqual(expectedBytes); + }); + }); + + describe('detectBOM', () => { + it('detects UTF-8 BOM', () => { + const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x61, 0x62, 0x63]); + expect(detectBOM(buf)).toEqual({ encoding: 'utf8', bomLength: 3 }); + }); + + it('detects UTF-16 LE BOM', () => { + const buf = Buffer.from([0xff, 0xfe, 0x61, 0x00]); + expect(detectBOM(buf)).toEqual({ encoding: 'utf16le', bomLength: 2 }); + }); + + it('detects UTF-16 BE BOM', () => { + const buf = Buffer.from([0xfe, 0xff, 0x00, 0x61]); + expect(detectBOM(buf)).toEqual({ encoding: 'utf16be', bomLength: 2 }); + }); + + it('detects UTF-32 LE BOM', () => { + const buf = Buffer.from([0xff, 0xfe, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00]); + expect(detectBOM(buf)).toEqual({ encoding: 'utf32le', bomLength: 4 }); + }); + + it('detects UTF-32 BE BOM', () => { + const buf = Buffer.from([0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x61]); + expect(detectBOM(buf)).toEqual({ encoding: 'utf32be', bomLength: 4 }); + }); + + it('returns null for no BOM', () => { + const buf = Buffer.from([0x61, 0x62, 0x63]); + expect(detectBOM(buf)).toBeNull(); + }); + }); + + describe('readFileWithEncoding', () => { + it('reads UTF-8 file without BOM', async () => { + actualNodeFs.writeFileSync(testTextFilePath, 'hello'); + expect(await readFileWithEncoding(testTextFilePath)).toBe('hello'); + }); + + it('reads UTF-8 file with BOM', async () => { + const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x68, 0x69]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await readFileWithEncoding(testTextFilePath)).toBe('hi'); + }); + + it('reads UTF-16 LE file with BOM', async () => { + const buf = Buffer.from([0xff, 0xfe, 0x68, 0x00, 0x69, 0x00]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await readFileWithEncoding(testTextFilePath)).toBe('hi'); + }); + + it('reads UTF-16 BE file with BOM', async () => { + const buf = Buffer.from([0xfe, 0xff, 0x00, 0x68, 0x00, 0x69]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await readFileWithEncoding(testTextFilePath)).toBe('hi'); + }); + + it('reads UTF-32 LE file with BOM', async () => { + const buf = Buffer.from([ + 0xff, 0xfe, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + ]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await readFileWithEncoding(testTextFilePath)).toBe('hi'); + }); + + it('reads UTF-32 BE file with BOM', async () => { + const buf = Buffer.from([ + 0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x69, + ]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await readFileWithEncoding(testTextFilePath)).toBe('hi'); }); }); describe('isWithinRoot', () => { - const defaultRoot = path.resolve('/project/root'); - - it.each([ - { - name: 'a path directly within the root', - path: path.join(defaultRoot, 'file.txt'), - expected: true, - }, - { - name: 'a path in a subdirectory within the root', - path: path.join(defaultRoot, 'subdir', 'file.txt'), - expected: true, - }, - { name: 'the root path itself', path: defaultRoot, expected: true }, - { - name: 'a path with a trailing slash', - path: path.join(defaultRoot, 'file.txt') + path.sep, - expected: true, - }, - { - name: 'the root path with a trailing slash', - path: defaultRoot + path.sep, - expected: true, - }, - { - name: 'a sub-path of the path to check', - path: path.resolve('/project/root/sub'), - root: path.resolve('/project/root'), - expected: true, - }, - { - name: 'a path outside the root', - path: path.resolve('/project/other', 'file.txt'), - expected: false, - }, - { - name: 'an unrelated path', - path: path.resolve('/unrelated', 'file.txt'), - expected: false, - }, - { - name: 'a path that only partially matches the root prefix', - path: path.resolve('/project/root-but-actually-different'), - expected: false, - }, - { - name: 'a root path that is a sub-path of the path to check', - path: path.resolve('/project/root'), - root: path.resolve('/project/root/sub'), - expected: false, - }, - { - name: 'a POSIX path inside', - path: '/project/root/file.txt', - root: '/project/root', - expected: true, - }, - { - name: 'a POSIX path outside', - path: '/project/other/file.txt', - root: '/project/root', - expected: false, - }, - ])( - 'should return $expected for $name', - ({ path: testPath, root, expected }) => { - expect(isWithinRoot(testPath, root || defaultRoot)).toBe(expected); - }, - ); - }); - - describe('getRealPath', () => { - it('should resolve a real path for an existing file', () => { - const testFile = path.join(tempRootDir, 'real.txt'); - actualNodeFs.writeFileSync(testFile, 'content'); - expect(getRealPath(testFile)).toBe(actualNodeFs.realpathSync(testFile)); + it('should return true if path is within root', () => { + const root = '/path/to/project'; + const file = '/path/to/project/src/index.js'; + expect(isWithinRoot(file, root)).toBe(true); }); - it('should return absolute resolved path for a non-existent file', () => { - const ghostFile = path.join(tempRootDir, 'ghost.txt'); - expect(getRealPath(ghostFile)).toBe(path.resolve(ghostFile)); + it('should return true if path is root itself', () => { + const root = '/path/to/project'; + expect(isWithinRoot(root, root)).toBe(true); }); - it('should resolve symbolic links', () => { - const targetFile = path.join(tempRootDir, 'target.txt'); - const linkFile = path.join(tempRootDir, 'link.txt'); - actualNodeFs.writeFileSync(targetFile, 'content'); - actualNodeFs.symlinkSync(targetFile, linkFile); + it('should return false if path is outside root', () => { + const root = '/path/to/project'; + const file = '/path/to/other/project/src/index.js'; + expect(isWithinRoot(file, root)).toBe(false); + }); - expect(getRealPath(linkFile)).toBe(actualNodeFs.realpathSync(targetFile)); + it('should handle trailing separators in root', () => { + const root = '/path/to/project/'; + const file = '/path/to/project/src/index.js'; + expect(isWithinRoot(file, root)).toBe(true); + }); + + it('should handle relative paths and resolve them', () => { + const root = './project'; + const file = './project/src/index.js'; + // Resolving against process.cwd which is mocked in beforeEach + const resolvedRoot = path.resolve(tempRootDir, root); + const resolvedFile = path.resolve(tempRootDir, file); + expect(isWithinRoot(resolvedFile, resolvedRoot)).toBe(true); }); }); describe('isEmpty', () => { - it('should return false for a non-empty file', async () => { - const testFile = path.join(tempRootDir, 'full.txt'); - actualNodeFs.writeFileSync(testFile, 'some content'); - expect(await isEmpty(testFile)).toBe(false); + it('returns true for 0-byte file', async () => { + actualNodeFs.writeFileSync(testTextFilePath, ''); + expect(await isEmpty(testTextFilePath)).toBe(true); }); - it('should return true for an empty file', async () => { - const testFile = path.join(tempRootDir, 'empty.txt'); - actualNodeFs.writeFileSync(testFile, ' '); - expect(await isEmpty(testFile)).toBe(true); + it('returns true for whitespace-only file', async () => { + actualNodeFs.writeFileSync(testTextFilePath, ' \n \r\t '); + expect(await isEmpty(testTextFilePath)).toBe(true); }); - it('should return true for a non-existent file (defensive)', async () => { - const testFile = path.join(tempRootDir, 'ghost.txt'); - expect(await isEmpty(testFile)).toBe(true); - }); - }); - - describe('fileExists', () => { - it('should return true if the file exists', async () => { - const testFile = path.join(tempRootDir, 'exists.txt'); - actualNodeFs.writeFileSync(testFile, 'content'); - await expect(fileExists(testFile)).resolves.toBe(true); + it('returns true for UTF-8 BOM + whitespace', async () => { + const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x20, 0x0a]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await isEmpty(testTextFilePath)).toBe(true); }); - it('should return false if the file does not exist', async () => { - const testFile = path.join(tempRootDir, 'does-not-exist.txt'); - await expect(fileExists(testFile)).resolves.toBe(false); + it('returns false for file with content', async () => { + actualNodeFs.writeFileSync(testTextFilePath, 'hello'); + expect(await isEmpty(testTextFilePath)).toBe(false); }); - it('should return true for a directory that exists', async () => { - const testDir = path.join(tempRootDir, 'exists-dir'); - actualNodeFs.mkdirSync(testDir); - await expect(fileExists(testDir)).resolves.toBe(true); + it('returns true if file is missing', async () => { + expect(await isEmpty(nonexistentFilePath)).toBe(true); }); }); describe('isBinaryFile', () => { - let filePathForBinaryTest: string; - - beforeEach(() => { - filePathForBinaryTest = path.join(tempRootDir, 'binaryCheck.tmp'); - }); - - afterEach(() => { - if (actualNodeFs.existsSync(filePathForBinaryTest)) { - actualNodeFs.unlinkSync(filePathForBinaryTest); - } - }); - it('should return false for an empty file', async () => { - actualNodeFs.writeFileSync(filePathForBinaryTest, ''); - expect(await isBinaryFile(filePathForBinaryTest)).toBe(false); + actualNodeFs.writeFileSync(testTextFilePath, ''); + expect(await isBinaryFile(testTextFilePath)).toBe(false); }); - it('should return false for a typical text file', async () => { - actualNodeFs.writeFileSync( - filePathForBinaryTest, - 'Hello, world!\nThis is a test file with normal text content.', - ); - expect(await isBinaryFile(filePathForBinaryTest)).toBe(false); + it('should return false for a plain text file', async () => { + actualNodeFs.writeFileSync(testTextFilePath, 'This is some plain text.'); + expect(await isBinaryFile(testTextFilePath)).toBe(false); }); - it('should return true for a file with many null bytes', async () => { - const binaryContent = Buffer.from([ - 0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, - ]); // "He\0llo\0\0\0\0\0" - actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent); - expect(await isBinaryFile(filePathForBinaryTest)).toBe(true); + it('should return true for a file with null bytes', async () => { + const content = Buffer.from([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x21]); + actualNodeFs.writeFileSync(testBinaryFilePath, content); + expect(await isBinaryFile(testBinaryFilePath)).toBe(true); }); - it('should return true for a file with high percentage of non-printable ASCII', async () => { - const binaryContent = Buffer.from([ - 0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06, - ]); // AB\x01\x02\x03\x04\x05CD\x06 - actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent); - expect(await isBinaryFile(filePathForBinaryTest)).toBe(true); - }); - - it('should return false if file access fails (e.g., ENOENT)', async () => { - // Ensure the file does not exist - if (actualNodeFs.existsSync(filePathForBinaryTest)) { - actualNodeFs.unlinkSync(filePathForBinaryTest); + it('should return true for a file with high non-printable character ratio', async () => { + const content = Buffer.alloc(100); + for (let i = 0; i < 40; i++) { + content[i] = 1; // Non-printable SOH } - expect(await isBinaryFile(filePathForBinaryTest)).toBe(false); - }); - }); - - describe('BOM detection and encoding', () => { - let testDir: string; - - beforeEach(async () => { - testDir = await fsPromises.mkdtemp( - path.join( - await fsPromises.realpath(os.tmpdir()), - 'fileUtils-bom-test-', - ), - ); + actualNodeFs.writeFileSync(testBinaryFilePath, content); + expect(await isBinaryFile(testBinaryFilePath)).toBe(true); }); - afterEach(async () => { - if (testDir) { - await fsPromises.rm(testDir, { recursive: true, force: true }); - } - }); - - describe('detectBOM', () => { - it('should detect UTF-8 BOM', () => { - const buf = Buffer.from([ - 0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0x6f, - ]); - const result = detectBOM(buf); - expect(result).toEqual({ encoding: 'utf8', bomLength: 3 }); - }); - - it('should detect UTF-16 LE BOM', () => { - const buf = Buffer.from([0xff, 0xfe, 0x48, 0x00, 0x65, 0x00]); - const result = detectBOM(buf); - expect(result).toEqual({ encoding: 'utf16le', bomLength: 2 }); - }); - - it('should detect UTF-16 BE BOM', () => { - const buf = Buffer.from([0xfe, 0xff, 0x00, 0x48, 0x00, 0x65]); - const result = detectBOM(buf); - expect(result).toEqual({ encoding: 'utf16be', bomLength: 2 }); - }); - - it('should detect UTF-32 LE BOM', () => { - const buf = Buffer.from([ - 0xff, 0xfe, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - ]); - const result = detectBOM(buf); - expect(result).toEqual({ encoding: 'utf32le', bomLength: 4 }); - }); - - it('should detect UTF-32 BE BOM', () => { - const buf = Buffer.from([ - 0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x48, - ]); - const result = detectBOM(buf); - expect(result).toEqual({ encoding: 'utf32be', bomLength: 4 }); - }); - - it('should return null for no BOM', () => { - const buf = Buffer.from([0x48, 0x65, 0x6c, 0x6c, 0x6f]); - const result = detectBOM(buf); - expect(result).toBeNull(); - }); - - it('should return null for empty buffer', () => { - const buf = Buffer.alloc(0); - const result = detectBOM(buf); - expect(result).toBeNull(); - }); - - it('should return null for partial BOM', () => { - const buf = Buffer.from([0xef, 0xbb]); // Incomplete UTF-8 BOM - const result = detectBOM(buf); - expect(result).toBeNull(); - }); - }); - - describe('readFileWithEncoding', () => { - it('should read UTF-8 BOM file correctly', async () => { - const content = 'Hello, δΈ–η•Œ! 🌍'; - const utf8Bom = Buffer.from([0xef, 0xbb, 0xbf]); - const utf8Content = Buffer.from(content, 'utf8'); - const fullBuffer = Buffer.concat([utf8Bom, utf8Content]); - - const filePath = path.join(testDir, 'utf8-bom.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(content); - }); - - it('should read UTF-16 LE BOM file correctly', async () => { - const content = 'Hello, δΈ–η•Œ! 🌍'; - const utf16leBom = Buffer.from([0xff, 0xfe]); - const utf16leContent = Buffer.from(content, 'utf16le'); - const fullBuffer = Buffer.concat([utf16leBom, utf16leContent]); - - const filePath = path.join(testDir, 'utf16le-bom.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(content); - }); - - it('should read UTF-16 BE BOM file correctly', async () => { - const content = 'Hello, δΈ–η•Œ! 🌍'; - // Manually encode UTF-16 BE: each char as big-endian 16-bit - const utf16beBom = Buffer.from([0xfe, 0xff]); - const chars = Array.from(content); - const utf16beBytes: number[] = []; - - for (const char of chars) { - const code = char.codePointAt(0)!; - if (code > 0xffff) { - // Surrogate pair for emoji - const surrogate1 = 0xd800 + ((code - 0x10000) >> 10); - const surrogate2 = 0xdc00 + ((code - 0x10000) & 0x3ff); - utf16beBytes.push((surrogate1 >> 8) & 0xff, surrogate1 & 0xff); - utf16beBytes.push((surrogate2 >> 8) & 0xff, surrogate2 & 0xff); - } else { - utf16beBytes.push((code >> 8) & 0xff, code & 0xff); - } - } - - const utf16beContent = Buffer.from(utf16beBytes); - const fullBuffer = Buffer.concat([utf16beBom, utf16beContent]); - - const filePath = path.join(testDir, 'utf16be-bom.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(content); - }); - - it('should read UTF-32 LE BOM file correctly', async () => { - const content = 'Hello, δΈ–η•Œ! 🌍'; - const utf32leBom = Buffer.from([0xff, 0xfe, 0x00, 0x00]); - - const utf32leBytes: number[] = []; - for (const char of Array.from(content)) { - const code = char.codePointAt(0)!; - utf32leBytes.push( - code & 0xff, - (code >> 8) & 0xff, - (code >> 16) & 0xff, - (code >> 24) & 0xff, - ); - } - - const utf32leContent = Buffer.from(utf32leBytes); - const fullBuffer = Buffer.concat([utf32leBom, utf32leContent]); - - const filePath = path.join(testDir, 'utf32le-bom.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(content); - }); - - it('should read UTF-32 BE BOM file correctly', async () => { - const content = 'Hello, δΈ–η•Œ! 🌍'; - const utf32beBom = Buffer.from([0x00, 0x00, 0xfe, 0xff]); - - const utf32beBytes: number[] = []; - for (const char of Array.from(content)) { - const code = char.codePointAt(0)!; - utf32beBytes.push( - (code >> 24) & 0xff, - (code >> 16) & 0xff, - (code >> 8) & 0xff, - code & 0xff, - ); - } - - const utf32beContent = Buffer.from(utf32beBytes); - const fullBuffer = Buffer.concat([utf32beBom, utf32beContent]); - - const filePath = path.join(testDir, 'utf32be-bom.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(content); - }); - - it('should read file without BOM as UTF-8', async () => { - const content = 'Hello, δΈ–η•Œ!'; - const filePath = path.join(testDir, 'no-bom.txt'); - await fsPromises.writeFile(filePath, content, 'utf8'); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(content); - }); - - it('should handle empty file', async () => { - const filePath = path.join(testDir, 'empty.txt'); - await fsPromises.writeFile(filePath, ''); - - const result = await readFileWithEncoding(filePath); - expect(result).toBe(''); - }); - }); - - describe('isBinaryFile with BOM awareness', () => { - it('should not treat UTF-8 BOM file as binary', async () => { - const content = 'Hello, world!'; - const utf8Bom = Buffer.from([0xef, 0xbb, 0xbf]); - const utf8Content = Buffer.from(content, 'utf8'); - const fullBuffer = Buffer.concat([utf8Bom, utf8Content]); - - const filePath = path.join(testDir, 'utf8-bom-test.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await isBinaryFile(filePath); - expect(result).toBe(false); - }); - - it('should not treat UTF-16 LE BOM file as binary', async () => { - const content = 'Hello, world!'; - const utf16leBom = Buffer.from([0xff, 0xfe]); - const utf16leContent = Buffer.from(content, 'utf16le'); - const fullBuffer = Buffer.concat([utf16leBom, utf16leContent]); - - const filePath = path.join(testDir, 'utf16le-bom-test.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await isBinaryFile(filePath); - expect(result).toBe(false); - }); - - it('should not treat UTF-16 BE BOM file as binary', async () => { - const utf16beBom = Buffer.from([0xfe, 0xff]); - // Simple ASCII in UTF-16 BE - const utf16beContent = Buffer.from([ - 0x00, - 0x48, // H - 0x00, - 0x65, // e - 0x00, - 0x6c, // l - 0x00, - 0x6c, // l - 0x00, - 0x6f, // o - 0x00, - 0x2c, // , - 0x00, - 0x20, // space - 0x00, - 0x77, // w - 0x00, - 0x6f, // o - 0x00, - 0x72, // r - 0x00, - 0x6c, // l - 0x00, - 0x64, // d - 0x00, - 0x21, // ! - ]); - const fullBuffer = Buffer.concat([utf16beBom, utf16beContent]); - - const filePath = path.join(testDir, 'utf16be-bom-test.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await isBinaryFile(filePath); - expect(result).toBe(false); - }); - - it('should not treat UTF-32 LE BOM file as binary', async () => { - const utf32leBom = Buffer.from([0xff, 0xfe, 0x00, 0x00]); - const utf32leContent = Buffer.from([ - 0x48, - 0x00, - 0x00, - 0x00, // H - 0x65, - 0x00, - 0x00, - 0x00, // e - 0x6c, - 0x00, - 0x00, - 0x00, // l - 0x6c, - 0x00, - 0x00, - 0x00, // l - 0x6f, - 0x00, - 0x00, - 0x00, // o - ]); - const fullBuffer = Buffer.concat([utf32leBom, utf32leContent]); - - const filePath = path.join(testDir, 'utf32le-bom-test.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await isBinaryFile(filePath); - expect(result).toBe(false); - }); - - it('should not treat UTF-32 BE BOM file as binary', async () => { - const utf32beBom = Buffer.from([0x00, 0x00, 0xfe, 0xff]); - const utf32beContent = Buffer.from([ - 0x00, - 0x00, - 0x00, - 0x48, // H - 0x00, - 0x00, - 0x00, - 0x65, // e - 0x00, - 0x00, - 0x00, - 0x6c, // l - 0x00, - 0x00, - 0x00, - 0x6c, // l - 0x00, - 0x00, - 0x00, - 0x6f, // o - ]); - const fullBuffer = Buffer.concat([utf32beBom, utf32beContent]); - - const filePath = path.join(testDir, 'utf32be-bom-test.txt'); - await fsPromises.writeFile(filePath, fullBuffer); - - const result = await isBinaryFile(filePath); - expect(result).toBe(false); - }); - - it('should still treat actual binary file as binary', async () => { - // PNG header + some binary data with null bytes - const pngHeader = Buffer.from([ - 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, - ]); - const binaryData = Buffer.from([ - 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, - ]); // IHDR chunk with nulls - const fullContent = Buffer.concat([pngHeader, binaryData]); - const filePath = path.join(testDir, 'test.png'); - await fsPromises.writeFile(filePath, fullContent); - - const result = await isBinaryFile(filePath); - expect(result).toBe(true); - }); - - it('should treat file with null bytes (no BOM) as binary', async () => { - const content = Buffer.from([ - 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x77, 0x6f, 0x72, 0x6c, 0x64, - ]); - const filePath = path.join(testDir, 'null-bytes.bin'); - await fsPromises.writeFile(filePath, content); - - const result = await isBinaryFile(filePath); - expect(result).toBe(true); - }); + it('should return false for a UTF-16 LE file with BOM', async () => { + const buf = Buffer.from([0xff, 0xfe, 0x61, 0x00, 0x62, 0x00]); + actualNodeFs.writeFileSync(testTextFilePath, buf); + expect(await isBinaryFile(testTextFilePath)).toBe(false); }); }); describe('detectFileType', () => { - let filePathForDetectTest: string; - - beforeEach(() => { - filePathForDetectTest = path.join(tempRootDir, 'detectType.tmp'); - // Default: create as a text file for isBinaryFile fallback - actualNodeFs.writeFileSync(filePathForDetectTest, 'Plain text content'); + it("should return 'text' for .ts file", async () => { + const tsFilePath = path.join(tempRootDir, 'script.ts'); + actualNodeFs.writeFileSync(tsFilePath, 'console.log("hello");'); + expect(await detectFileType(tsFilePath)).toBe('text'); }); - afterEach(() => { - if (actualNodeFs.existsSync(filePathForDetectTest)) { - actualNodeFs.unlinkSync(filePathForDetectTest); - } - vi.restoreAllMocks(); // Restore spies on actualNodeFs + it("should return 'image' for .png file", async () => { + mockMimeGetType.mockReturnValue('image/png'); + actualNodeFs.writeFileSync(testImageFilePath, 'fake-image-data'); + expect(await detectFileType(testImageFilePath)).toBe('image'); }); - it('should detect typescript type by extension (ts, mts, cts, tsx)', async () => { - expect(await detectFileType('file.ts')).toBe('text'); - expect(await detectFileType('file.test.ts')).toBe('text'); - expect(await detectFileType('file.mts')).toBe('text'); - expect(await detectFileType('vite.config.mts')).toBe('text'); - expect(await detectFileType('file.cts')).toBe('text'); - expect(await detectFileType('component.tsx')).toBe('text'); + it("should return 'pdf' for .pdf file", async () => { + mockMimeGetType.mockReturnValue('application/pdf'); + actualNodeFs.writeFileSync(testPdfFilePath, 'fake-pdf-data'); + expect(await detectFileType(testPdfFilePath)).toBe('pdf'); }); - it.each([ - { type: 'image', file: 'file.png', mime: 'image/png' }, - { type: 'image', file: 'file.jpg', mime: 'image/jpeg' }, - { type: 'pdf', file: 'file.pdf', mime: 'application/pdf' }, - { type: 'binary', file: 'archive.zip', mime: 'application/zip' }, - { type: 'binary', file: 'app.exe', mime: 'application/octet-stream' }, - ])( - 'should detect $type type for $file by extension', - async ({ file, mime, type }) => { - mockMimeGetType.mockReturnValueOnce(mime); - expect(await detectFileType(file)).toBe(type); - }, - ); + it("should return 'audio' for .mp3 file", async () => { + mockMimeGetType.mockReturnValue('audio/mpeg'); + // Mock isBinaryFile to return true to confirm it's recognized as audio + vi.spyOn(actualNodeFs.promises, 'open').mockImplementation( + async () => + ({ + stat: async () => ({ size: 100 }), + read: async (buf: Buffer) => { + buf[0] = 0x00; // null byte to make it look binary + return { bytesRead: 1 }; + }, + close: async () => {}, + }) as unknown as fs.promises.FileHandle, + ); - it.each([ - { type: 'audio', ext: '.mp3', mime: 'audio/mpeg' }, - { type: 'video', ext: '.mp4', mime: 'video/mp4' }, - ])( - 'should detect $type type for binary files with $ext extension', - async ({ type, ext, mime }) => { - const filePath = path.join(tempRootDir, `test${ext}`); - const binaryContent = Buffer.from([ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - ]); - actualNodeFs.writeFileSync(filePath, binaryContent); - - mockMimeGetType.mockReturnValueOnce(mime); - expect(await detectFileType(filePath)).toBe(type); - - actualNodeFs.unlinkSync(filePath); - }, - ); - - it('should detect svg type by extension', async () => { - expect(await detectFileType('image.svg')).toBe('svg'); - expect(await detectFileType('image.icon.svg')).toBe('svg'); + actualNodeFs.writeFileSync(testAudioFilePath, 'fake-audio-data'); + expect(await detectFileType(testAudioFilePath)).toBe('audio'); }); - it('should use isBinaryFile for unknown extensions and detect as binary', async () => { - mockMimeGetType.mockReturnValueOnce(false); // Unknown mime type - // Create a file that isBinaryFile will identify as binary - const binaryContent = Buffer.from([ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, - ]); - actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent); - expect(await detectFileType(filePathForDetectTest)).toBe('binary'); + it("should return 'text' for audio extension with text content", async () => { + mockMimeGetType.mockReturnValue('audio/mpeg'); + // No null bytes, should be text + actualNodeFs.writeFileSync(testAudioFilePath, 'Not really audio'); + expect(await detectFileType(testAudioFilePath)).toBe('text'); }); - it('should default to text if mime type is unknown and content is not binary', async () => { - mockMimeGetType.mockReturnValueOnce(false); // Unknown mime type - // filePathForDetectTest is already a text file by default from beforeEach - expect(await detectFileType(filePathForDetectTest)).toBe('text'); + it("should return 'binary' for known binary extension", async () => { + const exeFilePath = path.join(tempRootDir, 'app.exe'); + actualNodeFs.writeFileSync(exeFilePath, 'fake-exe-data'); + expect(await detectFileType(exeFilePath)).toBe('binary'); }); - it('should detect .adp files with XML content as text, not audio (#16888)', async () => { - const adpFilePath = path.join(tempRootDir, 'test.adp'); - const xmlContent = ` - - - - - - -`; - actualNodeFs.writeFileSync(adpFilePath, xmlContent); - mockMimeGetType.mockReturnValueOnce('audio/adpcm'); + it("should return 'binary' for unknown extension with binary content", async () => { + const unknownFilePath = path.join(tempRootDir, 'file.dat'); + const content = Buffer.from([0x00, 0x01, 0x02, 0x03]); + actualNodeFs.writeFileSync(unknownFilePath, content); + expect(await detectFileType(unknownFilePath)).toBe('binary'); + }); - expect(await detectFileType(adpFilePath)).toBe('text'); - - actualNodeFs.unlinkSync(adpFilePath); + it("should return 'svg' for .svg extension", async () => { + const svgPath = path.join(tempRootDir, 'icon.svg'); + actualNodeFs.writeFileSync(svgPath, ''); + expect(await detectFileType(svgPath)).toBe('svg'); }); }); @@ -760,7 +350,7 @@ describe('fileUtils', () => { }); it('should read a text file successfully', async () => { - const content = 'Line 1\\nLine 2\\nLine 3'; + const content = 'Line 1\nLine 2\nLine 3'; actualNodeFs.writeFileSync(testTextFilePath, content); const result = await processSingleFileContent( testTextFilePath, @@ -772,480 +362,223 @@ describe('fileUtils', () => { expect(result.error).toBeUndefined(); }); + it('should read all lines when full flag is true', async () => { + // Create a file with more than DEFAULT_MAX_LINES_TEXT_FILE lines (2000 lines) + const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`); + const content = lines.join('\n'); + actualNodeFs.writeFileSync(testTextFilePath, content); + + // Without full flag, it should be truncated + const truncatedResult = await processSingleFileContent( + testTextFilePath, + tempRootDir, + new StandardFileSystemService(), + ); + expect(truncatedResult.originalLineCount).toBe(2500); + expect(truncatedResult.isTruncated).toBe(true); + expect( + (truncatedResult.llmContent as string).split('\n').length, + ).toBeLessThan(2500); + + // With full flag, it should return all lines + const fullResult = await processSingleFileContent( + testTextFilePath, + tempRootDir, + new StandardFileSystemService(), + undefined, + undefined, + true, + ); + expect(fullResult.originalLineCount).toBe(2500); + expect(fullResult.isTruncated).toBe(false); + expect((fullResult.llmContent as string).split('\n').length).toBe(2500); + }); + it('should handle file not found', async () => { const result = await processSingleFileContent( nonexistentFilePath, tempRootDir, new StandardFileSystemService(), ); - expect(result.error).toContain('File not found'); - expect(result.returnDisplay).toContain('File not found'); + expect(result.returnDisplay).toBe('File not found.'); + expect(result.errorType).toBe(ToolErrorType.FILE_NOT_FOUND); }); - it('should handle read errors for text files', async () => { - actualNodeFs.writeFileSync(testTextFilePath, 'content'); // File must exist for initial statSync - const readError = new Error('Simulated read error'); - vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError); - - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - expect(result.error).toContain('Simulated read error'); - expect(result.returnDisplay).toContain('Simulated read error'); - }); - - it('should handle read errors for image/pdf files', async () => { - actualNodeFs.writeFileSync(testImageFilePath, 'content'); // File must exist - mockMimeGetType.mockReturnValue('image/png'); - const readError = new Error('Simulated image read error'); - vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError); - - const result = await processSingleFileContent( - testImageFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - expect(result.error).toContain('Simulated image read error'); - expect(result.returnDisplay).toContain('Simulated image read error'); - }); - - it('should process an image file', async () => { - const fakePngData = Buffer.from('fake png data'); - actualNodeFs.writeFileSync(testImageFilePath, fakePngData); - mockMimeGetType.mockReturnValue('image/png'); - const result = await processSingleFileContent( - testImageFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - expect( - (result.llmContent as { inlineData: unknown }).inlineData, - ).toBeDefined(); - expect( - (result.llmContent as { inlineData: { mimeType: string } }).inlineData - .mimeType, - ).toBe('image/png'); - expect( - (result.llmContent as { inlineData: { data: string } }).inlineData.data, - ).toBe(fakePngData.toString('base64')); - expect(result.returnDisplay).toContain('Read image file: image.png'); - }); - - it('should process a PDF file', async () => { - const fakePdfData = Buffer.from('fake pdf data'); - actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData); - mockMimeGetType.mockReturnValue('application/pdf'); - const result = await processSingleFileContent( - testPdfFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - expect( - (result.llmContent as { inlineData: unknown }).inlineData, - ).toBeDefined(); - expect( - (result.llmContent as { inlineData: { mimeType: string } }).inlineData - .mimeType, - ).toBe('application/pdf'); - expect( - (result.llmContent as { inlineData: { data: string } }).inlineData.data, - ).toBe(fakePdfData.toString('base64')); - expect(result.returnDisplay).toContain('Read pdf file: document.pdf'); - }); - - it('should process an audio file', async () => { - const fakeMp3Data = Buffer.from([ - 0x49, 0x44, 0x33, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - ]); - actualNodeFs.writeFileSync(testAudioFilePath, fakeMp3Data); - mockMimeGetType.mockReturnValue('audio/mpeg'); - const result = await processSingleFileContent( - testAudioFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - expect( - (result.llmContent as { inlineData: unknown }).inlineData, - ).toBeDefined(); - expect( - (result.llmContent as { inlineData: { mimeType: string } }).inlineData - .mimeType, - ).toBe('audio/mpeg'); - expect( - (result.llmContent as { inlineData: { data: string } }).inlineData.data, - ).toBe(fakeMp3Data.toString('base64')); - expect(result.returnDisplay).toContain('Read audio file: audio.mp3'); - }); - - it('should read an SVG file as text when under 1MB', async () => { - const svgContent = ` - - - - `; - const testSvgFilePath = path.join(tempRootDir, 'test.svg'); - actualNodeFs.writeFileSync(testSvgFilePath, svgContent, 'utf-8'); - - mockMimeGetType.mockReturnValue('image/svg+xml'); - - const result = await processSingleFileContent( - testSvgFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - - expect(result.llmContent).toBe(svgContent); - expect(result.returnDisplay).toContain('Read SVG as text'); - }); - - it('should skip binary files', async () => { - actualNodeFs.writeFileSync( - testBinaryFilePath, - Buffer.from([0x00, 0x01, 0x02]), - ); - mockMimeGetType.mockReturnValueOnce('application/octet-stream'); - // isBinaryFile will operate on the real file. - - const result = await processSingleFileContent( - testBinaryFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - expect(result.llmContent).toContain( - 'Cannot display content of binary file', - ); - expect(result.returnDisplay).toContain('Skipped binary file: app.exe'); - }); - - it('should handle path being a directory', async () => { + it('should handle directory path', async () => { const result = await processSingleFileContent( directoryPath, tempRootDir, new StandardFileSystemService(), ); - expect(result.error).toContain('Path is a directory'); - expect(result.returnDisplay).toContain('Path is a directory'); + expect(result.returnDisplay).toBe('Path is a directory.'); + expect(result.errorType).toBe(ToolErrorType.TARGET_IS_DIRECTORY); }); - it('should paginate text files correctly (startLine and endLine)', async () => { - const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`); - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 6, - 10, - ); // Read lines 6-10 (1-based) - const expectedContent = lines.slice(5, 10).join('\n'); - - expect(result.llmContent).toBe(expectedContent); - expect(result.returnDisplay).toBe('Read lines 6-10 of 20 from test.txt'); - expect(result.isTruncated).toBe(true); - expect(result.originalLineCount).toBe(20); - expect(result.linesShown).toEqual([6, 10]); - }); - - it('should identify truncation when reading the end of a file', async () => { - const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`); - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - // Read from line 11 to 20. The start is not 1, so it's truncated. - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 11, - 20, - ); - const expectedContent = lines.slice(10, 20).join('\n'); - - expect(result.llmContent).toContain(expectedContent); - expect(result.returnDisplay).toBe('Read lines 11-20 of 20 from test.txt'); - expect(result.isTruncated).toBe(true); // This is the key check for the bug - expect(result.originalLineCount).toBe(20); - expect(result.linesShown).toEqual([11, 20]); - }); - - it('should handle endLine exceeding file length', async () => { - const lines = ['Line 1', 'Line 2']; - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 1, - 10, - ); - const expectedContent = lines.join('\n'); - - expect(result.llmContent).toBe(expectedContent); - expect(result.returnDisplay).toBe(''); - expect(result.isTruncated).toBe(false); - expect(result.originalLineCount).toBe(2); - expect(result.linesShown).toEqual([1, 2]); - }); - - it('should truncate long lines in text files', async () => { - const longLine = 'a'.repeat(2500); - actualNodeFs.writeFileSync( - testTextFilePath, - `Short line\n${longLine}\nAnother short line`, - ); + it('should handle large file error', async () => { + // Ensure file exists so it doesn't fail on existsSync check + actualNodeFs.writeFileSync(testTextFilePath, 'some content'); + // Mock stat to return a large size + vi.spyOn(fs.promises, 'stat').mockResolvedValue({ + size: 30 * 1024 * 1024, + isDirectory: () => false, + } as unknown as fs.Stats); const result = await processSingleFileContent( testTextFilePath, tempRootDir, new StandardFileSystemService(), ); + expect(result.returnDisplay).toMatch(/File size exceeds the .*MB limit/); + expect(result.errorType).toBe(ToolErrorType.FILE_TOO_LARGE); + }); - expect(result.llmContent).toContain('Short line'); + it('should handle binary file', async () => { + const content = Buffer.from([0x00, 0x01, 0x02, 0x03]); + actualNodeFs.writeFileSync(testBinaryFilePath, content); + const result = await processSingleFileContent( + testBinaryFilePath, + tempRootDir, + new StandardFileSystemService(), + ); + expect(result.returnDisplay).toContain('Skipped binary file'); expect(result.llmContent).toContain( - longLine.substring(0, 2000) + '... [truncated]', + 'Cannot display content of binary file', ); - expect(result.llmContent).toContain('Another short line'); - expect(result.returnDisplay).toBe( - 'Read all 3 lines from test.txt (some lines were shortened)', - ); - expect(result.isTruncated).toBe(true); }); - it('should truncate when line count exceeds the default limit', async () => { - const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`); - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); + it('should handle image file', async () => { + mockMimeGetType.mockReturnValue('image/png'); + const content = Buffer.from('fake-image-data'); + actualNodeFs.writeFileSync(testImageFilePath, content); + const result = await processSingleFileContent( + testImageFilePath, + tempRootDir, + new StandardFileSystemService(), + ); + expect(result.returnDisplay).toContain('Read image file'); + expect( + (result.llmContent as { inlineData: { data: string } }).inlineData.data, + ).toBe(content.toString('base64')); + expect( + (result.llmContent as { inlineData: { mimeType: string } }).inlineData + .mimeType, + ).toBe('image/png'); + }); - // No ranges provided, should use default limit (2000) + it('should handle PDF file', async () => { + mockMimeGetType.mockReturnValue('application/pdf'); + const content = Buffer.from('fake-pdf-data'); + actualNodeFs.writeFileSync(testPdfFilePath, content); + const result = await processSingleFileContent( + testPdfFilePath, + tempRootDir, + new StandardFileSystemService(), + ); + expect(result.returnDisplay).toContain('Read pdf file'); + expect( + (result.llmContent as { inlineData: { data: string } }).inlineData.data, + ).toBe(content.toString('base64')); + }); + + it('should handle truncation of text files', async () => { + const lines = Array.from({ length: 3000 }, (_, i) => `Line ${i + 1}`); + actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); const result = await processSingleFileContent( testTextFilePath, tempRootDir, new StandardFileSystemService(), ); - expect(result.isTruncated).toBe(true); - expect(result.returnDisplay).toBe( - 'Read lines 1-2000 of 2500 from test.txt', - ); + expect(result.originalLineCount).toBe(3000); expect(result.linesShown).toEqual([1, 2000]); }); - it('should truncate when a line length exceeds the character limit', async () => { - const longLine = 'b'.repeat(2500); - const lines = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`); - lines.push(longLine); // Total 11 lines + it('should read a specific line range', async () => { + const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`); actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - // Read all 11 lines, including the long one const result = await processSingleFileContent( testTextFilePath, tempRootDir, new StandardFileSystemService(), - 1, - 11, - ); - - expect(result.isTruncated).toBe(true); - expect(result.returnDisplay).toBe( - 'Read all 11 lines from test.txt (some lines were shortened)', - ); - }); - - it('should truncate both line count and line length when both exceed limits', async () => { - const linesWithLongInMiddle = Array.from( - { length: 20 }, - (_, i) => `Line ${i + 1}`, - ); - linesWithLongInMiddle[4] = 'c'.repeat(2500); - actualNodeFs.writeFileSync( - testTextFilePath, - linesWithLongInMiddle.join('\n'), - ); - - // Read 10 lines out of 20, including the long line - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 1, 10, + 20, ); - expect(result.isTruncated).toBe(true); - expect(result.returnDisplay).toBe( - 'Read lines 1-10 of 20 from test.txt (some lines were shortened)', - ); + expect(result.originalLineCount).toBe(100); + expect(result.linesShown).toEqual([10, 20]); + const contentLines = (result.llmContent as string).split('\n'); + expect(contentLines.length).toBe(11); + expect(contentLines[0]).toBe('Line 10'); + expect(contentLines[10]).toBe('Line 20'); }); - it('should return an error if the file size exceeds 20MB', async () => { - // Create a small test file - actualNodeFs.writeFileSync(testTextFilePath, 'test content'); - - // Spy on fs.promises.stat to return a large file size - const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({ - size: 21 * 1024 * 1024, - isDirectory: () => false, - } as fs.Stats); - - try { - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - ); - - expect(result.error).toContain('File size exceeds the 20MB limit'); - expect(result.returnDisplay).toContain( - 'File size exceeds the 20MB limit', - ); - expect(result.llmContent).toContain('File size exceeds the 20MB limit'); - } finally { - statSpy.mockRestore(); - } + it('should handle SVG file as text', async () => { + const svgContent = ''; + const svgPath = path.join(tempRootDir, 'test.svg'); + actualNodeFs.writeFileSync(svgPath, svgContent); + const result = await processSingleFileContent( + svgPath, + tempRootDir, + new StandardFileSystemService(), + ); + expect(result.llmContent).toBe(svgContent); + expect(result.returnDisplay).toContain('Read SVG as text'); }); }); - describe('saveTruncatedToolOutput & formatTruncatedToolOutput', () => { - it('should save content to a file with safe name', async () => { - const content = 'some content'; - const toolName = 'shell'; - const id = 'shell_123'; - - const result = await saveTruncatedToolOutput( + describe('saveTruncatedToolOutput', () => { + it('saves content to a temporary file', async () => { + const content = 'large output'; + const { outputFile } = await saveTruncatedToolOutput( content, - toolName, - id, + 'testTool', + 'call123', tempRootDir, ); - - const expectedOutputFile = path.join( - tempRootDir, - 'tool-outputs', - 'shell_123.txt', - ); - expect(result.outputFile).toBe(expectedOutputFile); - - const savedContent = await fsPromises.readFile( - expectedOutputFile, - 'utf-8', - ); - expect(savedContent).toBe(content); + expect(actualNodeFs.existsSync(outputFile)).toBe(true); + expect(actualNodeFs.readFileSync(outputFile, 'utf8')).toBe(content); + expect(outputFile).toContain('testtool_call123.txt'); }); - it('should sanitize tool name in filename', async () => { - const content = 'content'; - const toolName = '../../dangerous/tool'; - const id = 1; - - const result = await saveTruncatedToolOutput( + it('handles session ID in path', async () => { + const content = 'session content'; + const { outputFile } = await saveTruncatedToolOutput( content, - toolName, - id, + 'testTool', + 'call123', tempRootDir, + 'session-456', ); + // sanitization of 'session-456' results in 'session-456' + expect(outputFile).toContain('session-session-456'); + expect(actualNodeFs.readFileSync(outputFile, 'utf8')).toBe(content); + }); + }); - // ../../dangerous/tool -> ______dangerous_tool - const expectedOutputFile = path.join( - tempRootDir, - 'tool-outputs', - '______dangerous_tool_1.txt', - ); - expect(result.outputFile).toBe(expectedOutputFile); + describe('formatTruncatedToolOutput', () => { + it('does not truncate if within limit', () => { + const content = 'short'; + expect(formatTruncatedToolOutput(content, 'out.txt', 100)).toBe(content); }); - it('should not duplicate tool name when id already starts with it', async () => { - const content = 'content'; - const toolName = 'run_shell_command'; - const id = 'run_shell_command_1707400000000_0'; + it('truncates content and adds link to file', () => { + const content = 'A'.repeat(1000); + const result = formatTruncatedToolOutput(content, 'out.txt', 100); + expect(result).toContain('Output too large'); + expect(result).toContain('out.txt'); + expect(result).toContain('characters omitted'); + }); + }); - const result = await saveTruncatedToolOutput( - content, - toolName, - id, - tempRootDir, - ); - - const expectedOutputFile = path.join( - tempRootDir, - 'tool-outputs', - 'run_shell_command_1707400000000_0.txt', - ); - expect(result.outputFile).toBe(expectedOutputFile); + describe('getRealPath', () => { + it('returns real path for existing file', () => { + actualNodeFs.writeFileSync(testTextFilePath, 'test'); + const real = getRealPath(testTextFilePath); + expect(path.isAbsolute(real)).toBe(true); }); - it('should sanitize id in filename', async () => { - const content = 'content'; - const toolName = 'shell'; - const id = '../../etc/passwd'; - - const result = await saveTruncatedToolOutput( - content, - toolName, - id, - tempRootDir, - ); - - // ../../etc/passwd -> ______etc_passwd - const expectedOutputFile = path.join( - tempRootDir, - 'tool-outputs', - 'shell_______etc_passwd.txt', - ); - expect(result.outputFile).toBe(expectedOutputFile); - }); - - it('should sanitize sessionId in filename/path', async () => { - const content = 'content'; - const toolName = 'shell'; - const id = 'shell_1'; - const sessionId = '../../etc/passwd'; - - const result = await saveTruncatedToolOutput( - content, - toolName, - id, - tempRootDir, - sessionId, - ); - - // ../../etc/passwd -> ______etc_passwd - const expectedOutputFile = path.join( - tempRootDir, - 'tool-outputs', - 'session-______etc_passwd', - 'shell_1.txt', - ); - expect(result.outputFile).toBe(expectedOutputFile); - }); - - it('should truncate showing first 20% and last 80%', () => { - const content = 'abcdefghijklmnopqrstuvwxyz'; // 26 chars - const outputFile = '/tmp/out.txt'; - - // maxChars=10 -> head=2 (20%), tail=8 (80%) - const formatted = formatTruncatedToolOutput(content, outputFile, 10); - - expect(formatted).toContain('Showing first 2 and last 8 characters'); - expect(formatted).toContain('For full output see: /tmp/out.txt'); - expect(formatted).toContain('ab'); // first 2 chars - expect(formatted).toContain('stuvwxyz'); // last 8 chars - expect(formatted).toContain('[16 characters omitted]'); // 26 - 2 - 8 = 16 - }); - - it('should format large content with head/tail truncation', () => { - const content = 'a'.repeat(50000); - const outputFile = '/tmp/out.txt'; - - // maxChars=4000 -> head=800 (20%), tail=3200 (80%) - const formatted = formatTruncatedToolOutput(content, outputFile, 4000); - - expect(formatted).toContain( - 'Showing first 800 and last 3,200 characters', - ); - expect(formatted).toContain('For full output see: /tmp/out.txt'); - expect(formatted).toContain('[46,000 characters omitted]'); // 50000 - 800 - 3200 + it('returns resolved path for non-existent file', () => { + const real = getRealPath(nonexistentFilePath); + expect(real).toBe(path.resolve(nonexistentFilePath)); }); }); }); diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 6bb89df83c..1d8318f06d 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -402,6 +402,7 @@ export interface ProcessedFileReadResult { * @param _fileSystemService Currently unused in this function; kept for signature stability. * @param startLine Optional 1-based line number to start reading from. * @param endLine Optional 1-based line number to end reading at (inclusive). + * @param full Optional boolean to indicate if full file content should be returned without line truncation. * @returns ProcessedFileReadResult object. */ export async function processSingleFileContent( @@ -410,6 +411,7 @@ export async function processSingleFileContent( _fileSystemService: FileSystemService, startLine?: number, endLine?: number, + full?: boolean, ): Promise { try { if (!fs.existsSync(filePath)) { @@ -482,11 +484,13 @@ export async function processSingleFileContent( sliceStart = startLine ? startLine - 1 : 0; sliceEnd = endLine ? Math.min(endLine, originalLineCount) - : Math.min( - sliceStart + DEFAULT_MAX_LINES_TEXT_FILE, - originalLineCount, - ); - } else { + : full + ? originalLineCount + : Math.min( + sliceStart + DEFAULT_MAX_LINES_TEXT_FILE, + originalLineCount, + ); + } else if (!full) { sliceEnd = Math.min(DEFAULT_MAX_LINES_TEXT_FILE, originalLineCount); } @@ -496,7 +500,7 @@ export async function processSingleFileContent( let linesWereTruncatedInLength = false; const formattedLines = selectedLines.map((line) => { - if (line.length > MAX_LINE_LENGTH_TEXT_FILE) { + if (!full && line.length > MAX_LINE_LENGTH_TEXT_FILE) { linesWereTruncatedInLength = true; return ( line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]' diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 17409313ce..9c790c6268 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -3040,13 +3040,23 @@ }, "config": { "title": "MCP Config", - "description": "Admin-configured MCP servers.", - "markdownDescription": "Admin-configured MCP servers.\n\n- Category: `Admin`\n- Requires restart: `no`\n- Default: `{}`", + "description": "Admin-configured MCP servers (allowlist).", + "markdownDescription": "Admin-configured MCP servers (allowlist).\n\n- Category: `Admin`\n- Requires restart: `no`\n- Default: `{}`", "default": {}, "type": "object", "additionalProperties": { "$ref": "#/$defs/MCPServerConfig" } + }, + "requiredConfig": { + "title": "Required MCP Config", + "description": "Admin-required MCP servers that are always injected.", + "markdownDescription": "Admin-required MCP servers that are always injected.\n\n- Category: `Admin`\n- Requires restart: `no`\n- Default: `{}`", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/RequiredMcpServerConfig" + } } }, "additionalProperties": false @@ -3181,6 +3191,77 @@ } } }, + "RequiredMcpServerConfig": { + "type": "object", + "description": "Admin-required MCP server configuration (remote transports only).", + "additionalProperties": false, + "properties": { + "url": { + "type": "string", + "description": "URL for the required MCP server." + }, + "type": { + "type": "string", + "description": "Transport type for the required server.", + "enum": ["sse", "http"] + }, + "headers": { + "type": "object", + "description": "Additional HTTP headers sent to the server.", + "additionalProperties": { + "type": "string" + } + }, + "timeout": { + "type": "number", + "description": "Timeout in milliseconds for MCP requests." + }, + "trust": { + "type": "boolean", + "description": "Marks the server as trusted. Defaults to true for admin-required servers." + }, + "description": { + "type": "string", + "description": "Human-readable description of the server." + }, + "includeTools": { + "type": "array", + "description": "Subset of tools enabled for this server.", + "items": { + "type": "string" + } + }, + "excludeTools": { + "type": "array", + "description": "Tools disabled for this server.", + "items": { + "type": "string" + } + }, + "oauth": { + "type": "object", + "description": "OAuth configuration for authenticating with the server.", + "additionalProperties": true + }, + "authProviderType": { + "type": "string", + "description": "Authentication provider used for acquiring credentials.", + "enum": [ + "dynamic_discovery", + "google_credentials", + "service_account_impersonation" + ] + }, + "targetAudience": { + "type": "string", + "description": "OAuth target audience (CLIENT_ID.apps.googleusercontent.com)." + }, + "targetServiceAccount": { + "type": "string", + "description": "Service account email to impersonate (name@project.iam.gserviceaccount.com)." + } + } + }, "TelemetrySettings": { "type": "object", "description": "Telemetry configuration for Gemini CLI.",