From 9637fb39903a276fa46b589d71664409c3653148 Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Tue, 7 Apr 2026 17:01:14 -0400 Subject: [PATCH 01/39] fix(core): remove tmux alternate buffer warning (#24852) --- packages/core/src/utils/compatibility.test.ts | 13 ------------- packages/core/src/utils/compatibility.ts | 9 --------- 2 files changed, 22 deletions(-) diff --git a/packages/core/src/utils/compatibility.test.ts b/packages/core/src/utils/compatibility.test.ts index c94cbee3a6..28fa26453c 100644 --- a/packages/core/src/utils/compatibility.test.ts +++ b/packages/core/src/utils/compatibility.test.ts @@ -289,19 +289,6 @@ describe('compatibility', () => { ); }); - it('should return tmux warning when detected and in alternate buffer', () => { - vi.stubEnv('TMUX', '/tmp/tmux-1001/default,1,0'); - - const warnings = getCompatibilityWarnings({ isAlternateBuffer: true }); - expect(warnings).toContainEqual( - expect.objectContaining({ - id: 'tmux-alternate-buffer', - message: expect.stringContaining('tmux detected'), - priority: WarningPriority.High, - }), - ); - }); - it('should return low-color tmux warning when detected', () => { vi.stubEnv('TERM', 'screen'); vi.stubEnv('TMUX', '1'); diff --git a/packages/core/src/utils/compatibility.ts b/packages/core/src/utils/compatibility.ts index 4b126bd4eb..8a997b42cf 100644 --- a/packages/core/src/utils/compatibility.ts +++ b/packages/core/src/utils/compatibility.ts @@ -145,15 +145,6 @@ export function getCompatibilityWarnings(options?: { }); } - if (isTmux() && options?.isAlternateBuffer) { - warnings.push({ - id: 'tmux-alternate-buffer', - message: - 'Warning: tmux detected — alternate buffer mode may cause unexpected scrollback loss and flickering. 
If you experience issues, disable it in /settings → "Use Alternate Screen Buffer".\n Tip: Use Ctrl-b [ to access tmux copy mode for scrolling history.', - priority: WarningPriority.High, - }); - } - if (isLowColorTmux()) { warnings.push({ id: 'low-color-tmux', From adf7b3b717a454a9923d4c690b08cc84d87ad030 Mon Sep 17 00:00:00 2001 From: David Pierce Date: Tue, 7 Apr 2026 21:08:18 +0000 Subject: [PATCH 02/39] Improve sandbox error matching and caching (#24550) --- .../src/sandbox/linux/LinuxSandboxManager.ts | 11 +- .../src/sandbox/macos/MacOsSandboxManager.ts | 10 +- .../core/src/sandbox/utils/fsUtils.test.ts | 52 ++++++++ packages/core/src/sandbox/utils/fsUtils.ts | 2 + .../sandbox/utils/sandboxDenialUtils.test.ts | 110 +++++++++++++++- .../src/sandbox/utils/sandboxDenialUtils.ts | 117 +++++++++++++----- .../sandbox/utils/sandboxReadWriteUtils.ts | 4 + .../sandbox/windows/WindowsSandboxManager.ts | 14 ++- .../windows/windowsSandboxDenialUtils.ts | 38 +++--- packages/core/src/utils/paths.ts | 17 +++ 10 files changed, 324 insertions(+), 51 deletions(-) create mode 100644 packages/core/src/sandbox/utils/fsUtils.test.ts diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts index 000fea510f..f210138127 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts @@ -27,11 +27,16 @@ import { verifySandboxOverrides, getCommandName, } from '../utils/commandUtils.js'; +import { assertValidPathString } from '../../utils/paths.js'; import { isKnownSafeCommand, isDangerousCommand, } from '../utils/commandSafety.js'; -import { parsePosixSandboxDenials } from '../utils/sandboxDenialUtils.js'; +import { + parsePosixSandboxDenials, + createSandboxDenialCache, + type SandboxDenialCache, +} from '../utils/sandboxDenialUtils.js'; import { handleReadWriteCommands } from '../utils/sandboxReadWriteUtils.js'; import { buildBwrapArgs } from 
'./bwrapArgsBuilder.js'; @@ -108,6 +113,7 @@ function getSeccompBpfPath(): string { * Ensures a file or directory exists. */ function touch(filePath: string, isDirectory: boolean) { + assertValidPathString(filePath); try { // If it exists (even as a broken symlink), do nothing if (fs.lstatSync(filePath)) return; @@ -129,6 +135,7 @@ function touch(filePath: string, isDirectory: boolean) { export class LinuxSandboxManager implements SandboxManager { private static maskFilePath: string | undefined; + private readonly denialCache: SandboxDenialCache = createSandboxDenialCache(); constructor(private readonly options: GlobalSandboxOptions) {} @@ -141,7 +148,7 @@ export class LinuxSandboxManager implements SandboxManager { } parseDenials(result: ShellExecutionResult): ParsedSandboxDenial | undefined { - return parsePosixSandboxDenials(result); + return parsePosixSandboxDenials(result, this.denialCache); } getWorkspace(): string { diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts index 0fee35110a..44774e8e82 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts @@ -32,10 +32,16 @@ import { getCommandName as getFullCommandName, isStrictlyApproved, } from '../utils/commandUtils.js'; -import { parsePosixSandboxDenials } from '../utils/sandboxDenialUtils.js'; +import { + parsePosixSandboxDenials, + createSandboxDenialCache, + type SandboxDenialCache, +} from '../utils/sandboxDenialUtils.js'; import { handleReadWriteCommands } from '../utils/sandboxReadWriteUtils.js'; export class MacOsSandboxManager implements SandboxManager { + private readonly denialCache: SandboxDenialCache = createSandboxDenialCache(); + constructor(private readonly options: GlobalSandboxOptions) {} isKnownSafeCommand(args: string[]): boolean { @@ -52,7 +58,7 @@ export class MacOsSandboxManager implements SandboxManager { } parseDenials(result: 
ShellExecutionResult): ParsedSandboxDenial | undefined { - return parsePosixSandboxDenials(result); + return parsePosixSandboxDenials(result, this.denialCache); } getWorkspace(): string { diff --git a/packages/core/src/sandbox/utils/fsUtils.test.ts b/packages/core/src/sandbox/utils/fsUtils.test.ts new file mode 100644 index 0000000000..9439050680 --- /dev/null +++ b/packages/core/src/sandbox/utils/fsUtils.test.ts @@ -0,0 +1,52 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { tryRealpath } from './fsUtils.js'; + +describe('fsUtils', () => { + let tempDir: string; + let realTempDir: string; + + beforeAll(() => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'fs-utils-test-')); + realTempDir = fs.realpathSync(tempDir); + }); + + afterAll(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + describe('tryRealpath', () => { + it('should throw error for paths with null bytes', () => { + expect(() => tryRealpath(path.join(tempDir, 'foo\0bar'))).toThrow( + 'Invalid path', + ); + }); + + it('should resolve existing paths', () => { + const resolved = tryRealpath(tempDir); + expect(resolved).toBe(realTempDir); + }); + + it('should handle non-existent paths by resolving parent', () => { + const nonExistentPath = path.join(tempDir, 'non-existent-file-12345'); + const expected = path.join(realTempDir, 'non-existent-file-12345'); + const resolved = tryRealpath(nonExistentPath); + expect(resolved).toBe(expected); + }); + + it('should handle nested non-existent paths', () => { + const nonExistentPath = path.join(tempDir, 'dir1', 'dir2', 'file'); + const expected = path.join(realTempDir, 'dir1', 'dir2', 'file'); + const resolved = tryRealpath(nonExistentPath); + expect(resolved).toBe(expected); + }); + }); +}); diff --git 
a/packages/core/src/sandbox/utils/fsUtils.ts b/packages/core/src/sandbox/utils/fsUtils.ts index e30d55c72d..2e3eda1342 100644 --- a/packages/core/src/sandbox/utils/fsUtils.ts +++ b/packages/core/src/sandbox/utils/fsUtils.ts @@ -6,12 +6,14 @@ import fs from 'node:fs'; import path from 'node:path'; +import { assertValidPathString } from '../../utils/paths.js'; export function isErrnoException(e: unknown): e is NodeJS.ErrnoException { return e instanceof Error && 'code' in e; } export function tryRealpath(p: string): string { + assertValidPathString(p); try { return fs.realpathSync(p); } catch (e) { diff --git a/packages/core/src/sandbox/utils/sandboxDenialUtils.test.ts b/packages/core/src/sandbox/utils/sandboxDenialUtils.test.ts index 3d3380b057..f803cfa779 100644 --- a/packages/core/src/sandbox/utils/sandboxDenialUtils.test.ts +++ b/packages/core/src/sandbox/utils/sandboxDenialUtils.test.ts @@ -5,7 +5,10 @@ */ import { describe, it, expect } from 'vitest'; -import { parsePosixSandboxDenials } from './sandboxDenialUtils.js'; +import { + parsePosixSandboxDenials, + createSandboxDenialCache, +} from './sandboxDenialUtils.js'; import type { ShellExecutionResult } from '../../services/shellExecutionService.js'; describe('parsePosixSandboxDenials', () => { @@ -116,4 +119,109 @@ EACCES: permission denied, mkdir '/Users/galzahavi/.pnpm-store/v3' expect(parsed).toBeDefined(); expect(parsed?.filePaths).toContain('/Users/galzahavi/.pnpm-store/v3'); }); + + it('should detect Python PermissionError and extract path accurately', () => { + const output = `Caught exception: [Errno 13] Permission denied: '/etc/test_sandbox_denial' +Traceback (most recent call last): + File "/usr/local/google/home/davidapierce/gemini-cli/repro_sandbox.py", line 9, in + raise e + File "/usr/local/google/home/davidapierce/gemini-cli/repro_sandbox.py", line 5, in + with open('/etc/test_sandbox_denial', 'w') as f: + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +PermissionError: [Errno 13] Permission denied: 
'/etc/test_sandbox_denial'`; + + const parsed = parsePosixSandboxDenials({ + output, + exitCode: 1, + error: null, + } as unknown as ShellExecutionResult); + + expect(parsed?.filePaths).toEqual(['/etc/test_sandbox_denial']); + }); + + it('should detect new keywords like "access denied" and "forbidden"', () => { + const parsed1 = parsePosixSandboxDenials({ + output: 'Access denied to /var/log/syslog', + exitCode: 1, + error: null, + } as unknown as ShellExecutionResult); + expect(parsed1?.filePaths).toContain('/var/log/syslog'); + + const parsed2 = parsePosixSandboxDenials({ + output: 'Forbidden: access to /root/secret is not allowed', + exitCode: 1, + error: null, + } as unknown as ShellExecutionResult); + expect(parsed2?.filePaths).toContain('/root/secret'); + }); + + it('should detect read-only file system error', () => { + const parsed = parsePosixSandboxDenials({ + output: 'rm: cannot remove /mnt/usb/test: Read-only file system', + exitCode: 1, + error: null, + } as unknown as ShellExecutionResult); + expect(parsed?.filePaths).toContain('/mnt/usb/test'); + }); + + it('should reject paths with directory traversal', () => { + const output = 'ls: /etc/shadow/../../etc/passwd: Operation not permitted'; + const parsed = parsePosixSandboxDenials({ + output, + } as unknown as ShellExecutionResult); + expect(parsed?.filePaths || []).not.toContain( + '/etc/shadow/../../etc/passwd', + ); + }); + + it('should reject home-relative paths with directory traversal', () => { + const output = "Operation not permitted, open '~/../../etc/shadow'"; + const parsed = parsePosixSandboxDenials({ + output, + } as unknown as ShellExecutionResult); + expect(parsed?.filePaths || []).not.toContain('~/../../etc/shadow'); + }); + + it('should reject paths with null bytes', () => { + const output = "Operation not permitted, open '/etc/passwd\0/foo'"; + const parsed = parsePosixSandboxDenials({ + output, + } as unknown as ShellExecutionResult); + expect(parsed?.filePaths || 
[]).not.toContain('/etc/passwd\0/foo'); + }); + + it('should reject paths with internal tildes', () => { + const output = "Operation not permitted, open '/home/user/~/config'"; + const parsed = parsePosixSandboxDenials({ + output, + } as unknown as ShellExecutionResult); + expect(parsed?.filePaths || []).not.toContain('/home/user/~/config'); + }); + + it('should suppress redundant denials if cache is provided', () => { + const cache = createSandboxDenialCache(); + const result = { + output: 'ls: /root: Operation not permitted', + } as unknown as ShellExecutionResult; + + // First call: should process + const parsed1 = parsePosixSandboxDenials(result, cache); + expect(parsed1).toBeDefined(); + + // Second call: should be suppressed + const parsed2 = parsePosixSandboxDenials(result, cache); + expect(parsed2).toBeUndefined(); + }); + + it('should not suppress denials if no cache is provided', () => { + const result = { + output: 'ls: /root: Operation not permitted', + } as unknown as ShellExecutionResult; + + const parsed1 = parsePosixSandboxDenials(result); + expect(parsed1).toBeDefined(); + + const parsed2 = parsePosixSandboxDenials(result); + expect(parsed2).toBeDefined(); + }); }); diff --git a/packages/core/src/sandbox/utils/sandboxDenialUtils.ts b/packages/core/src/sandbox/utils/sandboxDenialUtils.ts index 96082767dd..e288be0ed0 100644 --- a/packages/core/src/sandbox/utils/sandboxDenialUtils.ts +++ b/packages/core/src/sandbox/utils/sandboxDenialUtils.ts @@ -4,8 +4,58 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { LRUCache } from 'mnemonist'; import { type ParsedSandboxDenial } from '../../services/sandboxManager.js'; import type { ShellExecutionResult } from '../../services/shellExecutionService.js'; +import { isValidPathString } from '../../utils/paths.js'; + +/** + * Type for the sandbox denial error cache. + * Stores normalized error output to prevent redundant processing. 
+ */ +export type SandboxDenialCache = LRUCache; + +/** + * Creates a new sandbox denial cache with a standard LRU policy. + */ +export function createSandboxDenialCache(maxSize = 10): SandboxDenialCache { + return new LRUCache(maxSize); +} + +/** + * Sanitizes extracted paths to prevent path traversal vulnerabilities. + * Filters out paths containing '..' or null bytes. + */ +export function sanitizeExtractedPath(p: string): string | undefined { + if (!isValidPathString(p)) return undefined; + + // Reject paths with directory traversal components + const parts = p.split(/[/\\]/); + if (parts.includes('..')) { + return undefined; + } + + // Reject paths with internal tildes (tilde should only be at the beginning) + if (p.indexOf('~') > 0) { + return undefined; + } + + // Basic normalization without resolving symlinks or accessing the file system + let normalized = p; + + // Collapse multiple slashes + normalized = normalized.replace(/\/+/g, '/'); + + // Remove single dot segments + normalized = normalized.replace(/\/\.\//g, '/'); + + // Remove trailing slashes (unless it's exactly '/') + if (normalized.length > 1 && normalized.endsWith('/')) { + normalized = normalized.slice(0, -1); + } + + return normalized; +} /** * Common POSIX-style sandbox denial detection. @@ -13,10 +63,18 @@ import type { ShellExecutionResult } from '../../services/shellExecutionService. 
*/ export function parsePosixSandboxDenials( result: ShellExecutionResult, + cache?: SandboxDenialCache, ): ParsedSandboxDenial | undefined { const output = result.output || ''; const errorOutput = result.error?.message; - const combined = (output + ' ' + (errorOutput || '')).toLowerCase(); + const fullText = output + '\n' + (errorOutput || ''); + const combined = fullText.toLowerCase(); + + // Cache by the first 200 characters of the error to handle variable data (timestamps, PIDs) + const cacheKey = combined.trim().slice(0, 200); + if (cacheKey && cache?.has(cacheKey)) { + return undefined; + } const isFileDenial = [ 'operation not permitted', @@ -27,6 +85,12 @@ export function parsePosixSandboxDenials( 'should be read/write', 'sandbox_apply', 'sandbox: ', + 'access denied', + 'read-only file system', + 'permissionerror', + 'fs.permissiondenied', + 'forbidden', + 'system.unauthorizedaccessexception', ].some((keyword) => combined.includes(keyword)); const isNetworkDenial = [ @@ -46,6 +110,8 @@ export function parsePosixSandboxDenials( 'err_pnpm_fetch', 'err_pnpm_no_matching_version', "syscall: 'listen'", + 'socketexception', + 'networkaccessdenied', ].some((keyword) => combined.includes(keyword)); if (!isFileDenial && !isNetworkDenial) { @@ -57,27 +123,28 @@ export function parsePosixSandboxDenials( // Extract denied paths (POSIX absolute paths or home-relative paths starting with ~) const regexes = [ // format: /path: operation not permitted - /(?:^|\s)['"]?((?:\/|~)[\w.\-/:~]+)['"]?:\s*[Oo]peration not permitted/gi, + /(?:^|\s)['"]?((?:\/|~)(?:[\w.\-/:~]*[\w.\-/~])?)['"]?[\s:,'"[\]]*operation not permitted/gi, // format: operation not permitted, open '/path' - /[Oo]peration not permitted,\s*open\s*['"]?((?:\/|~)[\w.\-/:~]+)['"]?/gi, + /operation not permitted[\s:,'"[\]]*open[\s:,'"[\]]*['"]?((?:\/|~)(?:[\w.\-/:~]*[\w.\-/~])?)['"]?/gi, // format: permission denied, open '/path' - /[Pp]ermission denied,\s*open\s*['"]?((?:\/|~)[\w.\-/:~]+)['"]?/gi, + /permission 
denied[\s:,'"[\]]*open[\s:,'"[\]]*['"]?((?:\/|~)(?:[\w.\-/:~]*[\w.\-/~])?)['"]?/gi, // format: npm error path /path or npm ERR! path /path - /npm\s+(?:error|ERR!)\s+path\s+((?:\/|~)[\w.\-/:~]+)/gi, - // format: EACCES: permission denied, mkdir '/path' - /EACCES:\s*permission denied,\s*\w+\s*['"]?((?:\/|~)[\w.\-/:~]+)['"]?/gi, + /npm[\s!]*[A-Za-z]*err[A-Za-z!]*[\s!]+path[\s!]*((?:\/|~)(?:[\w.\-/:~]*[\w.\-/~])?)/gi, + // format: eacces: permission denied, mkdir '/path' + /eacces[\s:,'"[\]]*permission denied[\s:,'"[\]]*\w+[\s:,'"[\]]*['"]?((?:\/|~)[\w.\-/:~]*[\w.\-/~])?/gi, + // format: PermissionError: [Errno 13] Permission denied: '/path' + /permissionerror[\s:,'"[\]]*(?:[^'"]*)['"]((?:\/|~)[\w.\-/:~]*[\w.\-/~])?['"]/gi, + // format: FileNotFoundError: [Errno 2] No such file or directory: '/path' (sometimes returned in sandbox denials if directory is hidden) + /filenotfounderror[\s:,'"[\]]*(?:[^'"]*)['"]((?:\/|~)[\w.\-/:~]*[\w.\-/~])?['"]/gi, + // format: Error: EACCES: permission denied, open '/path' + /error[\s:,'"[\]]*eacces[\s:,'"[\]]*permission denied[\s:,'"[\]]*(?:[^'"]*)['"]((?:\/|~)[\w.\-/:~]*[\w.\-/~])?['"]/gi, ]; for (const regex of regexes) { let match; - while ((match = regex.exec(output)) !== null) { - filePaths.add(match[1]); - } - if (errorOutput) { - regex.lastIndex = 0; // Reset for next use - while ((match = regex.exec(errorOutput)) !== null) { - filePaths.add(match[1]); - } + while ((match = regex.exec(fullText)) !== null) { + const sanitized = sanitizeExtractedPath(match[1]); + if (sanitized) filePaths.add(sanitized); } } @@ -86,22 +153,16 @@ export function parsePosixSandboxDenials( const fallbackRegex = /(?:^|[\s"'[\]])(\/[a-zA-Z0-9_.-]+(?:\/[a-zA-Z0-9_.-]+)+)(?:$|[\s"'[\]:])/gi; let m; - while ((m = fallbackRegex.exec(output)) !== null) { - const p = m[1]; - if (p && !p.startsWith('/bin/') && !p.startsWith('/usr/bin/')) { - filePaths.add(p); - } - } - if (errorOutput) { - while ((m = fallbackRegex.exec(errorOutput)) !== null) { - const p = 
m[1]; - if (p && !p.startsWith('/bin/') && !p.startsWith('/usr/bin/')) { - filePaths.add(p); - } - } + while ((m = fallbackRegex.exec(fullText)) !== null) { + const sanitized = sanitizeExtractedPath(m[1]); + if (sanitized) filePaths.add(sanitized); } } + if (cacheKey && cache) { + cache.set(cacheKey, true); + } + return { network: isNetworkDenial || undefined, filePaths: filePaths.size > 0 ? Array.from(filePaths) : undefined, diff --git a/packages/core/src/sandbox/utils/sandboxReadWriteUtils.ts b/packages/core/src/sandbox/utils/sandboxReadWriteUtils.ts index 21f8c1f7c3..c1a611716b 100644 --- a/packages/core/src/sandbox/utils/sandboxReadWriteUtils.ts +++ b/packages/core/src/sandbox/utils/sandboxReadWriteUtils.ts @@ -8,6 +8,7 @@ import { type SandboxPermissions, type SandboxRequest, } from '../../services/sandboxManager.js'; +import { isValidPathString } from '../../utils/paths.js'; /** * Validates if the requested paths are within the allowed workspace or allowed paths. @@ -18,6 +19,9 @@ function validatePaths( allowedPaths: string[], ): boolean { for (const p of paths) { + if (!isValidPathString(p)) { + return false; // Reject malicious paths + } const resolvedPath = path.resolve(p); const resolvedWorkspace = path.resolve(workspace); const isInsideWorkspace = diff --git a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts index 943a339960..a2d6428906 100644 --- a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts @@ -35,7 +35,15 @@ import { } from './commandSafety.js'; import { verifySandboxOverrides } from '../utils/commandUtils.js'; import { parseWindowsSandboxDenials } from './windowsSandboxDenialUtils.js'; -import { isSubpath, resolveToRealPath } from '../../utils/paths.js'; +import { + isSubpath, + resolveToRealPath, + assertValidPathString, +} from '../../utils/paths.js'; +import { + type SandboxDenialCache, + 
createSandboxDenialCache, +} from '../utils/sandboxDenialUtils.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -54,6 +62,7 @@ export class WindowsSandboxManager implements SandboxManager { private initialized = false; private readonly allowedCache = new Set(); private readonly deniedCache = new Set(); + private readonly denialCache: SandboxDenialCache = createSandboxDenialCache(); constructor(private readonly options: GlobalSandboxOptions) { this.helperPath = path.resolve(__dirname, WindowsSandboxManager.HELPER_EXE); @@ -73,7 +82,7 @@ export class WindowsSandboxManager implements SandboxManager { } parseDenials(result: ShellExecutionResult): ParsedSandboxDenial | undefined { - return parseWindowsSandboxDenials(result); + return parseWindowsSandboxDenials(result, this.denialCache); } getWorkspace(): string { @@ -88,6 +97,7 @@ export class WindowsSandboxManager implements SandboxManager { * Ensures a file or directory exists. */ private touch(filePath: string, isDirectory: boolean): void { + assertValidPathString(filePath); try { // If it exists (even as a broken symlink), do nothing if (fs.lstatSync(filePath)) return; diff --git a/packages/core/src/sandbox/windows/windowsSandboxDenialUtils.ts b/packages/core/src/sandbox/windows/windowsSandboxDenialUtils.ts index a2b12b0336..288f2dc309 100644 --- a/packages/core/src/sandbox/windows/windowsSandboxDenialUtils.ts +++ b/packages/core/src/sandbox/windows/windowsSandboxDenialUtils.ts @@ -6,6 +6,10 @@ import { type ParsedSandboxDenial } from '../../services/sandboxManager.js'; import type { ShellExecutionResult } from '../../services/shellExecutionService.js'; +import { + type SandboxDenialCache, + sanitizeExtractedPath, +} from '../utils/sandboxDenialUtils.js'; /** * Windows-specific sandbox denial detection. @@ -13,10 +17,18 @@ import type { ShellExecutionResult } from '../../services/shellExecutionService. 
*/ export function parseWindowsSandboxDenials( result: ShellExecutionResult, + cache?: SandboxDenialCache, ): ParsedSandboxDenial | undefined { const output = result.output || ''; const errorOutput = result.error?.message; - const combined = (output + ' ' + (errorOutput || '')).toLowerCase(); + const fullText = output + '\n' + (errorOutput || ''); + const combined = fullText.toLowerCase(); + + // Cache by the first 200 characters of the error to handle variable data (timestamps, PIDs) + const cacheKey = combined.trim().slice(0, 200); + if (cacheKey && cache?.has(cacheKey)) { + return undefined; + } const isFileDenial = [ 'access is denied', @@ -46,30 +58,24 @@ export function parseWindowsSandboxDenials( // 1. Quoted paths: 'C:\Foo Bar' or "C:\Foo Bar" const quotedRegex = /['"]((?:\\\\(?:\?|\.)\\)?[a-zA-Z]:[\\/][^'"]+)['"]/g; - for (const match of output.matchAll(quotedRegex)) { - filePaths.add(match[1]); - } - if (errorOutput) { - for (const match of errorOutput.matchAll(quotedRegex)) { - filePaths.add(match[1]); - } + for (const match of fullText.matchAll(quotedRegex)) { + const sanitized = sanitizeExtractedPath(match[1]); + if (sanitized) filePaths.add(sanitized); } // 2. 
Unquoted paths or paths in PowerShell error format: PermissionDenied: (C:\path:String) const generalRegex = /(?:^|[\s(])((?:\\\\(?:\?|\.)\\)?[a-zA-Z]:[\\/][^"'\s()<>|?*]+)/g; - for (const match of output.matchAll(generalRegex)) { + for (const match of fullText.matchAll(generalRegex)) { // Clean up trailing colon which might be part of the error message rather than the path let p = match[1]; if (p.endsWith(':')) p = p.slice(0, -1); - filePaths.add(p); + const sanitized = sanitizeExtractedPath(p); + if (sanitized) filePaths.add(sanitized); } - if (errorOutput) { - for (const match of errorOutput.matchAll(generalRegex)) { - let p = match[1]; - if (p.endsWith(':')) p = p.slice(0, -1); - filePaths.add(p); - } + + if (cacheKey && cache) { + cache.set(cacheKey, true); } return { diff --git a/packages/core/src/utils/paths.ts b/packages/core/src/utils/paths.ts index 135e047530..b83860eadb 100644 --- a/packages/core/src/utils/paths.ts +++ b/packages/core/src/utils/paths.ts @@ -369,6 +369,22 @@ export function isSubpath(parentPath: string, childPath: string): boolean { ); } +/** + * Type guard to verify a value is a string and does not contain null bytes. + */ +export function isValidPathString(p: unknown): p is string { + return typeof p === 'string' && !p.includes('\0'); +} + +/** + * Asserts that a value is a valid path string, throwing an Error otherwise. + */ +export function assertValidPathString(p: unknown): asserts p is string { + if (!isValidPathString(p)) { + throw new Error(`Invalid path: ${String(p)}`); + } +} + /** * Resolves a path to its real path, sanitizing it first. * - Removes 'file://' protocol if present. @@ -379,6 +395,7 @@ export function isSubpath(parentPath: string, childPath: string): boolean { * @returns The resolved real path. 
*/ export function resolveToRealPath(pathStr: string): string { + assertValidPathString(pathStr); let resolvedPath = pathStr; try { From 986293bd388e2124930b47bc0054d7b39e49977a Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Tue, 7 Apr 2026 14:45:18 -0700 Subject: [PATCH 03/39] feat(core): add agent protocol UI types and experimental flag (#24275) Co-authored-by: Adam Weidman Co-authored-by: Adam Weidman --- docs/reference/configuration.md | 6 ++++ packages/cli/src/config/settingsSchema.ts | 10 ++++++ .../cli/src/nonInteractiveCliAgentSession.ts | 2 ++ packages/core/src/agent/event-translator.ts | 1 + packages/core/src/agent/types.ts | 31 +++++++++++++++++++ packages/core/src/config/config.ts | 8 +++++ schemas/settings.schema.json | 7 +++++ 7 files changed, 65 insertions(+) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 5c9a3e7044..1955507c62 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1606,6 +1606,12 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.adk.agentSessionInteractiveEnabled`** (boolean): + - **Description:** Enable the agent session implementation for the interactive + CLI. + - **Default:** `false` + - **Requires restart:** Yes + - **`experimental.enableAgents`** (boolean): - **Description:** Enable local and remote subagents. 
- **Default:** `true` diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 9343be6b02..730bd4b939 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1970,6 +1970,16 @@ const SETTINGS_SCHEMA = { description: 'Enable non-interactive agent sessions.', showInDialog: false, }, + agentSessionInteractiveEnabled: { + type: 'boolean', + label: 'Interactive Agent Session Enabled', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Enable the agent session implementation for the interactive CLI.', + showInDialog: false, + }, }, }, enableAgents: { diff --git a/packages/cli/src/nonInteractiveCliAgentSession.ts b/packages/cli/src/nonInteractiveCliAgentSession.ts index 78fc18be4e..fe5fbceba2 100644 --- a/packages/cli/src/nonInteractiveCliAgentSession.ts +++ b/packages/cli/src/nonInteractiveCliAgentSession.ts @@ -37,6 +37,7 @@ import { LegacyAgentSession, ToolErrorType, geminiPartsToContentParts, + debugLogger, } from '@google/gemini-cli-core'; import type { Part } from '@google/genai'; @@ -599,6 +600,7 @@ export async function runNonInteractive({ // Explicitly ignore these non-interactive events break; default: + debugLogger.error('Unknown agent event type:', event); event satisfies never; break; } diff --git a/packages/core/src/agent/event-translator.ts b/packages/core/src/agent/event-translator.ts index 00b5d12b4f..cb299b494c 100644 --- a/packages/core/src/agent/event-translator.ts +++ b/packages/core/src/agent/event-translator.ts @@ -432,6 +432,7 @@ function isStructuredError(error: unknown): error is StructuredError { return ( typeof error === 'object' && error !== null && + 'status' in error && 'message' in error && typeof error.message === 'string' ); diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index 9bc3e81e0f..19837c138e 100644 --- a/packages/core/src/agent/types.ts +++ 
b/packages/core/src/agent/types.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type { Kind } from '../tools/tools.js'; + export type WithMeta = { _meta?: Record }; export type Unsubscribe = () => void; @@ -180,6 +182,16 @@ export interface ToolRequest { name: string; /** The arguments for the tool. */ args: Record; + /** UI specific metadata */ + _meta?: { + legacyState?: { + displayName?: string; + isOutputMarkdown?: boolean; + description?: string; + kind?: Kind; + }; + [key: string]: unknown; + }; } /** @@ -192,6 +204,18 @@ export interface ToolUpdate { displayContent?: ContentPart[]; content?: ContentPart[]; data?: Record; + /** UI specific metadata */ + _meta?: { + legacyState?: { + status?: string; + progressMessage?: string; + progress?: number; + progressTotal?: number; + pid?: number; + description?: string; + }; + [key: string]: unknown; + }; } export interface ToolResponse { @@ -205,6 +229,13 @@ export interface ToolResponse { data?: Record; /** When true, the tool call encountered an error that will be sent to the model. 
*/ isError?: boolean; + /** UI specific metadata */ + _meta?: { + legacyState?: { + outputFile?: string; + }; + [key: string]: unknown; + }; } export type ElicitationRequest = { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index a36d3b7a02..4ec526569f 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -225,6 +225,7 @@ export interface GemmaModelRouterSettings { export interface ADKSettings { agentSessionNoninteractiveEnabled?: boolean; + agentSessionInteractiveEnabled?: boolean; } export interface ExtensionSetting { @@ -894,6 +895,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly gemmaModelRouter: GemmaModelRouterSettings; private readonly agentSessionNoninteractiveEnabled: boolean; + private readonly agentSessionInteractiveEnabled: boolean; private readonly continueOnFailedApiCall: boolean; private readonly retryFetchErrors: boolean; @@ -1325,6 +1327,8 @@ export class Config implements McpContext, AgentLoopContext { this.agentSessionNoninteractiveEnabled = params.adk?.agentSessionNoninteractiveEnabled ?? false; + this.agentSessionInteractiveEnabled = + params.adk?.agentSessionInteractiveEnabled ?? false; this.retryFetchErrors = params.retryFetchErrors ?? true; this.maxAttempts = Math.min( params.maxAttempts ?? DEFAULT_MAX_ATTEMPTS, @@ -3396,6 +3400,10 @@ export class Config implements McpContext, AgentLoopContext { return this.agentSessionNoninteractiveEnabled; } + getAgentSessionInteractiveEnabled(): boolean { + return this.agentSessionInteractiveEnabled; + } + /** * Get override settings for a specific agent. * Reads from agents.overrides.. 
diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 71172717e4..5179263596 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2775,6 +2775,13 @@ "markdownDescription": "Enable non-interactive agent sessions.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", "default": false, "type": "boolean" + }, + "agentSessionInteractiveEnabled": { + "title": "Interactive Agent Session Enabled", + "description": "Enable the agent session implementation for the interactive CLI.", + "markdownDescription": "Enable the agent session implementation for the interactive CLI.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + "type": "boolean" } }, "additionalProperties": false From 3c5b5db034eda1d04dd4dd48d7c94b325658933a Mon Sep 17 00:00:00 2001 From: Yuna Seol Date: Tue, 7 Apr 2026 18:35:04 -0400 Subject: [PATCH 04/39] feat(core): use experiment flags for default fetch timeouts (#24261) --- packages/cli/src/test-utils/mockConfig.ts | 1 + .../src/code_assist/experiments/flagNames.ts | 1 + packages/core/src/config/config.test.ts | 62 +++++++++++++++++++ packages/core/src/config/config.ts | 39 +++++++++--- packages/core/src/core/baseLlmClient.test.ts | 1 + packages/core/src/core/client.test.ts | 1 + packages/core/src/core/geminiChat.test.ts | 1 + .../src/core/geminiChat_network_retry.test.ts | 1 + packages/core/src/utils/fetch.test.ts | 43 +++++++++++-- packages/core/src/utils/fetch.ts | 34 +++++++--- 10 files changed, 164 insertions(+), 20 deletions(-) diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index 7be8463382..6561ac1db0 100644 --- a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -136,6 +136,7 @@ export const createMockConfig = (overrides: Partial = {}): Config => getRetryFetchErrors: vi.fn().mockReturnValue(true), getEnableShellOutputEfficiency: 
vi.fn().mockReturnValue(true), getShellToolInactivityTimeout: vi.fn().mockReturnValue(300000), + getRequestTimeoutMs: vi.fn().mockReturnValue(undefined), getShellExecutionConfig: vi.fn().mockReturnValue({ sandboxManager: new NoopSandboxManager(), sanitizationConfig: { diff --git a/packages/core/src/code_assist/experiments/flagNames.ts b/packages/core/src/code_assist/experiments/flagNames.ts index 99f2f88cc7..125ff005a9 100644 --- a/packages/core/src/code_assist/experiments/flagNames.ts +++ b/packages/core/src/code_assist/experiments/flagNames.ts @@ -19,6 +19,7 @@ export const ExperimentFlags = { GEMINI_3_1_PRO_LAUNCHED: 45760185, PRO_MODEL_NO_ACCESS: 45768879, GEMINI_3_1_FLASH_LITE_LAUNCHED: 45771641, + DEFAULT_REQUEST_TIMEOUT: 45773134, } as const; export type ExperimentFlagName = diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 002d4da50e..24f6f5256e 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -644,6 +644,58 @@ describe('Server Config (config.ts)', () => { }, ); }); + + describe('getRequestTimeoutMs', () => { + it('should return undefined if the flag is not set', () => { + const config = new Config(baseParams); + expect(config.getRequestTimeoutMs()).toBeUndefined(); + }); + + it('should return timeout in milliseconds if flag is set', () => { + const config = new Config({ + ...baseParams, + experiments: { + flags: { + [ExperimentFlags.DEFAULT_REQUEST_TIMEOUT]: { + intValue: '30', + }, + }, + experimentIds: [], + }, + } as unknown as ConfigParameters); + expect(config.getRequestTimeoutMs()).toBe(30000); + }); + + it('should return undefined if intValue is not a valid integer', () => { + const config = new Config({ + ...baseParams, + experiments: { + flags: { + [ExperimentFlags.DEFAULT_REQUEST_TIMEOUT]: { + intValue: 'abc', + }, + }, + experimentIds: [], + }, + } as unknown as ConfigParameters); + expect(config.getRequestTimeoutMs()).toBeUndefined(); + }); + 
+ it('should return undefined if intValue is negative', () => { + const config = new Config({ + ...baseParams, + experiments: { + flags: { + [ExperimentFlags.DEFAULT_REQUEST_TIMEOUT]: { + intValue: '-10', + }, + }, + experimentIds: [], + }, + } as unknown as ConfigParameters); + expect(config.getRequestTimeoutMs()).toBeUndefined(); + }); + }); }); describe('refreshAuth', () => { @@ -2078,8 +2130,18 @@ describe('BaseLlmClient Lifecycle', () => { usageStatisticsEnabled: false, }; + it('should throw an error if getBaseLlmClient is called before experiments have been fetched', () => { + const config = new Config(baseParams); + // By default on a new Config instance, experiments are undefined + expect(() => config.getBaseLlmClient()).toThrow( + 'BaseLlmClient not initialized. Ensure experiments have been fetched and configuration is ready.', + ); + }); + it('should throw an error if getBaseLlmClient is called before refreshAuth', () => { const config = new Config(baseParams); + // Explicitly set experiments to avoid triggering the new missing-experiments error + config.setExperiments({ flags: {}, experimentIds: [] }); expect(() => config.getBaseLlmClient()).toThrow( 'BaseLlmClient not initialized. 
Ensure authentication has occurred and ContentGenerator is ready.', ); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 4ec526569f..d4c7c498a5 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -160,7 +160,7 @@ import { } from '../code_assist/experiments/experiments.js'; import { AgentRegistry } from '../agents/registry.js'; import { AcknowledgedAgentsService } from '../agents/acknowledgedAgents.js'; -import { setGlobalProxy } from '../utils/fetch.js'; +import { setGlobalProxy, updateGlobalFetchTimeouts } from '../utils/fetch.js'; import { SubagentTool } from '../agents/subagent-tool.js'; import { ExperimentFlags } from '../code_assist/experiments/flagNames.js'; import { debugLogger } from '../utils/debugLogger.js'; @@ -1548,9 +1548,6 @@ export class Config implements McpContext, AgentLoopContext { // Only assign to instance properties after successful initialization this.contentGeneratorConfig = newContentGeneratorConfig; - // Initialize BaseLlmClient now that the ContentGenerator is available - this.baseLlmClient = new BaseLlmClient(this.contentGenerator, this); - const codeAssistServer = getCodeAssistServer(this); const quotaPromise = codeAssistServer?.projectId ? 
this.refreshUserQuota() @@ -1566,6 +1563,17 @@ export class Config implements McpContext, AgentLoopContext { return undefined; }); + // Fetch experiments and update timeouts before continuing initialization + const experiments = await this.experimentsPromise; + + const requestTimeoutMs = this.getRequestTimeoutMs(); + if (requestTimeoutMs !== undefined) { + updateGlobalFetchTimeouts(requestTimeoutMs); + } + + // Initialize BaseLlmClient now that the ContentGenerator and experiments are available + this.baseLlmClient = new BaseLlmClient(this.contentGenerator, this); + await quotaPromise; const authType = this.contentGeneratorConfig.authType; @@ -1585,9 +1593,6 @@ export class Config implements McpContext, AgentLoopContext { this.setModel(DEFAULT_GEMINI_MODEL_AUTO); } - // Fetch admin controls - const experiments = await this.experimentsPromise; - const adminControlsEnabled = experiments?.flags[ExperimentFlags.ENABLE_ADMIN_CONTROLS]?.boolValue ?? false; @@ -1633,6 +1638,11 @@ export class Config implements McpContext, AgentLoopContext { getBaseLlmClient(): BaseLlmClient { if (!this.baseLlmClient) { // Handle cases where initialization might be deferred or authentication failed + if (!this.experiments) { + throw new Error( + 'BaseLlmClient not initialized. Ensure experiments have been fetched and configuration is ready.', + ); + } if (this.contentGenerator) { this.baseLlmClient = new BaseLlmClient( this.getContentGenerator(), @@ -3153,6 +3163,21 @@ export class Config implements McpContext, AgentLoopContext { ); } + /** + * Returns the configured default request timeout in milliseconds. 
+ */ + getRequestTimeoutMs(): number | undefined { + const flag = + this.experiments?.flags?.[ExperimentFlags.DEFAULT_REQUEST_TIMEOUT]; + if (flag?.intValue !== undefined) { + const seconds = parseInt(flag.intValue, 10); + if (Number.isInteger(seconds) && seconds >= 0) { + return seconds * 1000; // Convert seconds to milliseconds + } + } + return undefined; + } + /** * Returns whether Gemini 3.1 Flash Lite has been launched. * diff --git a/packages/core/src/core/baseLlmClient.test.ts b/packages/core/src/core/baseLlmClient.test.ts index a35096f528..5bfefa6665 100644 --- a/packages/core/src/core/baseLlmClient.test.ts +++ b/packages/core/src/core/baseLlmClient.test.ts @@ -102,6 +102,7 @@ describe('BaseLlmClient', () => { ); mockConfig = { + getRequestTimeoutMs: vi.fn().mockReturnValue(undefined), getSessionId: vi.fn().mockReturnValue('test-session-id'), getContentGeneratorConfig: vi .fn() diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 8863bcd24f..f8178488bd 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -203,6 +203,7 @@ describe('Gemini Client (client.ts)', () => { authType: AuthType.USE_GEMINI, }; mockConfig = { + getRequestTimeoutMs: vi.fn().mockReturnValue(undefined), getContentGeneratorConfig: vi .fn() .mockReturnValue(contentGeneratorConfig), diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index aad2054ad0..e822fd7fd6 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -142,6 +142,7 @@ describe('GeminiChat', () => { let currentActiveModel = 'gemini-pro'; mockConfig = { + getRequestTimeoutMs: vi.fn().mockReturnValue(undefined), get config() { return this; }, diff --git a/packages/core/src/core/geminiChat_network_retry.test.ts b/packages/core/src/core/geminiChat_network_retry.test.ts index 4dd060214c..4683e29261 100644 --- 
a/packages/core/src/core/geminiChat_network_retry.test.ts +++ b/packages/core/src/core/geminiChat_network_retry.test.ts @@ -83,6 +83,7 @@ describe('GeminiChat Network Retries', () => { const testMessageBus = { publish: vi.fn(), subscribe: vi.fn() }; mockConfig = { + getRequestTimeoutMs: vi.fn().mockReturnValue(undefined), get config() { return this; }, diff --git a/packages/core/src/utils/fetch.test.ts b/packages/core/src/utils/fetch.test.ts index c4644c3cba..e4da21ffa0 100644 --- a/packages/core/src/utils/fetch.test.ts +++ b/packages/core/src/utils/fetch.test.ts @@ -4,21 +4,37 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { updateGlobalFetchTimeouts } from './fetch.js'; import { describe, it, expect, vi, beforeEach, afterAll } from 'vitest'; -import { - isPrivateIp, - isPrivateIpAsync, - isAddressPrivate, - fetchWithTimeout, -} from './fetch.js'; import * as dnsPromises from 'node:dns/promises'; import type { LookupAddress, LookupAllOptions } from 'node:dns'; import ipaddr from 'ipaddr.js'; +const { setGlobalDispatcher, Agent, ProxyAgent } = vi.hoisted(() => ({ + setGlobalDispatcher: vi.fn(), + Agent: vi.fn(), + ProxyAgent: vi.fn(), +})); + +vi.mock('undici', () => ({ + setGlobalDispatcher, + Agent, + ProxyAgent, +})); + vi.mock('node:dns/promises', () => ({ lookup: vi.fn(), })); +// Import after mocks are established +const { + isPrivateIp, + isPrivateIpAsync, + isAddressPrivate, + fetchWithTimeout, + setGlobalProxy, +} = await import('./fetch.js'); + // Mock global fetch const originalFetch = global.fetch; global.fetch = vi.fn(); @@ -183,4 +199,19 @@ describe('fetch utils', () => { ); }); }); + + describe('setGlobalProxy', () => { + it('should configure ProxyAgent with experiment flag timeout', () => { + const proxyUrl = 'http://proxy.example.com'; + updateGlobalFetchTimeouts(45773134); + setGlobalProxy(proxyUrl); + + expect(ProxyAgent).toHaveBeenCalledWith({ + uri: proxyUrl, + headersTimeout: 45773134, + bodyTimeout: 45773134, + }); + 
expect(setGlobalDispatcher).toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/utils/fetch.ts b/packages/core/src/utils/fetch.ts index 8f1ddf864f..755875ff75 100644 --- a/packages/core/src/utils/fetch.ts +++ b/packages/core/src/utils/fetch.ts @@ -10,9 +10,6 @@ import { Agent, ProxyAgent, setGlobalDispatcher } from 'undici'; import ipaddr from 'ipaddr.js'; import { lookup } from 'node:dns/promises'; -const DEFAULT_HEADERS_TIMEOUT = 300000; // 5 minutes -const DEFAULT_BODY_TIMEOUT = 300000; // 5 minutes - export class FetchError extends Error { constructor( message: string, @@ -31,14 +28,36 @@ export class PrivateIpError extends Error { } } +let defaultTimeout = 300000; // 5 minutes +let currentProxy: string | undefined = undefined; + // Configure default global dispatcher with higher timeouts setGlobalDispatcher( new Agent({ - headersTimeout: DEFAULT_HEADERS_TIMEOUT, - bodyTimeout: DEFAULT_BODY_TIMEOUT, + headersTimeout: defaultTimeout, + bodyTimeout: defaultTimeout, }), ); +export function updateGlobalFetchTimeouts(timeoutMs: number) { + if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { + throw new RangeError( + `Invalid timeout value: ${timeoutMs}. Must be a positive finite number.`, + ); + } + defaultTimeout = timeoutMs; + if (currentProxy) { + setGlobalProxy(currentProxy); + } else { + setGlobalDispatcher( + new Agent({ + headersTimeout: defaultTimeout, + bodyTimeout: defaultTimeout, + }), + ); + } +} + /** * Sanitizes a hostname by stripping IPv6 brackets if present. 
*/ @@ -191,11 +210,12 @@ export async function fetchWithTimeout( } export function setGlobalProxy(proxy: string) { + currentProxy = proxy; setGlobalDispatcher( new ProxyAgent({ uri: proxy, - headersTimeout: DEFAULT_HEADERS_TIMEOUT, - bodyTimeout: DEFAULT_BODY_TIMEOUT, + headersTimeout: defaultTimeout, + bodyTimeout: defaultTimeout, }), ); } From f96d5f98feaff353e1e91859e5b0a5f4ef9dc1d8 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Tue, 7 Apr 2026 22:45:40 +0000 Subject: [PATCH 05/39] =?UTF-8?q?Revert=20"fix(ui):=20improve=20narration?= =?UTF-8?q?=20suppression=20and=20reduce=20flicker=20(#2=E2=80=A6=20(#2485?= =?UTF-8?q?7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/ui/components/MainContent.test.tsx | 158 +----------------- .../cli/src/ui/components/MainContent.tsx | 43 +---- 2 files changed, 9 insertions(+), 192 deletions(-) diff --git a/packages/cli/src/ui/components/MainContent.test.tsx b/packages/cli/src/ui/components/MainContent.test.tsx index ec75573d75..2bc6ee27bc 100644 --- a/packages/cli/src/ui/components/MainContent.test.tsx +++ b/packages/cli/src/ui/components/MainContent.test.tsx @@ -6,11 +6,7 @@ import { renderWithProviders } from '../../test-utils/render.js'; import { createMockSettings } from '../../test-utils/settings.js'; -import { - makeFakeConfig, - CoreToolCallStatus, - UPDATE_TOPIC_TOOL_NAME, -} from '@google/gemini-cli-core'; +import { makeFakeConfig, CoreToolCallStatus } from '@google/gemini-cli-core'; import { waitFor } from '../../test-utils/async.js'; import { MainContent } from './MainContent.js'; import { getToolGroupBorderAppearance } from '../utils/borderStyles.js'; @@ -732,158 +728,6 @@ describe('MainContent', () => { unmount(); }); - describe('Narration Suppression', () => { - const settingsWithNarration = createMockSettings({ - merged: { - ui: { inlineThinkingMode: 'expanded' }, - experimental: { topicUpdateNarration: true }, - }, - }); - - it('suppresses 
thinking ALWAYS when narration is enabled', async () => { - mockUseSettings.mockReturnValue(settingsWithNarration); - const uiState = { - ...defaultMockUiState, - history: [ - { id: 1, type: 'user' as const, text: 'Hello' }, - { - id: 2, - type: 'thinking' as const, - thought: { - subject: 'Thinking...', - description: 'Thinking about hello', - }, - }, - { id: 3, type: 'gemini' as const, text: 'I am helping.' }, - ], - }; - - const { lastFrame, unmount } = await renderWithProviders( - , - { - uiState: uiState as Partial, - settings: settingsWithNarration, - }, - ); - - const output = lastFrame(); - expect(output).not.toContain('Thinking...'); - expect(output).toContain('I am helping.'); - unmount(); - }); - - it('suppresses text in intermediate turns (contains non-topic tools)', async () => { - mockUseSettings.mockReturnValue(settingsWithNarration); - const uiState = { - ...defaultMockUiState, - history: [ - { id: 100, type: 'user' as const, text: 'Search' }, - { - id: 101, - type: 'gemini' as const, - text: 'I will now search the files.', - }, - { - id: 102, - type: 'tool_group' as const, - tools: [ - { - callId: '1', - name: 'ls', - args: { path: '.' }, - status: CoreToolCallStatus.Success, - }, - ], - }, - ], - }; - - const { lastFrame, unmount } = await renderWithProviders( - , - { - uiState: uiState as Partial, - settings: settingsWithNarration, - }, - ); - - const output = lastFrame(); - expect(output).not.toContain('I will now search the files.'); - unmount(); - }); - - it('suppresses text that precedes a topic tool in the same turn', async () => { - mockUseSettings.mockReturnValue(settingsWithNarration); - const uiState = { - ...defaultMockUiState, - history: [ - { id: 200, type: 'user' as const, text: 'Hello' }, - { id: 201, type: 'gemini' as const, text: 'I will now help you.' 
}, - { - id: 202, - type: 'tool_group' as const, - tools: [ - { - callId: '1', - name: UPDATE_TOPIC_TOOL_NAME, - args: { title: 'Helping', summary: 'Helping the user' }, - status: CoreToolCallStatus.Success, - }, - ], - }, - ], - }; - - const { lastFrame, unmount } = await renderWithProviders( - , - { - uiState: uiState as Partial, - settings: settingsWithNarration, - }, - ); - - const output = lastFrame(); - expect(output).not.toContain('I will now help you.'); - expect(output).toContain('Helping'); - expect(output).toContain('Helping the user'); - unmount(); - }); - - it('shows text in the final turn if it comes AFTER the topic tool', async () => { - mockUseSettings.mockReturnValue(settingsWithNarration); - const uiState = { - ...defaultMockUiState, - history: [ - { id: 300, type: 'user' as const, text: 'Hello' }, - { - id: 301, - type: 'tool_group' as const, - tools: [ - { - callId: '1', - name: UPDATE_TOPIC_TOOL_NAME, - args: { title: 'Final Answer', summary: 'I have finished' }, - status: CoreToolCallStatus.Success, - }, - ], - }, - { id: 302, type: 'gemini' as const, text: 'Here is your answer.' 
}, - ], - }; - - const { lastFrame, unmount } = await renderWithProviders( - , - { - uiState: uiState as Partial, - settings: settingsWithNarration, - }, - ); - - const output = lastFrame(); - expect(output).toContain('Here is your answer.'); - unmount(); - }); - }); - it('renders multiple thinking messages sequentially correctly', async () => { mockUseSettings.mockReturnValue({ merged: { diff --git a/packages/cli/src/ui/components/MainContent.tsx b/packages/cli/src/ui/components/MainContent.tsx index 527462be28..b46af4965b 100644 --- a/packages/cli/src/ui/components/MainContent.tsx +++ b/packages/cli/src/ui/components/MainContent.tsx @@ -91,47 +91,20 @@ export const MainContent = () => { const flags = new Array(combinedHistory.length).fill(false); if (topicUpdateNarrationEnabled) { - let turnIsIntermediate = false; - let hasTopicToolInTurn = false; - + let toolGroupInTurn = false; for (let i = combinedHistory.length - 1; i >= 0; i--) { const item = combinedHistory[i]; if (item.type === 'user' || item.type === 'user_shell') { - turnIsIntermediate = false; - hasTopicToolInTurn = false; + toolGroupInTurn = false; } else if (item.type === 'tool_group') { - const hasTopic = item.tools.some((t) => isTopicTool(t.name)); - const hasNonTopic = item.tools.some((t) => !isTopicTool(t.name)); - if (hasTopic) { - hasTopicToolInTurn = true; - } - if (hasNonTopic) { - turnIsIntermediate = true; - } + toolGroupInTurn = item.tools.some((t) => isTopicTool(t.name)); } else if ( - item.type === 'thinking' || - item.type === 'gemini' || - item.type === 'gemini_content' + (item.type === 'thinking' || + item.type === 'gemini' || + item.type === 'gemini_content') && + toolGroupInTurn ) { - // Rule 1: Always suppress thinking when narration is enabled to avoid - // "flashing" as the model starts its response, and because the Topic - // UI provides the necessary high-level intent. 
- if (item.type === 'thinking') { - flags[i] = true; - continue; - } - - // Rule 2: Suppress text in intermediate turns (turns containing non-topic - // tools) to hide mechanical narration. - if (turnIsIntermediate) { - flags[i] = true; - } - - // Rule 3: Suppress text that precedes a topic tool in the same turn, - // as the topic tool "replaces" it. - if (hasTopicToolInTurn) { - flags[i] = true; - } + flags[i] = true; } } } From 1aa798dd18326efcfbe8ca856bfc958f51938d07 Mon Sep 17 00:00:00 2001 From: JAYADITYA <96861162+JayadityaGit@users.noreply.github.com> Date: Wed, 8 Apr 2026 05:06:44 +0530 Subject: [PATCH 06/39] refactor(cli): remove duplication in interactive shell awaiting input hint (#24801) --- packages/cli/src/ui/components/StatusRow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli/src/ui/components/StatusRow.tsx b/packages/cli/src/ui/components/StatusRow.tsx index 24b5a97d4e..f162481ce5 100644 --- a/packages/cli/src/ui/components/StatusRow.tsx +++ b/packages/cli/src/ui/components/StatusRow.tsx @@ -331,7 +331,7 @@ export const StatusRow: React.FC = ({ ) : isInteractiveShellWaiting ? ( - ! 
Shell awaiting input (Tab to focus) + {INTERACTIVE_SHELL_WAITING_PHRASE} ) : ( From 16768c08f2fa1f0fe2dca32021f81d9f6c8e3316 Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Tue, 7 Apr 2026 16:45:22 -0700 Subject: [PATCH 07/39] refactor(core): make LegacyAgentSession dependencies optional (#24287) Co-authored-by: Adam Weidman Co-authored-by: Adam Weidman --- .../src/agent/legacy-agent-session.test.ts | 23 +++++++----- .../core/src/agent/legacy-agent-session.ts | 36 ++++++++++++++----- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/packages/core/src/agent/legacy-agent-session.test.ts b/packages/core/src/agent/legacy-agent-session.test.ts index 38bea34910..1de5d90e20 100644 --- a/packages/core/src/agent/legacy-agent-session.test.ts +++ b/packages/core/src/agent/legacy-agent-session.test.ts @@ -17,6 +17,9 @@ import type { ToolCallRequestInfo, } from '../scheduler/types.js'; import { CoreToolCallStatus } from '../scheduler/types.js'; +import type { GeminiClient } from '../core/client.js'; +import type { Scheduler } from '../scheduler/scheduler.js'; +import type { Config } from '../config/config.js'; // --------------------------------------------------------------------------- // Mock helpers @@ -24,7 +27,7 @@ import { CoreToolCallStatus } from '../scheduler/types.js'; function createMockDeps( overrides?: Partial, -): LegacyAgentSessionDeps { +): Required { const mockClient = { sendMessageStream: vi.fn(), getChat: vi.fn().mockReturnValue({ @@ -40,18 +43,22 @@ function createMockDeps( const mockConfig = { getMaxSessionTurns: vi.fn().mockReturnValue(-1), getModel: vi.fn().mockReturnValue('gemini-2.5-pro'), + getGeminiClient: vi.fn().mockReturnValue(mockClient), + getMessageBus: vi.fn().mockImplementation(() => ({ + subscribe: vi.fn(), + unsubscribe: vi.fn(), + })), }; return { - client: mockClient as unknown as LegacyAgentSessionDeps['client'], - - scheduler: mockScheduler as unknown as LegacyAgentSessionDeps['scheduler'], - - config: mockConfig as 
unknown as LegacyAgentSessionDeps['config'], + client: mockClient as unknown as GeminiClient, + scheduler: mockScheduler as unknown as Scheduler, + config: mockConfig as unknown as Config, promptId: 'test-prompt', streamId: 'test-stream', + getPreferredEditor: vi.fn().mockReturnValue(undefined), ...overrides, - }; + } as Required; } async function* makeStream( @@ -129,7 +136,7 @@ async function collectEvents( // --------------------------------------------------------------------------- describe('LegacyAgentSession', () => { - let deps: LegacyAgentSessionDeps; + let deps: Required; beforeEach(() => { deps = createMockDeps(); diff --git a/packages/core/src/agent/legacy-agent-session.ts b/packages/core/src/agent/legacy-agent-session.ts index 667c85f5ed..757dbdb952 100644 --- a/packages/core/src/agent/legacy-agent-session.ts +++ b/packages/core/src/agent/legacy-agent-session.ts @@ -14,10 +14,11 @@ import type { Part } from '@google/genai'; import type { GeminiClient } from '../core/client.js'; import type { Config } from '../config/config.js'; import type { ToolCallRequestInfo } from '../scheduler/types.js'; -import type { Scheduler } from '../scheduler/scheduler.js'; +import { Scheduler } from '../scheduler/scheduler.js'; import { recordToolCallInteractions } from '../code_assist/telemetry.js'; import { ToolErrorType, isFatalToolError } from '../tools/tool-error.js'; import { debugLogger } from '../utils/debugLogger.js'; +import type { EditorType } from '../utils/editor.js'; import { buildToolResponseData, contentPartsToGeminiParts, @@ -45,14 +46,17 @@ function isAbortLikeError(err: unknown): boolean { } export interface LegacyAgentSessionDeps { - client: GeminiClient; - scheduler: Scheduler; config: Config; - promptId: string; + client?: GeminiClient; + scheduler?: Scheduler; + promptId?: string; streamId?: string; + getPreferredEditor?: () => EditorType | undefined; } -class LegacyAgentProtocol implements AgentProtocol { +const schedulerMap = new WeakMap(); + 
+export class LegacyAgentProtocol implements AgentProtocol { private _events: AgentEvent[] = []; private _subscribers = new Set<(event: AgentEvent) => void>(); private _translationState: TranslationState; @@ -69,10 +73,26 @@ class LegacyAgentProtocol implements AgentProtocol { constructor(deps: LegacyAgentSessionDeps) { this._translationState = createTranslationState(deps.streamId); this._nextStreamIdOverride = deps.streamId; - this._client = deps.client; - this._scheduler = deps.scheduler; this._config = deps.config; - this._promptId = deps.promptId; + this._client = deps.client ?? deps.config.getGeminiClient(); + this._promptId = deps.promptId ?? deps.config.promptId ?? ''; + + if (deps.scheduler) { + this._scheduler = deps.scheduler; + } else { + let scheduler = schedulerMap.get(deps.config); + if (!scheduler) { + const sessionId = deps.config.getSessionId(); + const schedulerId = `legacy-agent-scheduler-${sessionId}`; + scheduler = new Scheduler({ + context: deps.config, + schedulerId, + getPreferredEditor: deps.getPreferredEditor ?? (() => undefined), + }); + schedulerMap.set(deps.config, scheduler); + } + this._scheduler = scheduler; + } } get events(): readonly AgentEvent[] { From 9fd92c0eeacae8c7e612f9cb795e7a37373bc452 Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Tue, 7 Apr 2026 17:13:06 -0700 Subject: [PATCH 08/39] Changelog for v0.37.0-preview.2 (#24848) Co-authored-by: gemini-cli-robot <224641728+gemini-cli-robot@users.noreply.github.com> --- docs/changelogs/preview.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/changelogs/preview.md b/docs/changelogs/preview.md index 5bb8d5b575..95feee1e2a 100644 --- a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.37.0-preview.1 +# Preview release: v0.37.0-preview.2 -Released: April 02, 2026 +Released: April 07, 2026 Our preview release includes the latest, new, and experimental features. 
This release may not be as stable as our [latest weekly release](latest.md). @@ -33,6 +33,10 @@ npm install -g @google/gemini-cli@preview ## What's Changed +- fix(patch): cherry-pick cb7f7d6 to release/v0.37.0-preview.1-pr-24342 to patch + version v0.37.0-preview.1 and create version 0.37.0-preview.2 by + @gemini-cli-robot in + [#24842](https://github.com/google-gemini/gemini-cli/pull/24842) - fix(patch): cherry-pick 64c928f to release/v0.37.0-preview.0-pr-23257 to patch version v0.37.0-preview.0 and create version 0.37.0-preview.1 by @gemini-cli-robot in @@ -419,4 +423,4 @@ npm install -g @google/gemini-cli@preview [#23275](https://github.com/google-gemini/gemini-cli/pull/23275) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.36.0-preview.8...v0.37.0-preview.1 +https://github.com/google-gemini/gemini-cli/compare/v0.36.0-preview.8...v0.37.0-preview.2 From 28efab483fc921ee889eacc723a84806928d956d Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Tue, 7 Apr 2026 18:52:33 -0700 Subject: [PATCH 09/39] fix(cli): always show shell command description or actual command (#24774) --- packages/core/src/tools/shell.test.ts | 26 ++++++++++++++++++++++++++ packages/core/src/tools/shell.ts | 4 +++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 245b7f0eee..9551fd9638 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -803,6 +803,32 @@ describe('ShellTool', () => { }); }); + describe('invocation getDescription', () => { + it('should return the description if it is present and not empty whitespace', () => { + const invocation = shellTool.build({ + command: 'echo hello', + description: 'prints hello', + }); + expect(invocation.getDescription()).toBe('prints hello'); + }); + + it('should return the raw command if description is an empty string', () => { + const invocation = shellTool.build({ + command: 'echo 
hello', + description: '', + }); + expect(invocation.getDescription()).toBe('echo hello'); + }); + + it('should return the raw command if description is just whitespace', () => { + const invocation = shellTool.build({ + command: 'echo hello', + description: ' ', + }); + expect(invocation.getDescription()).toBe('echo hello'); + }); + }); + describe('llmContent output format', () => { const mockAbortSignal = new AbortController().signal; diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 7ca475808a..3ea29474c6 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -136,7 +136,9 @@ export class ShellToolInvocation extends BaseToolInvocation< } getDescription(): string { - return this.params.description || ''; + return this.params.description?.trim() + ? this.params.description + : this.params.command; } private simplifyPaths(paths: Set): string[] { From 47c5d25d93a767e96f2fd4f1e2d9092b2c024cc9 Mon Sep 17 00:00:00 2001 From: Dev Randalpura Date: Tue, 7 Apr 2026 23:03:36 -0400 Subject: [PATCH 10/39] Added flag for ept size and increased default size (#24859) --- packages/cli/src/gemini.test.tsx | 28 ++++++++++++++++++++++++---- packages/cli/src/gemini.tsx | 25 +++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index fd19ffa79c..611850bd4a 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -379,15 +379,30 @@ describe('initializeOutputListenersAndFlush', () => { describe('getNodeMemoryArgs', () => { let osTotalMemSpy: MockInstance; let v8GetHeapStatisticsSpy: MockInstance; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let originalConfig: any; beforeEach(() => { osTotalMemSpy = vi.spyOn(os, 'totalmem'); v8GetHeapStatisticsSpy = vi.spyOn(v8, 'getHeapStatistics'); delete process.env['GEMINI_CLI_NO_RELAUNCH']; + + originalConfig = process.config; 
+ Object.defineProperty(process, 'config', { + value: { + ...originalConfig, + variables: { ...originalConfig?.variables, v8_enable_sandbox: 1 }, + }, + configurable: true, + }); }); afterEach(() => { vi.restoreAllMocks(); + Object.defineProperty(process, 'config', { + value: originalConfig, + configurable: true, + }); }); it('should return empty array if GEMINI_CLI_NO_RELAUNCH is set', () => { @@ -400,8 +415,10 @@ describe('getNodeMemoryArgs', () => { v8GetHeapStatisticsSpy.mockReturnValue({ heap_size_limit: 8 * 1024 * 1024 * 1024, // 8GB }); - // Target is 50% of 16GB = 8GB. Current is 8GB. No relaunch needed. - expect(getNodeMemoryArgs(false)).toEqual([]); + // Target is 50% of 16GB = 8GB. Current is 8GB. Relaunch needed for EPT size only. + expect(getNodeMemoryArgs(false)).toEqual([ + '--max-external-pointer-table-size=268435456', + ]); }); it('should return memory args if current heap limit is insufficient', () => { @@ -409,8 +426,11 @@ describe('getNodeMemoryArgs', () => { v8GetHeapStatisticsSpy.mockReturnValue({ heap_size_limit: 4 * 1024 * 1024 * 1024, // 4GB }); - // Target is 50% of 16GB = 8GB. Current is 4GB. Relaunch needed. - expect(getNodeMemoryArgs(false)).toEqual(['--max-old-space-size=8192']); + // Target is 50% of 16GB = 8GB. Current is 4GB. Relaunch needed for both. 
+ expect(getNodeMemoryArgs(false)).toEqual([ + '--max-external-pointer-table-size=268435456', + '--max-old-space-size=8192', + ]); }); it('should log debug info when isDebugMode is true', () => { diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index fa22f59267..f77fc11d61 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -111,6 +111,8 @@ export function validateDnsResolutionOrder( return defaultValue; } +const DEFAULT_EPT_SIZE = (256 * 1024 * 1024).toString(); + export function getNodeMemoryArgs(isDebugMode: boolean): string[] { const totalMemoryMB = os.totalmem() / (1024 * 1024); const heapStats = v8.getHeapStatistics(); @@ -130,16 +132,35 @@ export function getNodeMemoryArgs(isDebugMode: boolean): string[] { return []; } + const args: string[] = []; + + // Automatically expand the V8 External Pointer Table to 256MB to prevent + // out-of-memory crashes during high native-handle concurrency. + // Note: Only supported in specific Node.js versions compiled with V8 Sandbox enabled. 
+ const eptFlag = `--max-external-pointer-table-size=${DEFAULT_EPT_SIZE}`; + const isV8SandboxEnabled = + // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion + (process.config?.variables as any)?.v8_enable_sandbox === 1; + + if ( + isV8SandboxEnabled && + !process.execArgv.some((arg) => + arg.startsWith('--max-external-pointer-table-size'), + ) + ) { + args.push(eptFlag); + } + if (targetMaxOldSpaceSizeInMB > currentMaxOldSpaceSizeMb) { if (isDebugMode) { debugLogger.debug( `Need to relaunch with more memory: ${targetMaxOldSpaceSizeInMB.toFixed(2)} MB`, ); } - return [`--max-old-space-size=${targetMaxOldSpaceSizeInMB}`]; + args.push(`--max-old-space-size=${targetMaxOldSpaceSizeInMB}`); } - return []; + return args; } export function setupUnhandledRejectionHandler() { From b9f1d832c80b644eec2e997e85a6105b9d0c0b5d Mon Sep 17 00:00:00 2001 From: Anjaligarhwal Date: Wed, 8 Apr 2026 08:35:53 +0530 Subject: [PATCH 11/39] fix(core): dispose Scheduler to prevent McpProgress listener leak (#24870) --- packages/cli/src/nonInteractiveCli.test.ts | 1 + packages/cli/src/nonInteractiveCli.ts | 4 +- .../src/nonInteractiveCliAgentSession.test.ts | 1 + .../cli/src/nonInteractiveCliAgentSession.ts | 4 +- .../core/src/agents/agent-scheduler.test.ts | 52 +++++++++++++++++++ packages/core/src/agents/agent-scheduler.ts | 6 ++- 6 files changed, 65 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 855707de9e..5d0c3d1016 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -71,6 +71,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { Scheduler: class { schedule = mockSchedulerSchedule; cancelAll = vi.fn(); + dispose = vi.fn(); }, isTelemetrySdkInitialized: vi.fn().mockReturnValue(true), ChatRecordingService: MockChatRecordingService, diff --git 
a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 26daaf66a1..dc5255edee 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -187,6 +187,7 @@ export async function runNonInteractive( }; let errorToHandle: unknown | undefined; + let scheduler: Scheduler | undefined; try { consolePatcher.patch(); @@ -215,7 +216,7 @@ export async function runNonInteractive( }); const geminiClient = config.getGeminiClient(); - const scheduler = new Scheduler({ + scheduler = new Scheduler({ context: config, messageBus: config.getMessageBus(), getPreferredEditor: () => undefined, @@ -528,6 +529,7 @@ export async function runNonInteractive( // Cleanup stdin cancellation before other cleanup cleanupStdinCancellation(); + scheduler?.dispose(); consolePatcher.cleanup(); coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback); } diff --git a/packages/cli/src/nonInteractiveCliAgentSession.test.ts b/packages/cli/src/nonInteractiveCliAgentSession.test.ts index 617f80aca6..923109643c 100644 --- a/packages/cli/src/nonInteractiveCliAgentSession.test.ts +++ b/packages/cli/src/nonInteractiveCliAgentSession.test.ts @@ -71,6 +71,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { Scheduler: class { schedule = mockSchedulerSchedule; cancelAll = vi.fn(); + dispose = vi.fn(); }, isTelemetrySdkInitialized: vi.fn().mockReturnValue(true), ChatRecordingService: MockChatRecordingService, diff --git a/packages/cli/src/nonInteractiveCliAgentSession.ts b/packages/cli/src/nonInteractiveCliAgentSession.ts index fe5fbceba2..7f36ce6cf5 100644 --- a/packages/cli/src/nonInteractiveCliAgentSession.ts +++ b/packages/cli/src/nonInteractiveCliAgentSession.ts @@ -184,6 +184,7 @@ export async function runNonInteractive({ }; let errorToHandle: unknown | undefined; + let scheduler: Scheduler | undefined; let abortSession = () => {}; try { consolePatcher.patch(); @@ -215,7 +216,7 @@ export async function runNonInteractive({ }); 
const geminiClient = config.getGeminiClient(); - const scheduler = new Scheduler({ + scheduler = new Scheduler({ context: config, messageBus: config.getMessageBus(), getPreferredEditor: () => undefined, @@ -612,6 +613,7 @@ export async function runNonInteractive({ cleanupStdinCancellation(); abortController.signal.removeEventListener('abort', abortSession); + scheduler?.dispose(); consolePatcher.cleanup(); coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback); } diff --git a/packages/core/src/agents/agent-scheduler.test.ts b/packages/core/src/agents/agent-scheduler.test.ts index 5d5b6569af..8ac15f181e 100644 --- a/packages/core/src/agents/agent-scheduler.test.ts +++ b/packages/core/src/agents/agent-scheduler.test.ts @@ -15,6 +15,7 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js'; vi.mock('../scheduler/scheduler.js', () => ({ Scheduler: vi.fn().mockImplementation(() => ({ schedule: vi.fn().mockResolvedValue([{ status: 'success' }]), + dispose: vi.fn(), })), })); @@ -125,6 +126,57 @@ describe('agent-scheduler', () => { expect(schedulerConfig.toolRegistry).not.toBe(mainRegistry); }); + it('should dispose the scheduler after schedule completes', async () => { + const mockConfig = { + getPromptRegistry: vi.fn(), + getResourceRegistry: vi.fn(), + messageBus: mockMessageBus, + toolRegistry: mockToolRegistry, + } as unknown as Mocked; + + const options = { + schedulerId: 'subagent-1', + toolRegistry: mockToolRegistry as unknown as ToolRegistry, + signal: new AbortController().signal, + }; + + await scheduleAgentTools(mockConfig as unknown as Config, [], options); + + const schedulerInstance = vi.mocked(Scheduler).mock.results[0].value; + expect(schedulerInstance.dispose).toHaveBeenCalledOnce(); + }); + + it('should dispose the scheduler even when schedule throws', async () => { + const scheduleError = new Error('schedule failed'); + vi.mocked(Scheduler).mockImplementationOnce( + () => + ({ + schedule: vi.fn().mockRejectedValue(scheduleError), + 
dispose: vi.fn(), + }) as unknown as Scheduler, + ); + + const mockConfig = { + getPromptRegistry: vi.fn(), + getResourceRegistry: vi.fn(), + messageBus: mockMessageBus, + toolRegistry: mockToolRegistry, + } as unknown as Mocked; + + const options = { + schedulerId: 'subagent-1', + toolRegistry: mockToolRegistry as unknown as ToolRegistry, + signal: new AbortController().signal, + }; + + await expect( + scheduleAgentTools(mockConfig as unknown as Config, [], options), + ).rejects.toThrow('schedule failed'); + + const schedulerInstance = vi.mocked(Scheduler).mock.results[0].value; + expect(schedulerInstance.dispose).toHaveBeenCalledOnce(); + }); + it('should create an AgentLoopContext that has a defined .config property', async () => { const mockConfig = { getPromptRegistry: vi.fn(), diff --git a/packages/core/src/agents/agent-scheduler.ts b/packages/core/src/agents/agent-scheduler.ts index 8bed1de00b..09b32980a9 100644 --- a/packages/core/src/agents/agent-scheduler.ts +++ b/packages/core/src/agents/agent-scheduler.ts @@ -85,5 +85,9 @@ export async function scheduleAgentTools( onWaitingForConfirmation, }); - return scheduler.schedule(requests, signal); + try { + return await scheduler.schedule(requests, signal); + } finally { + scheduler.dispose(); + } } From 7e1938c1bc9d00156ee0650e0a7fdcb3e167308f Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Tue, 7 Apr 2026 22:47:54 -0700 Subject: [PATCH 12/39] fix(cli): switch default back to terminalBuffer=false and fix regressions introduced for that mode (#24873) --- docs/cli/settings.md | 2 +- docs/reference/configuration.md | 2 +- packages/cli/src/config/settingsSchema.ts | 2 +- packages/cli/src/interactiveCli.tsx | 3 +- .../src/ui/__snapshots__/App.test.tsx.snap | 6 ++ .../src/ui/components/InputPrompt.test.tsx | 48 +++++----- .../cli/src/ui/components/InputPrompt.tsx | 66 ++++++++++---- .../HistoryItemDisplay.test.tsx.snap | 86 +----------------- .../__snapshots__/InputPrompt.test.tsx.snap | 14 +-- 
.../messages/ShellToolMessage.test.tsx | 4 +- .../components/messages/ToolMessage.test.tsx | 4 +- .../messages/ToolResultDisplay.test.tsx | 86 +++++++++++++++++- .../components/messages/ToolResultDisplay.tsx | 90 ++++++++++++++----- .../ToolResultDisplayOverflow.test.tsx | 19 ++-- ...ccepted-file-edit-with-diff-stats.snap.svg | 23 ++++- .../DenseToolMessage.test.tsx.snap | 32 ++++++- ...ilableTerminalHeight-is-undefined.snap.svg | 41 +++------ .../ToolResultDisplay.test.tsx.snap | 71 ++++++++------- .../src/ui/components/shared/MaxSizedBox.tsx | 2 +- packages/core/src/config/config.ts | 2 +- schemas/settings.schema.json | 4 +- 21 files changed, 363 insertions(+), 244 deletions(-) diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 4a6b9a77b7..dbb3651a4f 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -75,7 +75,7 @@ they appear in the UI. | Show User Identity | `ui.showUserIdentity` | Show the signed-in user's identity (e.g. email) in the UI. | `true` | | Use Alternate Screen Buffer | `ui.useAlternateBuffer` | Use an alternate screen buffer for the UI, preserving shell history. | `false` | | Render Process | `ui.renderProcess` | Enable Ink render process for the UI. | `true` | -| Terminal Buffer | `ui.terminalBuffer` | Use the new terminal buffer architecture for rendering. | `true` | +| Terminal Buffer | `ui.terminalBuffer` | Use the new terminal buffer architecture for rendering. | `false` | | Use Background Color | `ui.useBackgroundColor` | Whether to use background colors in the UI. | `true` | | Incremental Rendering | `ui.incrementalRendering` | Enable incremental rendering for the UI. This option will reduce flickering but may cause rendering artifacts. Only supported when useAlternateBuffer is enabled. | `true` | | Show Spinner | `ui.showSpinner` | Show the spinner during operations. 
| `true` | diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 1955507c62..1fdbc755f0 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -346,7 +346,7 @@ their corresponding top-level category object in your `settings.json` file. - **`ui.terminalBuffer`** (boolean): - **Description:** Use the new terminal buffer architecture for rendering. - - **Default:** `true` + - **Default:** `false` - **Requires restart:** Yes - **`ui.useBackgroundColor`** (boolean): diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 730bd4b939..c041aaa8c3 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -757,7 +757,7 @@ const SETTINGS_SCHEMA = { label: 'Terminal Buffer', category: 'UI', requiresRestart: true, - default: true, + default: false, description: 'Use the new terminal buffer architecture for rendering.', showInDialog: true, }, diff --git a/packages/cli/src/interactiveCli.tsx b/packages/cli/src/interactiveCli.tsx index 418f58b193..965bc27693 100644 --- a/packages/cli/src/interactiveCli.tsx +++ b/packages/cli/src/interactiveCli.tsx @@ -156,8 +156,9 @@ export async function startInteractiveUI( useAlternateBuffer || config.getUseTerminalBuffer(), patchConsole: false, alternateBuffer: useAlternateBuffer, - renderProcess: config.getUseRenderProcess(), terminalBuffer: config.getUseTerminalBuffer(), + renderProcess: + config.getUseRenderProcess() && config.getUseTerminalBuffer(), incrementalRendering: settings.merged.ui.incrementalRendering !== false && useAlternateBuffer && diff --git a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap index 94b1f9b1a4..611f2e0908 100644 --- a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap @@ -55,6 +55,12 @@ Footer Gemini CLI v1.2.3 + +Tips for getting started: 
+1. Create GEMINI.md files to customize your interactions +2. /help for more information +3. Ask coding questions, edit code or run commands +4. Be specific for the best results Composer " `; diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx index 4d40809837..3fdaa479cc 100644 --- a/packages/cli/src/ui/components/InputPrompt.test.tsx +++ b/packages/cli/src/ui/components/InputPrompt.test.tsx @@ -69,6 +69,7 @@ import { AppEvent, TransientMessageType, } from '../../utils/events.js'; +import '../../test-utils/customMatchers.js'; vi.mock('../hooks/useShellHistory.js'); vi.mock('../hooks/useCommandCompletion.js'); @@ -254,7 +255,7 @@ describe('InputPrompt', () => { setText: vi.fn( (newText: string, cursorPosition?: 'start' | 'end' | number) => { mockBuffer.text = newText; - mockBuffer.lines = [newText]; + mockBuffer.lines = newText.split('\n'); let col = 0; if (typeof cursorPosition === 'number') { col = cursorPosition; @@ -264,11 +265,18 @@ describe('InputPrompt', () => { col = newText.length; } mockBuffer.cursor = [0, col]; - mockBuffer.allVisualLines = [newText]; - mockBuffer.viewportVisualLines = [newText]; - mockBuffer.allVisualLines = [newText]; - mockBuffer.visualToLogicalMap = [[0, 0]]; + mockBuffer.allVisualLines = newText.split('\n'); + mockBuffer.viewportVisualLines = newText.split('\n'); + mockBuffer.visualToLogicalMap = newText + .split('\n') + .map((_, i) => [i, 0] as [number, number]); mockBuffer.visualCursor = [0, col]; + mockBuffer.visualScrollRow = 0; + mockBuffer.viewportHeight = 10; + mockBuffer.visualToTransformedMap = newText + .split('\n') + .map((_, i) => i); + mockBuffer.transformationsByLine = newText.split('\n').map(() => []); }, ), replaceRangeByOffset: vi.fn(), @@ -276,6 +284,7 @@ describe('InputPrompt', () => { allVisualLines: [''], visualCursor: [0, 0], visualScrollRow: 0, + viewportHeight: 10, handleInput: vi.fn((key: Key) => { if 
(defaultKeyMatchers[Command.CLEAR_INPUT](key)) { if (mockBuffer.text.length > 0) { @@ -409,6 +418,7 @@ describe('InputPrompt', () => { getTargetDir: () => path.join('test', 'project', 'src'), getVimMode: () => false, getUseBackgroundColor: () => true, + getUseTerminalBuffer: () => false, getTerminalBackground: () => undefined, getWorkspaceContext: () => ({ getDirectories: () => ['/test/project/src'], @@ -3779,11 +3789,7 @@ describe('InputPrompt', () => { ); it('should unfocus embedded shell on click', async () => { - props.buffer.text = 'hello'; - props.buffer.lines = ['hello']; - props.buffer.allVisualLines = ['hello']; - props.buffer.viewportVisualLines = ['hello']; - props.buffer.visualToLogicalMap = [[0, 0]]; + props.buffer.setText('hello'); props.isEmbeddedShellFocused = true; const { stdin, stdout, unmount } = await renderWithProviders( @@ -4291,11 +4297,7 @@ describe('InputPrompt', () => { describe('IME Cursor Support', () => { it('should report correct cursor position for simple ASCII text', async () => { const text = 'hello'; - mockBuffer.text = text; - mockBuffer.lines = [text]; - mockBuffer.allVisualLines = [text]; - mockBuffer.viewportVisualLines = [text]; - mockBuffer.visualToLogicalMap = [[0, 0]]; + mockBuffer.setText(text); mockBuffer.visualCursor = [0, 3]; // Cursor after 'hel' mockBuffer.visualScrollRow = 0; @@ -4322,11 +4324,7 @@ describe('InputPrompt', () => { it('should report correct cursor position for text with double-width characters', async () => { const text = '👍hello'; - mockBuffer.text = text; - mockBuffer.lines = [text]; - mockBuffer.allVisualLines = [text]; - mockBuffer.viewportVisualLines = [text]; - mockBuffer.visualToLogicalMap = [[0, 0]]; + mockBuffer.setText(text); mockBuffer.visualCursor = [0, 2]; // Cursor after '👍h' (Note: '👍' is one code point but width 2) mockBuffer.visualScrollRow = 0; @@ -4352,11 +4350,7 @@ describe('InputPrompt', () => { it('should report correct cursor position for a line full of "😀" emojis', async () => 
{ const text = '😀😀😀'; - mockBuffer.text = text; - mockBuffer.lines = [text]; - mockBuffer.allVisualLines = [text]; - mockBuffer.viewportVisualLines = [text]; - mockBuffer.visualToLogicalMap = [[0, 0]]; + mockBuffer.setText(text); mockBuffer.visualCursor = [0, 2]; // Cursor after 2 emojis (each 1 code point, width 2) mockBuffer.visualScrollRow = 0; @@ -4501,12 +4495,12 @@ describe('InputPrompt', () => { mockBuffer.lines = [logicalLine]; mockBuffer.allVisualLines = [visualLine]; mockBuffer.viewportVisualLines = [visualLine]; - mockBuffer.allVisualLines = [visualLine]; mockBuffer.visualToLogicalMap = [[0, 0]]; mockBuffer.visualToTransformedMap = [0]; mockBuffer.transformationsByLine = [transformations]; mockBuffer.cursor = [0, cursorCol]; - mockBuffer.visualCursor = [0, 0]; + mockBuffer.visualCursor = [0, cursorCol]; + mockBuffer.visualScrollRow = 0; }; it('should snapshot collapsed image path', async () => { diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index c8d7efa1b4..7e59ab4d14 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -5,7 +5,14 @@ */ import type React from 'react'; -import { useCallback, useEffect, useState, useRef, useMemo } from 'react'; +import { + useCallback, + useEffect, + useState, + useRef, + useMemo, + Fragment, +} from 'react'; import clipboardy from 'clipboardy'; import { Box, Text, useStdout, type DOMElement } from 'ink'; import { SuggestionsDisplay, MAX_WIDTH } from './SuggestionsDisplay.js'; @@ -1820,24 +1827,45 @@ export const InputPrompt: React.FC = ({ height={Math.min(buffer.viewportHeight, scrollableData.length)} width="100%" > - 1} - keyExtractor={(item) => - item.type === 'visualLine' - ? 
`line-${item.absoluteVisualIdx}` - : `ghost-${item.index}` - } - width="100%" - backgroundColor={listBackgroundColor} - containerHeight={Math.min( - buffer.viewportHeight, - scrollableData.length, - )} - /> + {isAlternateBuffer ? ( + 1} + fixedItemHeight={true} + keyExtractor={(item) => + item.type === 'visualLine' + ? `line-${item.absoluteVisualIdx}` + : `ghost-${item.index}` + } + width={inputWidth} + backgroundColor={listBackgroundColor} + containerHeight={Math.min( + buffer.viewportHeight, + scrollableData.length, + )} + /> + ) : ( + scrollableData + .slice( + buffer.visualScrollRow, + buffer.visualScrollRow + buffer.viewportHeight, + ) + .map((item, index) => { + const actualIndex = buffer.visualScrollRow + index; + const key = + item.type === 'visualLine' + ? `line-${item.absoluteVisualIdx}` + : `ghost-${item.index}`; + return ( + + {renderItem({ item, index: actualIndex })} + + ); + }) + )} )} diff --git a/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap index 7d6fdeb42c..d237b30f99 100644 --- a/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap @@ -112,48 +112,7 @@ exports[` > gemini items (alternateBuffer=false) > should exports[` > gemini items (alternateBuffer=false) > should render a truncated gemini item 1`] = ` "✦ Example code block: - 1 Line 1 - 2 Line 2 - 3 Line 3 - 4 Line 4 - 5 Line 5 - 6 Line 6 - 7 Line 7 - 8 Line 8 - 9 Line 9 - 10 Line 10 - 11 Line 11 - 12 Line 12 - 13 Line 13 - 14 Line 14 - 15 Line 15 - 16 Line 16 - 17 Line 17 - 18 Line 18 - 19 Line 19 - 20 Line 20 - 21 Line 21 - 22 Line 22 - 23 Line 23 - 24 Line 24 - 25 Line 25 - 26 Line 26 - 27 Line 27 - 28 Line 28 - 29 Line 29 - 30 Line 30 - 31 Line 31 - 32 Line 32 - 33 Line 33 - 34 Line 34 - 35 Line 35 - 36 Line 36 - 37 Line 37 - 38 Line 38 - 39 Line 39 - 40 Line 40 - 41 Line 
41 - 42 Line 42 + ... 42 hidden (Ctrl+O) ... 43 Line 43 44 Line 44 45 Line 45 @@ -167,48 +126,7 @@ exports[` > gemini items (alternateBuffer=false) > should exports[` > gemini items (alternateBuffer=false) > should render a truncated gemini_content item 1`] = ` " Example code block: - 1 Line 1 - 2 Line 2 - 3 Line 3 - 4 Line 4 - 5 Line 5 - 6 Line 6 - 7 Line 7 - 8 Line 8 - 9 Line 9 - 10 Line 10 - 11 Line 11 - 12 Line 12 - 13 Line 13 - 14 Line 14 - 15 Line 15 - 16 Line 16 - 17 Line 17 - 18 Line 18 - 19 Line 19 - 20 Line 20 - 21 Line 21 - 22 Line 22 - 23 Line 23 - 24 Line 24 - 25 Line 25 - 26 Line 26 - 27 Line 27 - 28 Line 28 - 29 Line 29 - 30 Line 30 - 31 Line 31 - 32 Line 32 - 33 Line 33 - 34 Line 34 - 35 Line 35 - 36 Line 36 - 37 Line 37 - 38 Line 38 - 39 Line 39 - 40 Line 40 - 41 Line 41 - 42 Line 42 + ... 42 hidden (Ctrl+O) ... 43 Line 43 44 Line 44 45 Line 45 diff --git a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap index caa270d8c4..ab6fe9b928 100644 --- a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap @@ -93,7 +93,7 @@ exports[`InputPrompt > Highlighting and Cursor Display > single-line scenarios > exports[`InputPrompt > History Navigation and Completion Suppression > should not render suggestions during history navigation 1`] = ` "▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ > second message - +▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ " `; @@ -120,30 +120,30 @@ exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and c exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-collapsed-match 1`] = ` 
"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ (r:) commit - - +▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ + git commit -m "feat: add search" in src/app " `; exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-expanded-match 1`] = ` "▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ (r:) commit - - +▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ + git commit -m "feat: add search" in src/app " `; exports[`InputPrompt > image path transformation snapshots > should snapshot collapsed image path 1`] = ` "▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ > [Image ...reenshot2x.png] - +▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ " `; exports[`InputPrompt > image path transformation snapshots > should snapshot expanded image path when cursor is on it 1`] = ` "▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ > @/path/to/screenshots/screenshot2x.png - +▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ " `; diff --git a/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx b/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx index 57c9050560..676051501c 100644 --- a/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx @@ -293,8 +293,8 @@ describe('', () => { await waitUntilReady(); const frame = lastFrame(); // Since it's Executing, it might still constrain to ACTIVE_SHELL_MAX_LINES (10) - // Actually let's just assert on the behaviour that happens right 
now (which is 10 lines) - expect(frame.match(/Line \d+/g)?.length).toBe(10); + // Actually let's just assert on the behaviour that happens right now (which is 100 lines because we removed the terminalBuffer check) + expect(frame.match(/Line \d+/g)?.length).toBe(100); unmount(); }); diff --git a/packages/cli/src/ui/components/messages/ToolMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolMessage.test.tsx index c7e5df8750..bdf9f207ed 100644 --- a/packages/cli/src/ui/components/messages/ToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolMessage.test.tsx @@ -444,8 +444,8 @@ describe('', () => { constrainHeight: true, }, width: 80, - config: makeFakeConfig({ useAlternateBuffer: false }), - settings: createMockSettings({ ui: { useAlternateBuffer: false } }), + config: makeFakeConfig({ useAlternateBuffer: true }), + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), }, ); const output = lastFrame(); diff --git a/packages/cli/src/ui/components/messages/ToolResultDisplay.test.tsx b/packages/cli/src/ui/components/messages/ToolResultDisplay.test.tsx index f30c309898..c273fa7f47 100644 --- a/packages/cli/src/ui/components/messages/ToolResultDisplay.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolResultDisplay.test.tsx @@ -5,6 +5,7 @@ */ import { renderWithProviders } from '../../../test-utils/render.js'; +import { waitFor } from '../../../test-utils/async.js'; import { createMockSettings } from '../../../test-utils/settings.js'; import { ToolResultDisplay } from './ToolResultDisplay.js'; import { describe, it, expect, vi } from 'vitest'; @@ -351,9 +352,10 @@ describe('ToolResultDisplay', () => { expect(output).not.toContain('Line 1'); expect(output).not.toContain('Line 2'); - expect(output).toContain('Line 3'); + expect(output).not.toContain('Line 3'); expect(output).toContain('Line 4'); expect(output).toContain('Line 5'); + expect(output).toContain('hidden'); expect(output).toMatchSnapshot(); unmount(); }); @@ 
-391,4 +393,86 @@ describe('ToolResultDisplay', () => { await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); + + it('stays scrolled to the bottom when lines are incrementally added', async () => { + const createAnsiLine = (text: string) => [ + { + text, + fg: '', + bg: '', + bold: false, + italic: false, + underline: false, + dim: false, + inverse: false, + isUninitialized: false, + }, + ]; + + let currentLines: AnsiOutput = []; + + // Start with 3 lines, max lines 5. It should fit without scrolling. + for (let i = 1; i <= 3; i++) { + currentLines.push(createAnsiLine(`Line ${i}`)); + } + + const renderResult = await renderWithProviders( + , + { + config: makeFakeConfig({ useAlternateBuffer: false }), + settings: createMockSettings({ ui: { useAlternateBuffer: false } }), + uiState: { constrainHeight: true, terminalHeight: 10 }, + }, + ); + + const { waitUntilReady, rerender, lastFrame, unmount } = renderResult; + await waitUntilReady(); + + // Verify initial render has the first 3 lines + expect(lastFrame()).toContain('Line 1'); + expect(lastFrame()).toContain('Line 3'); + + // Incrementally add lines up to 8. Max lines is 5. + // So by the end, it should only show lines 4-8. + for (let i = 4; i <= 8; i++) { + currentLines = [...currentLines, createAnsiLine(`Line ${i}`)]; + rerender( + , + ); + // Wait for the new line to be rendered + await waitFor(() => { + expect(lastFrame()).toContain(`Line ${i}`); + }); + } + + await waitUntilReady(); + const output = lastFrame(); + + // The component should have automatically scrolled to the bottom. + // Lines 1, 2, 3, 4 should be scrolled out of view. + expect(output).not.toContain('Line 1'); + expect(output).not.toContain('Line 2'); + expect(output).not.toContain('Line 3'); + expect(output).not.toContain('Line 4'); + // Lines 5, 6, 7, 8 should be visible along with the truncation indicator. 
+ expect(output).toContain('hidden'); + expect(output).toContain('Line 5'); + expect(output).toContain('Line 8'); + + expect(output).toMatchSnapshot(); + + unmount(); + }); }); diff --git a/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx b/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx index aaa30a74d7..16c6019c98 100644 --- a/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx +++ b/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx @@ -10,6 +10,7 @@ import { DiffRenderer } from './DiffRenderer.js'; import { MarkdownDisplay } from '../../utils/MarkdownDisplay.js'; import { AnsiOutputText, AnsiLineText } from '../AnsiOutput.js'; import { SlicingMaxSizedBox } from '../shared/SlicingMaxSizedBox.js'; +import { MaxSizedBox } from '../shared/MaxSizedBox.js'; import { theme } from '../../semantic-colors.js'; import { type AnsiOutput, @@ -51,7 +52,7 @@ export const ToolResultDisplay: React.FC = ({ hasFocus = false, overflowDirection = 'top', }) => { - const { renderMarkdown } = useUIState(); + const { renderMarkdown, constrainHeight } = useUIState(); const isAlternateBuffer = useAlternateBuffer(); const availableHeight = calculateToolContentMaxLines({ @@ -209,30 +210,73 @@ export const ToolResultDisplay: React.FC = ({ if (Array.isArray(resultDisplay)) { const limit = maxLines ?? availableHeight ?? ACTIVE_SHELL_MAX_LINES; - const listHeight = Math.min( - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - (resultDisplay as AnsiOutput).length, - limit, - ); + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const data = resultDisplay as AnsiOutput; - const initialScrollIndex = - overflowDirection === 'bottom' ? 0 : SCROLL_TO_ITEM_END; + // Calculate list height: if not constrained, use full data length. + // If constrained (e.g. alternate buffer), limit to available height + // to ensure virtualization works and fits within the viewport. + const listHeight = !constrainHeight + ? 
data.length + : Math.min(data.length, limit); - return ( - - 1} - keyExtractor={keyExtractor} - initialScrollIndex={initialScrollIndex} - hasFocus={hasFocus} - fixedItemHeight={true} - /> - - ); + if (isAlternateBuffer) { + const initialScrollIndex = + overflowDirection === 'bottom' ? 0 : SCROLL_TO_ITEM_END; + + return ( + + 1} + fixedItemHeight={true} + keyExtractor={keyExtractor} + initialScrollIndex={initialScrollIndex} + hasFocus={hasFocus} + /> + + ); + } else { + let displayData = data; + let hiddenLines = 0; + + if (constrainHeight && data.length > listHeight) { + hiddenLines = data.length - listHeight; + if (overflowDirection === 'top') { + displayData = data.slice(hiddenLines); + } else { + displayData = data.slice(0, listHeight); + } + } + + return ( + + + {displayData.map((item, index) => { + const actualIndex = + (overflowDirection === 'top' ? hiddenLines : 0) + index; + return ( + + + + ); + })} + + + ); + } } // ASB Mode Handling (Interactive/Fullscreen) diff --git a/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx b/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx index cd06d93616..397f1ba1a7 100644 --- a/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx @@ -29,11 +29,12 @@ describe('ToolResultDisplay Overflow', () => { await waitUntilReady(); const output = lastFrame(); - expect(output).not.toContain('Line 1'); - expect(output).not.toContain('Line 2'); - expect(output).toContain('Line 3'); - expect(output).toContain('Line 4'); - expect(output).toContain('Line 5'); + expect(output).toContain('Line 1'); + expect(output).toContain('Line 2'); + expect(output).not.toContain('Line 3'); + expect(output).not.toContain('Line 4'); + expect(output).not.toContain('Line 5'); + expect(output).toContain('hidden'); unmount(); }); @@ -57,9 +58,10 @@ describe('ToolResultDisplay Overflow', () => { 
expect(output).not.toContain('Line 1'); expect(output).not.toContain('Line 2'); - expect(output).toContain('Line 3'); + expect(output).not.toContain('Line 3'); expect(output).toContain('Line 4'); expect(output).toContain('Line 5'); + expect(output).toContain('hidden'); unmount(); }); @@ -95,11 +97,10 @@ describe('ToolResultDisplay Overflow', () => { expect(output).toContain('Line 1'); expect(output).toContain('Line 2'); - expect(output).toContain('Line 3'); + expect(output).not.toContain('Line 3'); expect(output).not.toContain('Line 4'); expect(output).not.toContain('Line 5'); - // ScrollableList uses a scroll thumb rather than writing "hidden" - expect(output).toContain('█'); + expect(output).toContain('hidden'); unmount(); }); }); diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg index 39e6604692..7b21bd65a0 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg +++ b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg @@ -1,18 +1,33 @@ - + - + edit test.ts - - Accepted + → Accepted ( +1 , -1 ) + + 1 + + + - + + + old + + 1 + + + + + + + new \ No newline at end of file diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap index 18f5f93a9f..d08b84c1a9 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap +++ 
b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap @@ -7,12 +7,21 @@ exports[`DenseToolMessage > Toggleable Diff View (Alternate Buffer) > hides diff exports[`DenseToolMessage > Toggleable Diff View (Alternate Buffer) > shows diff content by default when NOT in alternate buffer mode 1`] = ` " ✓ test-tool test.ts → Accepted + + 1 - old line + 1 + new line " `; exports[`DenseToolMessage > Visual Regression > matches SVG snapshot for a Rejected tool call 1`] = `" - read_file Reading important.txt"`; -exports[`DenseToolMessage > Visual Regression > matches SVG snapshot for an Accepted file edit with diff stats 1`] = `" ✓ edit test.ts → Accepted (+1, -1)"`; +exports[`DenseToolMessage > Visual Regression > matches SVG snapshot for an Accepted file edit with diff stats 1`] = ` +" ✓ edit test.ts → Accepted (+1, -1) + + 1 - old + 1 + new +" +`; exports[`DenseToolMessage > does not render result arrow if resultDisplay is missing 1`] = ` " o test-tool Test description @@ -26,11 +35,17 @@ exports[`DenseToolMessage > flattens newlines in string results 1`] = ` exports[`DenseToolMessage > renders correctly for Edit tool using confirmationDetails 1`] = ` " ? 
Edit styles.scss → Confirming + + 1 - body { color: blue; } + 1 + body { color: red; } " `; exports[`DenseToolMessage > renders correctly for Errored Edit tool 1`] = ` " x Edit styles.scss → Failed (+1, -1) + + 1 - old line + 1 + new line " `; @@ -45,21 +60,33 @@ exports[`DenseToolMessage > renders correctly for ReadManyFiles results 1`] = ` exports[`DenseToolMessage > renders correctly for Rejected Edit tool 1`] = ` " - Edit styles.scss → Rejected (+1, -1) + + 1 - old line + 1 + new line " `; exports[`DenseToolMessage > renders correctly for Rejected Edit tool with confirmationDetails and diffStat 1`] = ` " - Edit styles.scss → Rejected (+1, -1) + + 1 - body { color: blue; } + 1 + body { color: red; } " `; exports[`DenseToolMessage > renders correctly for Rejected WriteFile tool 1`] = ` " - WriteFile config.json → Rejected + + 1 - old content + 1 + new content " `; exports[`DenseToolMessage > renders correctly for WriteFile tool 1`] = ` " ✓ WriteFile config.json → Accepted (+1, -1) + + 1 - old content + 1 + new content " `; @@ -75,6 +102,9 @@ exports[`DenseToolMessage > renders correctly for error status with string messa exports[`DenseToolMessage > renders correctly for file diff results with stats 1`] = ` " ✓ test-tool test.ts → Accepted (+15, -6) + + 1 - old line + 1 + diff content " `; diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay-ToolResultDisplay-truncates-ANSI-output-when-maxLines-is-provided-even-if-availableTerminalHeight-is-undefined.snap.svg b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay-ToolResultDisplay-truncates-ANSI-output-when-maxLines-is-provided-even-if-availableTerminalHeight-is-undefined.snap.svg index 2638c4ad3b..619362a3f4 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay-ToolResultDisplay-truncates-ANSI-output-when-maxLines-is-provided-even-if-availableTerminalHeight-is-undefined.snap.svg +++ 
b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay-ToolResultDisplay-truncates-ANSI-output-when-maxLines-is-provided-even-if-availableTerminalHeight-is-undefined.snap.svg @@ -4,7 +4,7 @@ - Line 26 + ... 26 hidden (Ctrl+O) ... Line 27 Line 28 Line 29 @@ -16,31 +16,18 @@ Line 35 Line 36 Line 37 - Line 38 - - Line 39 - - Line 40 - - Line 41 - - Line 42 - - Line 43 - - Line 44 - - Line 45 - - Line 46 - - Line 47 - - Line 48 - - Line 49 - - Line 50 - + Line 38 + Line 39 + Line 40 + Line 41 + Line 42 + Line 43 + Line 44 + Line 45 + Line 46 + Line 47 + Line 48 + Line 49 + Line 50 \ No newline at end of file diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap index 12eff841b8..2175679bfa 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap @@ -33,15 +33,24 @@ exports[`ToolResultDisplay > renders string result as plain text when renderOutp " `; +exports[`ToolResultDisplay > stays scrolled to the bottom when lines are incrementally added 1`] = ` +"... 4 hidden (Ctrl+O) ... +Line 5 +Line 6 +Line 7 +Line 8 +" +`; + exports[`ToolResultDisplay > truncates ANSI output when maxLines is provided 1`] = ` -"Line 3 -Line 4 █ -Line 5 █ +"... 3 hidden (Ctrl+O) ... +Line 4 +Line 5 " `; exports[`ToolResultDisplay > truncates ANSI output when maxLines is provided, even if availableTerminalHeight is undefined 1`] = ` -"Line 26 +"... 26 hidden (Ctrl+O) ... 
Line 27 Line 28 Line 29 @@ -53,34 +62,36 @@ Line 34 Line 35 Line 36 Line 37 -Line 38 ▄ -Line 39 █ -Line 40 █ -Line 41 █ -Line 42 █ -Line 43 █ -Line 44 █ -Line 45 █ -Line 46 █ -Line 47 █ -Line 48 █ -Line 49 █ -Line 50 █" +Line 38 +Line 39 +Line 40 +Line 41 +Line 42 +Line 43 +Line 44 +Line 45 +Line 46 +Line 47 +Line 48 +Line 49 +Line 50" `; exports[`ToolResultDisplay > truncates very long string results 1`] = ` -"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa… █ +"... 250 hidden (Ctrl+O) ... 
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +aaaaaaaaaaaaaaa " `; diff --git a/packages/cli/src/ui/components/shared/MaxSizedBox.tsx b/packages/cli/src/ui/components/shared/MaxSizedBox.tsx index baadb3b9d8..1f751cc116 100644 --- a/packages/cli/src/ui/components/shared/MaxSizedBox.tsx +++ b/packages/cli/src/ui/components/shared/MaxSizedBox.tsx @@ -115,7 +115,7 @@ export const MaxSizedBox: React.FC = ({ [id, removeOverflowingId], ); - if (effectiveMaxHeight === undefined) { + if (effectiveMaxHeight === undefined && totalHiddenLines === 0) { return ( {children} diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index d4c7c498a5..0edd4af7b0 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -1224,7 +1224,7 @@ export class Config implements McpContext, AgentLoopContext { this.useRipgrep = params.useRipgrep ?? true; this.useBackgroundColor = params.useBackgroundColor ?? true; this.useAlternateBuffer = params.useAlternateBuffer ?? 
false; - this.useTerminalBuffer = params.useTerminalBuffer ?? true; + this.useTerminalBuffer = params.useTerminalBuffer ?? false; this.useRenderProcess = params.useRenderProcess ?? true; this.enableInteractiveShell = params.enableInteractiveShell ?? false; diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 5179263596..bb5c9a9d54 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -465,8 +465,8 @@ "terminalBuffer": { "title": "Terminal Buffer", "description": "Use the new terminal buffer architecture for rendering.", - "markdownDescription": "Use the new terminal buffer architecture for rendering.\n\n- Category: `UI`\n- Requires restart: `yes`\n- Default: `true`", - "default": true, + "markdownDescription": "Use the new terminal buffer architecture for rendering.\n\n- Category: `UI`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, "type": "boolean" }, "useBackgroundColor": { From cbacdc67d0622c2b4ae632aaa261e61d124ae1a0 Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Tue, 7 Apr 2026 23:22:45 -0700 Subject: [PATCH 13/39] feat(cli): switch to ctrl+g from ctrl-x (#24861) --- docs/reference/keyboard-shortcuts.md | 17 +++++++++-------- .../src/ui/components/ContextSummaryDisplay.tsx | 4 +++- .../ui/components/ExitPlanModeDialog.test.tsx | 6 +++--- .../src/ui/components/ExitPlanModeDialog.tsx | 13 +++++++++++++ .../cli/src/ui/components/InputPrompt.test.tsx | 4 ++-- packages/cli/src/ui/components/InputPrompt.tsx | 9 +++++++++ .../ContextSummaryDisplay.test.tsx.snap | 6 +++--- .../ExitPlanModeDialog.test.tsx.snap | 16 ++++++++-------- .../__snapshots__/ShortcutsHelp.test.tsx.snap | 8 ++++---- .../ToolConfirmationQueue.test.tsx.snap | 2 +- packages/cli/src/ui/constants/tips.ts | 4 ++-- packages/cli/src/ui/key/keyBindings.ts | 9 +++++++-- packages/cli/src/ui/key/keyMatchers.test.ts | 9 +++++++-- 13 files changed, 71 insertions(+), 36 deletions(-) diff --git a/docs/reference/keyboard-shortcuts.md 
b/docs/reference/keyboard-shortcuts.md index 68b3d884fe..4ef61ac003 100644 --- a/docs/reference/keyboard-shortcuts.md +++ b/docs/reference/keyboard-shortcuts.md @@ -86,13 +86,14 @@ available combinations. #### Text Input -| Command | Action | Keys | -| -------------------------- | ------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `input.submit` | Submit the current prompt. | `Enter` | -| `input.queueMessage` | Queue the current prompt to be processed after the current task finishes. | `Tab` | -| `input.newline` | Insert a newline without submitting. | `Ctrl+Enter`
`Cmd/Win+Enter`
`Alt+Enter`
`Shift+Enter`
`Ctrl+J` | -| `input.openExternalEditor` | Open the current prompt or the plan in an external editor. | `Ctrl+X` | -| `input.paste` | Paste from the clipboard. | `Ctrl+V`
`Cmd/Win+V`
`Alt+V` | +| Command | Action | Keys | +| ------------------------------------ | ------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | +| `input.submit` | Submit the current prompt. | `Enter` | +| `input.queueMessage` | Queue the current prompt to be processed after the current task finishes. | `Tab` | +| `input.newline` | Insert a newline without submitting. | `Ctrl+Enter`
`Cmd/Win+Enter`
`Alt+Enter`
`Shift+Enter`
`Ctrl+J` | +| `input.openExternalEditor` | Open the current prompt or the plan in an external editor. | `Ctrl+G` | +| `input.deprecatedOpenExternalEditor` | Deprecated command to open external editor. | `Ctrl+X` | +| `input.paste` | Paste from the clipboard. | `Ctrl+V`
`Cmd/Win+V`
`Alt+V` | #### App Controls @@ -100,7 +101,7 @@ available combinations. | ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------ | | `app.showErrorDetails` | Toggle detailed error information. | `F12` | | `app.showFullTodos` | Toggle the full TODO list. | `Ctrl+T` | -| `app.showIdeContextDetail` | Show IDE context details. | `Ctrl+G` | +| `app.showIdeContextDetail` | Show IDE context details. | `F4` | | `app.toggleMarkdown` | Toggle Markdown rendering. | `Alt+M` | | `app.toggleCopyMode` | Toggle copy mode when in alternate buffer mode. | `F9` | | `app.toggleMouseMode` | Toggle mouse mode (scrolling and clicking). | `Ctrl+S` | diff --git a/packages/cli/src/ui/components/ContextSummaryDisplay.tsx b/packages/cli/src/ui/components/ContextSummaryDisplay.tsx index 696793bc06..171e29e905 100644 --- a/packages/cli/src/ui/components/ContextSummaryDisplay.tsx +++ b/packages/cli/src/ui/components/ContextSummaryDisplay.tsx @@ -8,6 +8,8 @@ import type React from 'react'; import { Box, Text } from 'ink'; import { theme } from '../semantic-colors.js'; import { type IdeContext, type MCPServerConfig } from '@google/gemini-cli-core'; +import { Command } from '../key/keyMatchers.js'; +import { formatCommand } from '../key/keybindingUtils.js'; interface ContextSummaryDisplayProps { geminiMdFileCount: number; @@ -49,7 +51,7 @@ export const ContextSummaryDisplay: React.FC = ({ } return `${openFileCount} open file${ openFileCount > 1 ? 
's' : '' - } (ctrl+g to view)`; + } (${formatCommand(Command.SHOW_IDE_CONTEXT_DETAIL)} to view)`; })(); const geminiMdText = (() => { diff --git a/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx b/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx index 18f2f02224..6925c749d7 100644 --- a/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx +++ b/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx @@ -587,7 +587,7 @@ Implement a comprehensive authentication system with multiple providers. expect(onFeedback).not.toHaveBeenCalled(); }); - it('automatically submits feedback when Ctrl+X is used to edit the plan', async () => { + it('automatically submits feedback when Ctrl+G is used to edit the plan', async () => { const { stdin, lastFrame } = await act(async () => renderDialog({ useAlternateBuffer }), ); @@ -600,9 +600,9 @@ Implement a comprehensive authentication system with multiple providers. expect(lastFrame()).toContain('Add user authentication'); }); - // Press Ctrl+X + // Press Ctrl+G await act(async () => { - writeKey(stdin, '\x18'); // Ctrl+X + writeKey(stdin, '\x07'); // Ctrl+G }); await waitFor(() => { diff --git a/packages/cli/src/ui/components/ExitPlanModeDialog.tsx b/packages/cli/src/ui/components/ExitPlanModeDialog.tsx index b2c28abaeb..11adf8e82b 100644 --- a/packages/cli/src/ui/components/ExitPlanModeDialog.tsx +++ b/packages/cli/src/ui/components/ExitPlanModeDialog.tsx @@ -25,6 +25,11 @@ import { useKeypress } from '../hooks/useKeypress.js'; import { Command } from '../key/keyMatchers.js'; import { formatCommand } from '../key/keybindingUtils.js'; import { useKeyMatchers } from '../hooks/useKeyMatchers.js'; +import { + appEvents, + AppEvent, + TransientMessageType, +} from '../../utils/events.js'; export interface ExitPlanModeDialogProps { planPath: string; @@ -173,6 +178,14 @@ export const ExitPlanModeDialog: React.FC = ({ void handleOpenEditor(); return true; } + if 
(keyMatchers[Command.DEPRECATED_OPEN_EXTERNAL_EDITOR](key)) { + const cmdKey = formatCommand(Command.OPEN_EXTERNAL_EDITOR); + appEvents.emit(AppEvent.TransientMessage, { + message: `Use ${cmdKey} to open the external editor.`, + type: TransientMessageType.Hint, + }); + return true; + } return false; }, { isActive: true, priority: true }, diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx index 3fdaa479cc..7a241691e8 100644 --- a/packages/cli/src/ui/components/InputPrompt.test.tsx +++ b/packages/cli/src/ui/components/InputPrompt.test.tsx @@ -5065,8 +5065,8 @@ describe('InputPrompt', () => { input: '\x12', }, { - name: 'Ctrl+X hotkey is pressed', - input: '\x18', + name: 'Ctrl+G hotkey is pressed', + input: '\x07', }, { name: 'F12 hotkey is pressed', diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 7e59ab4d14..b36de8ebb0 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -1272,6 +1272,15 @@ export const InputPrompt: React.FC = ({ return true; } + if (keyMatchers[Command.DEPRECATED_OPEN_EXTERNAL_EDITOR](key)) { + const cmdKey = formatCommand(Command.OPEN_EXTERNAL_EDITOR); + appEvents.emit(AppEvent.TransientMessage, { + message: `Use ${cmdKey} to open the external editor.`, + type: TransientMessageType.Hint, + }); + return true; + } + // Ctrl+V for clipboard paste if (keyMatchers[Command.PASTE_CLIPBOARD](key)) { // eslint-disable-next-line @typescript-eslint/no-floating-promises diff --git a/packages/cli/src/ui/components/__snapshots__/ContextSummaryDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ContextSummaryDisplay.test.tsx.snap index 876524bdb8..7330b89e4d 100644 --- a/packages/cli/src/ui/components/__snapshots__/ContextSummaryDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ContextSummaryDisplay.test.tsx.snap @@ -1,16 +1,16 
@@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[` > should not render empty parts 1`] = ` -" 1 open file (ctrl+g to view) +" 1 open file (F4 to view) " `; exports[` > should render on a single line on a wide screen 1`] = ` -" 1 open file (ctrl+g to view) · 1 GEMINI.md file · 1 MCP server · 1 skill +" 1 open file (F4 to view) · 1 GEMINI.md file · 1 MCP server · 1 skill " `; exports[` > should render on multiple lines on a narrow screen 1`] = ` -" 1 open file (ctrl+g to view) · 1 GEMINI.md file · 1 MCP server · 1 skill +" 1 open file (F4 to view) · 1 GEMINI.md file · 1 MCP server · 1 skill " `; diff --git a/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap index 073c106ceb..71acb9388c 100644 --- a/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap @@ -23,7 +23,7 @@ Files to Modify Approves plan but requires confirmation for each tool 3. Type your feedback... -Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -50,7 +50,7 @@ Files to Modify Approves plan but requires confirmation for each tool 3. Type your feedback... -Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -82,7 +82,7 @@ Implementation Steps Approves plan but requires confirmation for each tool 3. Type your feedback... -Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -109,7 +109,7 @@ Files to Modify Approves plan but requires confirmation for each tool 3. Type your feedback... 
-Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -136,7 +136,7 @@ Files to Modify Approves plan but requires confirmation for each tool 3. Type your feedback... -Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -163,7 +163,7 @@ Files to Modify Approves plan but requires confirmation for each tool 3. Type your feedback... -Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -216,7 +216,7 @@ Testing Strategy Approves plan but requires confirmation for each tool 3. Type your feedback... -Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; @@ -243,6 +243,6 @@ Files to Modify Approves plan but requires confirmation for each tool 3. Type your feedback... 
-Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel +Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel " `; diff --git a/packages/cli/src/ui/components/__snapshots__/ShortcutsHelp.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ShortcutsHelp.test.tsx.snap index 9e65c72f69..f51dca0860 100644 --- a/packages/cli/src/ui/components/__snapshots__/ShortcutsHelp.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ShortcutsHelp.test.tsx.snap @@ -12,7 +12,7 @@ exports[`ShortcutsHelp > renders correctly in 'narrow' mode on 'linux' 1`] = ` Ctrl+V paste images Alt+M raw markdown mode Ctrl+R reverse-search history - Ctrl+X open external editor + Ctrl+G open external editor " `; @@ -28,7 +28,7 @@ exports[`ShortcutsHelp > renders correctly in 'narrow' mode on 'mac' 1`] = ` Ctrl+V paste images Option+M raw markdown mode Ctrl+R reverse-search history - Ctrl+X open external editor + Ctrl+G open external editor " `; @@ -37,7 +37,7 @@ exports[`ShortcutsHelp > renders correctly in 'wide' mode on 'linux' 1`] = ` Shortcuts See /help for more ! shell mode Shift+Tab cycle mode Ctrl+V paste images @ select file or folder Ctrl+Y YOLO mode Alt+M raw markdown mode - Double Esc clear & rewind Ctrl+R reverse-search history Ctrl+X open external editor + Double Esc clear & rewind Ctrl+R reverse-search history Ctrl+G open external editor Tab focus UI " `; @@ -47,7 +47,7 @@ exports[`ShortcutsHelp > renders correctly in 'wide' mode on 'mac' 1`] = ` Shortcuts See /help for more ! 
shell mode Shift+Tab cycle mode Ctrl+V paste images @ select file or folder Ctrl+Y YOLO mode Option+M raw markdown mode - Double Esc clear & rewind Ctrl+R reverse-search history Ctrl+X open external editor + Double Esc clear & rewind Ctrl+R reverse-search history Ctrl+G open external editor Tab focus UI " `; diff --git a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap index 8d8667b51d..9214e58713 100644 --- a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap @@ -191,7 +191,7 @@ exports[`ToolConfirmationQueue > renders ExitPlanMode tool confirmation with Suc │ Approves plan but requires confirmation for each tool │ │ 3. Type your feedback... │ │ │ -│ Enter to select · ↑/↓ to navigate · Ctrl+X to edit plan · Esc to cancel │ +│ Enter to select · ↑/↓ to navigate · Ctrl+G to edit plan · Esc to cancel │ ╰──────────────────────────────────────────────────────────────────────────────╯ " `; diff --git a/packages/cli/src/ui/constants/tips.ts b/packages/cli/src/ui/constants/tips.ts index 922465347a..78bc16f039 100644 --- a/packages/cli/src/ui/constants/tips.ts +++ b/packages/cli/src/ui/constants/tips.ts @@ -111,10 +111,10 @@ export const INFORMATIVE_TIPS = [ 'Paste from your clipboard with Ctrl+V', 'Undo text edits in the input with Alt+Z or Cmd+Z', 'Redo undone text edits with Shift+Alt+Z or Shift+Cmd+Z', - 'Open the current prompt in an external editor with Ctrl+X', + 'Open the current prompt in an external editor with Ctrl+G', 'In menus, move up/down with k/j or the arrow keys', 'In menus, select an item by typing its number', - "If you're using an IDE, see the context with Ctrl+G", + "If you're using an IDE, see the context with F4", 'Toggle background shells with Ctrl+B or /shells', 'Toggle the background shell process list with Ctrl+L', // 
Keyboard shortcut tips end here diff --git a/packages/cli/src/ui/key/keyBindings.ts b/packages/cli/src/ui/key/keyBindings.ts index c23596dc0f..0079d743d5 100644 --- a/packages/cli/src/ui/key/keyBindings.ts +++ b/packages/cli/src/ui/key/keyBindings.ts @@ -77,6 +77,7 @@ export enum Command { QUEUE_MESSAGE = 'input.queueMessage', NEWLINE = 'input.newline', OPEN_EXTERNAL_EDITOR = 'input.openExternalEditor', + DEPRECATED_OPEN_EXTERNAL_EDITOR = 'input.deprecatedOpenExternalEditor', PASTE_CLIPBOARD = 'input.paste', // App Controls @@ -375,7 +376,8 @@ export const defaultKeyBindingConfig: KeyBindingConfig = new Map([ new KeyBinding('ctrl+j'), ], ], - [Command.OPEN_EXTERNAL_EDITOR, [new KeyBinding('ctrl+x')]], + [Command.OPEN_EXTERNAL_EDITOR, [new KeyBinding('ctrl+g')]], + [Command.DEPRECATED_OPEN_EXTERNAL_EDITOR, [new KeyBinding('ctrl+x')]], [ Command.PASTE_CLIPBOARD, [ @@ -388,7 +390,7 @@ export const defaultKeyBindingConfig: KeyBindingConfig = new Map([ // App Controls [Command.SHOW_ERROR_DETAILS, [new KeyBinding('f12')]], [Command.SHOW_FULL_TODOS, [new KeyBinding('ctrl+t')]], - [Command.SHOW_IDE_CONTEXT_DETAIL, [new KeyBinding('ctrl+g')]], + [Command.SHOW_IDE_CONTEXT_DETAIL, [new KeyBinding('f4')]], [Command.TOGGLE_MARKDOWN, [new KeyBinding('alt+m')]], [Command.TOGGLE_COPY_MODE, [new KeyBinding('f9')]], [Command.TOGGLE_MOUSE_MODE, [new KeyBinding('ctrl+s')]], @@ -510,6 +512,7 @@ export const commandCategories: readonly CommandCategory[] = [ Command.QUEUE_MESSAGE, Command.NEWLINE, Command.OPEN_EXTERNAL_EDITOR, + Command.DEPRECATED_OPEN_EXTERNAL_EDITOR, Command.PASTE_CLIPBOARD, ], }, @@ -626,6 +629,8 @@ export const commandDescriptions: Readonly> = { [Command.NEWLINE]: 'Insert a newline without submitting.', [Command.OPEN_EXTERNAL_EDITOR]: 'Open the current prompt or the plan in an external editor.', + [Command.DEPRECATED_OPEN_EXTERNAL_EDITOR]: + 'Deprecated command to open external editor.', [Command.PASTE_CLIPBOARD]: 'Paste from the clipboard.', // App Controls diff 
--git a/packages/cli/src/ui/key/keyMatchers.test.ts b/packages/cli/src/ui/key/keyMatchers.test.ts index 2a3709350f..0fc2f00ac7 100644 --- a/packages/cli/src/ui/key/keyMatchers.test.ts +++ b/packages/cli/src/ui/key/keyMatchers.test.ts @@ -311,6 +311,11 @@ describe('keyMatchers', () => { // External tools { command: Command.OPEN_EXTERNAL_EDITOR, + positive: [createKey('g', { ctrl: true })], + negative: [createKey('g'), createKey('c', { ctrl: true })], + }, + { + command: Command.DEPRECATED_OPEN_EXTERNAL_EDITOR, positive: [createKey('x', { ctrl: true })], negative: [createKey('x'), createKey('c', { ctrl: true })], }, @@ -336,8 +341,8 @@ describe('keyMatchers', () => { }, { command: Command.SHOW_IDE_CONTEXT_DETAIL, - positive: [createKey('g', { ctrl: true })], - negative: [createKey('g'), createKey('t', { ctrl: true })], + positive: [createKey('f4')], + negative: [createKey('f5'), createKey('t', { ctrl: true })], }, { command: Command.TOGGLE_MARKDOWN, From 651ad63ed6daf4decf9071d5aa0bc9a4e715434d Mon Sep 17 00:00:00 2001 From: Gaurav Ghosh Date: Fri, 20 Mar 2026 13:39:10 -0700 Subject: [PATCH 14/39] feat: Introduce an AI-driven interactive shell mode with new `read-shell` and `write-to-shell` tools and a configurable mode setting. 
--- packages/cli/src/config/config.ts | 1 + packages/cli/src/config/settingsSchema.ts | 20 ++ packages/cli/src/ui/hooks/shellReducer.ts | 18 +- .../src/ui/hooks/useBackgroundShellManager.ts | 101 ++++++++ .../cli/src/ui/hooks/useExecutionLifecycle.ts | 5 + packages/cli/src/ui/hooks/useGeminiStream.ts | 3 + packages/core/src/config/config.ts | 27 +- packages/core/src/prompts/promptProvider.ts | 1 + packages/core/src/prompts/snippets.ts | 16 +- .../src/services/shellExecutionService.ts | 41 ++++ .../tools/definitions/base-declarations.ts | 12 + .../core/src/tools/definitions/coreTools.ts | 11 + .../dynamic-declaration-helpers.ts | 30 +++ .../model-family-sets/default-legacy.ts | 2 + .../definitions/model-family-sets/gemini-3.ts | 2 + packages/core/src/tools/definitions/types.ts | 1 + packages/core/src/tools/read-shell.ts | 148 +++++++++++ packages/core/src/tools/shell.test.ts | 6 +- packages/core/src/tools/shell.ts | 167 +++++++------ .../core/src/tools/shellOutputFormatter.ts | 128 ++++++++++ packages/core/src/tools/tool-names.ts | 19 ++ packages/core/src/tools/write-to-shell.ts | 230 ++++++++++++++++++ 22 files changed, 906 insertions(+), 83 deletions(-) create mode 100644 packages/cli/src/ui/hooks/useBackgroundShellManager.ts create mode 100644 packages/core/src/tools/read-shell.ts create mode 100644 packages/core/src/tools/shellOutputFormatter.ts create mode 100644 packages/core/src/tools/write-to-shell.ts diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 4e7e1db6f2..499b57b522 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1009,6 +1009,7 @@ export async function loadCliConfig( enableInteractiveShell: settings.tools?.shell?.enableInteractiveShell, shellBackgroundCompletionBehavior: settings.tools?.shell ?.backgroundCompletionBehavior as string | undefined, + interactiveShellMode: settings.tools?.shell?.interactiveShellMode, shellToolInactivityTimeout: 
settings.tools?.shell?.inactivityTimeout, enableShellOutputEfficiency: settings.tools?.shell?.enableShellOutputEfficiency ?? true, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c041aaa8c3..e654391566 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1512,6 +1512,26 @@ const SETTINGS_SCHEMA = { { label: 'Notify', value: 'notify' }, ], }, + interactiveShellMode: { + type: 'enum', + label: 'Interactive Shell Mode', + category: 'Tools', + requiresRestart: true, + default: undefined as 'human' | 'ai' | 'off' | undefined, + description: oneLine` + Controls who can interact with backgrounded shell processes. + "human": user can Tab-focus and type into shells (default). + "ai": model gets write_to_shell/read_shell tools for TUI interaction. + "off": no interactive shell. + When set, overrides enableInteractiveShell. + `, + showInDialog: true, + options: [ + { value: 'human', label: 'Human (Tab to focus)' }, + { value: 'ai', label: 'AI (model-driven tools)' }, + { value: 'off', label: 'Off' }, + ], + }, pager: { type: 'string', label: 'Pager', diff --git a/packages/cli/src/ui/hooks/shellReducer.ts b/packages/cli/src/ui/hooks/shellReducer.ts index 0e9307259d..ea467fc327 100644 --- a/packages/cli/src/ui/hooks/shellReducer.ts +++ b/packages/cli/src/ui/hooks/shellReducer.ts @@ -92,7 +92,23 @@ export function shellReducer( nextTasks.delete(action.pid); } nextTasks.set(action.pid, updatedTask); - return { ...state, backgroundTasks: nextTasks }; + + // Auto-hide panel when all tasks have exited + let nextVisible = state.isBackgroundTaskVisible; + if (action.update.status === 'exited') { + const hasRunning = Array.from(nextTasks.values()).some( + (s) => s.status === 'running', + ); + if (!hasRunning) { + nextVisible = false; + } + } + + return { + ...state, + backgroundTasks: nextTasks, + isBackgroundTaskVisible: nextVisible, + }; } case 'APPEND_TASK_OUTPUT': { const 
task = state.backgroundTasks.get(action.pid); diff --git a/packages/cli/src/ui/hooks/useBackgroundShellManager.ts b/packages/cli/src/ui/hooks/useBackgroundShellManager.ts new file mode 100644 index 0000000000..eb43ae1cfb --- /dev/null +++ b/packages/cli/src/ui/hooks/useBackgroundShellManager.ts @@ -0,0 +1,101 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { useState, useEffect, useMemo, useRef } from 'react'; +import { type BackgroundTask } from './shellReducer.js'; + +export interface BackgroundShellManagerProps { + backgroundTasks: Map; + backgroundTaskCount: number; + isBackgroundTaskVisible: boolean; + activePtyId: number | null | undefined; + embeddedShellFocused: boolean; + setEmbeddedShellFocused: (focused: boolean) => void; + terminalHeight: number; +} + +export function useBackgroundShellManager({ + backgroundTasks, + backgroundTaskCount, + isBackgroundTaskVisible, + activePtyId, + embeddedShellFocused, + setEmbeddedShellFocused, + terminalHeight, +}: BackgroundShellManagerProps) { + const [isBackgroundShellListOpen, setIsBackgroundShellListOpen] = + useState(false); + const [activeBackgroundShellPid, setActiveBackgroundShellPid] = useState< + number | null + >(null); + + const prevShellCountRef = useRef(backgroundTaskCount); + + useEffect(() => { + if (backgroundTasks.size === 0) { + if (activeBackgroundShellPid !== null) { + setActiveBackgroundShellPid(null); + } + if (isBackgroundShellListOpen) { + setIsBackgroundShellListOpen(false); + } + } else if ( + activeBackgroundShellPid === null || + !backgroundTasks.has(activeBackgroundShellPid) + ) { + // If active shell is closed or none selected, select the first one + setActiveBackgroundShellPid(backgroundTasks.keys().next().value ?? 
null); + } else if (backgroundTaskCount > prevShellCountRef.current) { + // A new shell was added — auto-switch to the newest one (last in the map) + const pids = Array.from(backgroundTasks.keys()); + const newestPid = pids[pids.length - 1]; + if (newestPid !== undefined && newestPid !== activeBackgroundShellPid) { + setActiveBackgroundShellPid(newestPid); + } + } + prevShellCountRef.current = backgroundTaskCount; + }, [ + backgroundTasks, + activeBackgroundShellPid, + backgroundTaskCount, + isBackgroundShellListOpen, + ]); + + useEffect(() => { + if (embeddedShellFocused) { + const hasActiveForegroundShell = !!activePtyId; + const hasVisibleBackgroundShell = + isBackgroundTaskVisible && backgroundTasks.size > 0; + + if (!hasActiveForegroundShell && !hasVisibleBackgroundShell) { + setEmbeddedShellFocused(false); + } + } + }, [ + isBackgroundTaskVisible, + backgroundTasks, + embeddedShellFocused, + backgroundTaskCount, + activePtyId, + setEmbeddedShellFocused, + ]); + + const backgroundShellHeight = useMemo( + () => + isBackgroundTaskVisible && backgroundTasks.size > 0 + ? 
Math.max(Math.floor(terminalHeight * 0.3), 5) + : 0, + [isBackgroundTaskVisible, backgroundTasks.size, terminalHeight], + ); + + return { + isBackgroundShellListOpen, + setIsBackgroundShellListOpen, + activeBackgroundShellPid, + setActiveBackgroundShellPid, + backgroundShellHeight, + }; +} diff --git a/packages/cli/src/ui/hooks/useExecutionLifecycle.ts b/packages/cli/src/ui/hooks/useExecutionLifecycle.ts index 2e80bf8f95..02e9e88cf5 100644 --- a/packages/cli/src/ui/hooks/useExecutionLifecycle.ts +++ b/packages/cli/src/ui/hooks/useExecutionLifecycle.ts @@ -661,6 +661,10 @@ export const useExecutionLifecycle = ( (s: BackgroundTask) => s.status === 'running', ).length; + const showBackgroundShell = useCallback(() => { + dispatch({ type: 'SET_VISIBILITY', visible: true }); + }, [dispatch]); + return { handleShellCommand, activeShellPtyId: state.activeShellPtyId, @@ -668,6 +672,7 @@ export const useExecutionLifecycle = ( backgroundTaskCount, isBackgroundTaskVisible: state.isBackgroundTaskVisible, toggleBackgroundTasks, + showBackgroundShell, backgroundCurrentExecution, registerBackgroundTask, dismissBackgroundTask, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index a2621c4546..c4a9c58d5e 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -390,6 +390,7 @@ export const useGeminiStream = ( backgroundTaskCount, isBackgroundTaskVisible, toggleBackgroundTasks, + showBackgroundShell, backgroundCurrentExecution, registerBackgroundTask, dismissBackgroundTask, @@ -1917,6 +1918,7 @@ export const useGeminiStream = ( backgroundedTool.command, backgroundedTool.initialOutput, ); + showBackgroundShell(); } } @@ -2056,6 +2058,7 @@ export const useGeminiStream = ( modelSwitchedFromQuotaError, addItem, registerBackgroundTask, + showBackgroundShell, consumeUserHint, isLowErrorVerbosity, maybeAddSuppressedToolErrorNote, diff --git a/packages/core/src/config/config.ts 
b/packages/core/src/config/config.ts index 0edd4af7b0..c82cc315b7 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -36,6 +36,8 @@ import { GlobTool } from '../tools/glob.js'; import { ActivateSkillTool } from '../tools/activate-skill.js'; import { EditTool } from '../tools/edit.js'; import { ShellTool } from '../tools/shell.js'; +import { WriteToShellTool } from '../tools/write-to-shell.js'; +import { ReadShellTool } from '../tools/read-shell.js'; import { WriteFileTool } from '../tools/write-file.js'; import { WebFetchTool } from '../tools/web-fetch.js'; import { MemoryTool, setGeminiMdFilename } from '../tools/memoryTool.js'; @@ -656,6 +658,7 @@ export interface ConfigParameters { useRipgrep?: boolean; enableInteractiveShell?: boolean; shellBackgroundCompletionBehavior?: string; + interactiveShellMode?: 'human' | 'ai' | 'off'; skipNextSpeakerCheck?: boolean; shellExecutionConfig?: ShellExecutionConfig; extensionManagement?: boolean; @@ -868,6 +871,7 @@ export class Config implements McpContext, AgentLoopContext { | 'inject' | 'notify' | 'silent'; + private readonly interactiveShellMode: 'human' | 'ai' | 'off'; private readonly skipNextSpeakerCheck: boolean; private readonly useBackgroundColor: boolean; private readonly useAlternateBuffer: boolean; @@ -1235,6 +1239,14 @@ export class Config implements McpContext, AgentLoopContext { this.shellBackgroundCompletionBehavior = 'silent'; } + // interactiveShellMode takes precedence over enableInteractiveShell. + // If not set, derive from enableInteractiveShell for backward compat. + if (params.interactiveShellMode) { + this.interactiveShellMode = params.interactiveShellMode; + } else { + this.interactiveShellMode = this.enableInteractiveShell ? 'human' : 'off'; + } + this.skipNextSpeakerCheck = params.skipNextSpeakerCheck ?? true; this.shellExecutionConfig = { terminalWidth: params.shellExecutionConfig?.terminalWidth ?? 
80, @@ -3211,10 +3223,14 @@ export class Config implements McpContext, AgentLoopContext { return ( this.interactive && this.ptyInfo !== 'child_process' && - this.enableInteractiveShell + this.interactiveShellMode !== 'off' ); } + getInteractiveShellMode(): 'human' | 'ai' | 'off' { + return this.interactiveShellMode; + } + isSkillsSupportEnabled(): boolean { return this.skillsSupport; } @@ -3575,6 +3591,15 @@ export class Config implements McpContext, AgentLoopContext { new ReadBackgroundOutputTool(this, this.messageBus), ), ); + // Register AI-driven interactive shell tools when mode is 'ai' + if (this.getInteractiveShellMode() === 'ai') { + maybeRegister(WriteToShellTool, () => + registry.registerTool(new WriteToShellTool(this.messageBus)), + ); + maybeRegister(ReadShellTool, () => + registry.registerTool(new ReadShellTool(this.messageBus)), + ); + } if (!this.isMemoryManagerEnabled()) { maybeRegister(MemoryTool, () => registry.registerTool(new MemoryTool(this.messageBus, this.storage)), diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index 0036dae560..c4077afc95 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -200,6 +200,7 @@ export class PromptProvider { enableShellEfficiency: context.config.getEnableShellOutputEfficiency(), interactiveShellEnabled: context.config.isInteractiveShellEnabled(), + interactiveShellMode: context.config.getInteractiveShellMode(), topicUpdateNarration: context.config.isTopicUpdateNarrationEnabled(), memoryManagerEnabled: context.config.isMemoryManagerEnabled(), diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 59315e1ca6..b049ddf58e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -18,6 +18,8 @@ import { MEMORY_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, + WRITE_TO_SHELL_TOOL_NAME, + READ_SHELL_TOOL_NAME, 
WRITE_FILE_TOOL_NAME, WRITE_TODOS_TOOL_NAME, GREP_PARAM_TOTAL_MAX_MATCHES, @@ -81,6 +83,7 @@ export interface PrimaryWorkflowsOptions { export interface OperationalGuidelinesOptions { interactive: boolean; interactiveShellEnabled: boolean; + interactiveShellMode?: 'human' | 'ai' | 'off'; topicUpdateNarration: boolean; memoryManagerEnabled: boolean; } @@ -391,7 +394,7 @@ export function renderOperationalGuidelines( - **Command Execution:** Use the ${formatToolName(SHELL_TOOL_NAME)} tool for running shell commands, remembering the safety rule to explain modifying commands first.${toolUsageInteractive( options.interactive, options.interactiveShellEnabled, - )}${toolUsageRememberingFacts(options)} + )}${toolUsageRememberingFacts(options)}${toolUsageAiShell(options)} - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -800,6 +803,17 @@ function toolUsageInteractive( - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim).`; } +function toolUsageAiShell(options: OperationalGuidelinesOptions): string { + if (options.interactiveShellMode !== 'ai') return ''; + return ` +- **AI-Driven Interactive Shell:** Commands using \`wait_for_output_seconds\` auto-promote to background when they stall. Once promoted, use ${formatToolName(READ_SHELL_TOOL_NAME)} to see the terminal screen, then ${formatToolName(WRITE_TO_SHELL_TOOL_NAME)} to send text input and/or special keys (arrows, Enter, Ctrl-C, etc.). 
+ - Set \`wait_for_output_seconds\` **low (2-5)** for commands that prompt for input (npx, installers, REPLs). Set **high (60+)** for long builds. Omit for instant commands. + - **Always read the screen before writing input.** The screen state tells you what the process is waiting for. + - When waiting for a command to finish (e.g. npm install), use ${formatToolName(READ_SHELL_TOOL_NAME)} with \`wait_seconds\` to delay before reading. Do NOT poll in a tight loop. + - **Clean up when done:** when your task is complete, kill background processes with ${formatToolName(WRITE_TO_SHELL_TOOL_NAME)} sending Ctrl-C, or note the PID for the user to clean up. + - You are the sole operator of promoted shells — the user cannot type into them.`; +} + function toolUsageRememberingFacts( options: OperationalGuidelinesOptions, ): string { diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index dfbb3a5033..95b3f2d17b 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -105,6 +105,7 @@ export interface ShellExecutionConfig { backgroundCompletionBehavior?: 'inject' | 'notify' | 'silent'; originalCommand?: string; sessionId?: string; + autoPromoteTimeoutMs?: number; } /** @@ -889,6 +890,21 @@ export class ShellExecutionService { sessionId: shellExecutionConfig.sessionId, }); + let autoPromoteTimer: NodeJS.Timeout | undefined; + const resetAutoPromoteTimer = () => { + if (shellExecutionConfig.autoPromoteTimeoutMs !== undefined) { + if (autoPromoteTimer) clearTimeout(autoPromoteTimer); + autoPromoteTimer = setTimeout(() => { + ShellExecutionService.background( + ptyPid, + shellExecutionConfig.sessionId, + ); + }, shellExecutionConfig.autoPromoteTimeoutMs); + } + }; + + resetAutoPromoteTimer(); + const result = ExecutionLifecycleService.attachExecution(ptyPid, { executionMethod: ptyInfo?.name ?? 
'node-pty', writeInput: (input) => { @@ -1066,6 +1082,7 @@ export class ShellExecutionService { }); const handleOutput = (data: Buffer) => { + resetAutoPromoteTimer(); processingChain = processingChain.then( () => new Promise((resolveChunk) => { @@ -1135,6 +1152,7 @@ export class ShellExecutionService { ptyProcess.onExit( ({ exitCode, signal }: { exitCode: number; signal?: number }) => { + if (autoPromoteTimer) clearTimeout(autoPromoteTimer); exited = true; abortSignal.removeEventListener('abort', abortHandler); // Attempt to destroy the PTY to ensure FD is closed @@ -1220,6 +1238,7 @@ export class ShellExecutionService { ); const abortHandler = async () => { + if (autoPromoteTimer) clearTimeout(autoPromoteTimer); if (ptyProcess.pid && !exited) { await killProcessGroup({ pid: ptyPid, @@ -1398,6 +1417,28 @@ export class ShellExecutionService { return ExecutionLifecycleService.subscribe(pid, listener); } + /** + * Reads the current rendered screen state of a running process. + * Returns the full terminal buffer text for PTY processes, + * or the accumulated output for child processes. + * + * @param pid The process ID of the target process. + * @returns The screen text, or null if the process is not found. + */ + static readScreen(pid: number): string | null { + const activePty = this.activePtys.get(pid); + if (activePty) { + return getFullBufferText(activePty.headlessTerminal); + } + + const activeChild = this.activeChildProcesses.get(pid); + if (activeChild) { + return activeChild.state.output; + } + + return null; + } + /** * Resizes the pseudo-terminal (PTY) of a running process. 
* diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index 89a5aa1614..e1575966af 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -56,6 +56,18 @@ export const READ_FILE_PARAM_END_LINE = 'end_line'; export const SHELL_TOOL_NAME = 'run_shell_command'; export const SHELL_PARAM_COMMAND = 'command'; export const SHELL_PARAM_IS_BACKGROUND = 'is_background'; +export const SHELL_PARAM_WAIT_SECONDS = 'wait_for_output_seconds'; + +// -- write_to_shell -- +export const WRITE_TO_SHELL_TOOL_NAME = 'write_to_shell'; +export const WRITE_TO_SHELL_PARAM_PID = 'pid'; +export const WRITE_TO_SHELL_PARAM_INPUT = 'input'; +export const WRITE_TO_SHELL_PARAM_SPECIAL_KEYS = 'special_keys'; + +// -- read_shell -- +export const READ_SHELL_TOOL_NAME = 'read_shell'; +export const READ_SHELL_PARAM_PID = 'pid'; +export const READ_SHELL_PARAM_WAIT_SECONDS = 'wait_seconds'; // -- write_file -- export const WRITE_FILE_TOOL_NAME = 'write_file'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index d1b81a6e99..a70ed1a33c 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -27,6 +27,8 @@ export { LS_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, + WRITE_TO_SHELL_TOOL_NAME, + READ_SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, WEB_SEARCH_TOOL_NAME, @@ -73,6 +75,12 @@ export { LS_PARAM_IGNORE, SHELL_PARAM_COMMAND, SHELL_PARAM_IS_BACKGROUND, + SHELL_PARAM_WAIT_SECONDS, + WRITE_TO_SHELL_PARAM_PID, + WRITE_TO_SHELL_PARAM_INPUT, + WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, + READ_SHELL_PARAM_PID, + READ_SHELL_PARAM_WAIT_SECONDS, WEB_SEARCH_PARAM_QUERY, WEB_FETCH_PARAM_PROMPT, READ_MANY_PARAM_INCLUDE, @@ -249,18 +257,21 @@ export function getShellDefinition( enableInteractiveShell: boolean, enableEfficiency: 
boolean, enableToolSandboxing: boolean = false, + interactiveShellMode?: string, ): ToolDefinition { return { base: getShellDeclaration( enableInteractiveShell, enableEfficiency, enableToolSandboxing, + interactiveShellMode, ), overrides: (modelId) => getToolSet(modelId).run_shell_command( enableInteractiveShell, enableEfficiency, enableToolSandboxing, + interactiveShellMode, ), }; } diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 29da313bf4..6f001c7459 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -22,6 +22,7 @@ import { PARAM_DIR_PATH, SHELL_PARAM_IS_BACKGROUND, EXIT_PLAN_PARAM_PLAN_FILENAME, + SHELL_PARAM_WAIT_SECONDS, SKILL_PARAM_NAME, PARAM_ADDITIONAL_PERMISSIONS, UPDATE_TOPIC_TOOL_NAME, @@ -36,7 +37,9 @@ import { export function getShellToolDescription( enableInteractiveShell: boolean, enableEfficiency: boolean, + interactiveShellMode?: string, ): string { + const isAiMode = interactiveShellMode === 'ai'; const efficiencyGuidelines = enableEfficiency ? ` @@ -56,6 +59,11 @@ export function getShellToolDescription( Background PIDs: Only included if background processes were started. Process Group PGID: Only included if available.`; + if (isAiMode) { + const autoPromoteInstructions = `Commands that do not complete within \`${SHELL_PARAM_WAIT_SECONDS}\` seconds are automatically promoted to background. Once promoted, use \`write_to_shell\` and \`read_shell\` to interact with the process. Do NOT use \`&\` to background commands.`; + return `This tool executes a given shell command as \`bash -c \`. ${autoPromoteInstructions} Command is executed as a subprocess that leads its own process group. 
Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${efficiencyGuidelines}${returnedInfo}`; + } + if (os.platform() === 'win32') { const backgroundInstructions = enableInteractiveShell ? `To run a command in the background, set the \`${SHELL_PARAM_IS_BACKGROUND}\` parameter to true. Do NOT use PowerShell background constructs.` @@ -86,12 +94,33 @@ export function getShellDeclaration( enableInteractiveShell: boolean, enableEfficiency: boolean, enableToolSandboxing: boolean = false, + interactiveShellMode?: string, ): FunctionDeclaration { + const isAiMode = interactiveShellMode === 'ai'; + + // In AI mode, use wait_for_output_seconds instead of is_background + const backgroundParam = isAiMode + ? { + [SHELL_PARAM_WAIT_SECONDS]: { + type: 'number' as const, + description: + 'Max seconds to wait for command to complete before auto-promoting to background (default: 5). Set low (2-5) for commands likely to prompt for input (npx, installers, REPLs). Set high (60-300) for long builds or installs. Once promoted, use write_to_shell/read_shell to interact.', + }, + } + : { + [SHELL_PARAM_IS_BACKGROUND]: { + type: 'boolean' as const, + description: + 'Set to true if this command should be run in the background (e.g. for long-running servers or watchers). The command will be started, allowed to run for a brief moment to check for immediate errors, and then moved to the background.', + }, + }; + return { name: SHELL_TOOL_NAME, description: getShellToolDescription( enableInteractiveShell, enableEfficiency, + interactiveShellMode, ), parametersJsonSchema: { type: 'object', @@ -120,6 +149,7 @@ export function getShellDeclaration( description: 'Optional. Delay in milliseconds to wait after starting the process in the background. Useful to allow the process to start and generate initial output before returning.', }, + ...backgroundParam, ...(enableToolSandboxing ? 
{ [PARAM_ADDITIONAL_PERMISSIONS]: { diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 60a52fc6ad..5441c39d09 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -337,11 +337,13 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = { enableInteractiveShell, enableEfficiency, enableToolSandboxing, + interactiveShellMode, ) => getShellDeclaration( enableInteractiveShell, enableEfficiency, enableToolSandboxing, + interactiveShellMode, ), replace: { diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index a86a20378e..f29f9e6814 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -344,11 +344,13 @@ export const GEMINI_3_SET: CoreToolSet = { enableInteractiveShell, enableEfficiency, enableToolSandboxing, + interactiveShellMode, ) => getShellDeclaration( enableInteractiveShell, enableEfficiency, enableToolSandboxing, + interactiveShellMode, ), replace: { diff --git a/packages/core/src/tools/definitions/types.ts b/packages/core/src/tools/definitions/types.ts index 42c0cc7028..d4f532f513 100644 --- a/packages/core/src/tools/definitions/types.ts +++ b/packages/core/src/tools/definitions/types.ts @@ -38,6 +38,7 @@ export interface CoreToolSet { enableInteractiveShell: boolean, enableEfficiency: boolean, enableToolSandboxing: boolean, + interactiveShellMode?: string, ) => FunctionDeclaration; replace: FunctionDeclaration; google_web_search: FunctionDeclaration; diff --git a/packages/core/src/tools/read-shell.ts b/packages/core/src/tools/read-shell.ts new file mode 100644 index 0000000000..4e74cbbfa5 --- /dev/null +++ b/packages/core/src/tools/read-shell.ts @@ 
-0,0 +1,148 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + BaseDeclarativeTool, + BaseToolInvocation, + Kind, + type ToolInvocation, + type ToolResult, +} from './tools.js'; +import { ShellExecutionService } from '../services/shellExecutionService.js'; +import { + READ_SHELL_TOOL_NAME, + READ_SHELL_PARAM_PID, + READ_SHELL_PARAM_WAIT_SECONDS, +} from './tool-names.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; + +export interface ReadShellParams { + pid: number; + wait_seconds?: number; +} + +export class ReadShellToolInvocation extends BaseToolInvocation< + ReadShellParams, + ToolResult +> { + constructor( + params: ReadShellParams, + messageBus: MessageBus, + _toolName?: string, + _toolDisplayName?: string, + ) { + super(params, messageBus, _toolName, _toolDisplayName); + } + + getDescription(): string { + const waitPart = + this.params.wait_seconds !== undefined + ? ` (after ${this.params.wait_seconds}s)` + : ''; + return `read shell screen PID ${this.params.pid}${waitPart}`; + } + + async execute(signal: AbortSignal): Promise { + const { pid, wait_seconds } = this.params; + + // Wait before reading if requested + if (wait_seconds !== undefined && wait_seconds > 0) { + const waitMs = Math.min(wait_seconds, 30) * 1000; // Cap at 30s + await new Promise((resolve) => { + const timer = setTimeout(resolve, waitMs); + const onAbort = () => { + clearTimeout(timer); + resolve(); + }; + signal.addEventListener('abort', onAbort, { once: true }); + }); + } + + // Validate the PID is active + if (!ShellExecutionService.isPtyActive(pid)) { + return { + llmContent: `Error: No active process found with PID ${pid}. The process may have exited.`, + returnDisplay: `No active process with PID ${pid}.`, + }; + } + + const screen = ShellExecutionService.readScreen(pid); + if (screen === null) { + return { + llmContent: `Error: Could not read screen for PID ${pid}. 
The process may have exited.`, + returnDisplay: `Could not read screen for PID ${pid}.`, + }; + } + + return { + llmContent: screen, + returnDisplay: `Screen read from PID ${pid} (${screen.split('\n').length} lines).`, + }; + } +} + +export class ReadShellTool extends BaseDeclarativeTool< + ReadShellParams, + ToolResult +> { + static readonly Name = READ_SHELL_TOOL_NAME; + + constructor(messageBus: MessageBus) { + super( + ReadShellTool.Name, + 'ReadShell', + 'Reads the current screen state of a running background shell process. Returns the rendered terminal screen as text, preserving the visual layout. Use after write_to_shell to see updated output, or to check progress of a running command.', + Kind.Read, + { + type: 'object', + properties: { + [READ_SHELL_PARAM_PID]: { + type: 'number', + description: + 'The PID of the background process to read from. Obtained from a previous run_shell_command call that was auto-promoted to background or started with is_background=true.', + }, + [READ_SHELL_PARAM_WAIT_SECONDS]: { + type: 'number', + description: + 'Seconds to wait before reading the screen. Use this to let the process run for a while before checking output (e.g. wait for a build to finish). 
Max 30 seconds.', + }, + }, + required: [READ_SHELL_PARAM_PID], + }, + messageBus, + false, // output is not markdown + ); + } + + protected override validateToolParamValues( + params: ReadShellParams, + ): string | null { + if (!params.pid || params.pid <= 0) { + return 'PID must be a positive number.'; + } + if ( + params.wait_seconds !== undefined && + (params.wait_seconds < 0 || params.wait_seconds > 30) + ) { + return 'wait_seconds must be between 0 and 30.'; + } + return null; + } + + protected createInvocation( + params: ReadShellParams, + messageBus: MessageBus, + _toolName?: string, + _toolDisplayName?: string, + ): ToolInvocation { + return new ReadShellToolInvocation( + params, + messageBus, + _toolName, + _toolDisplayName, + ); + } +} diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 9551fd9638..8ed78ba464 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -149,6 +149,8 @@ describe('ShellTool', () => { getShellBackgroundCompletionBehavior: vi.fn().mockReturnValue('silent'), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), getSandboxEnabled: vi.fn().mockReturnValue(false), + getInteractiveShellMode: vi.fn().mockReturnValue('off'), + getSessionId: vi.fn().mockReturnValue('test-session-id'), sanitizationConfig: {}, get sandboxManager() { return mockSandboxManager; @@ -422,7 +424,7 @@ describe('ShellTool', () => { expect(mockShellBackground).toHaveBeenCalledWith( 12345, - 'default', + 'test-session-id', 'sleep 10', ); @@ -666,7 +668,7 @@ describe('ShellTool', () => { expect(mockShellBackground).toHaveBeenCalledWith( 12345, - 'default', + 'test-session-id', 'sleep 10', ); diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 3ea29474c6..0407cb99bf 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -33,6 +33,7 @@ import { import { getErrorMessage } from '../utils/errors.js'; import { 
summarizeToolOutput } from '../utils/summarizer.js'; +import { formatShellOutput } from './shellOutputFormatter.js'; import { ShellExecutionService, type ShellOutputEvent, @@ -71,6 +72,7 @@ export interface ShellToolParams { is_background?: boolean; delay_ms?: number; [PARAM_ADDITIONAL_PERMISSIONS]?: SandboxPermissions; + wait_for_output_seconds?: number; } export class ShellToolInvocation extends BaseToolInvocation< @@ -78,6 +80,7 @@ export class ShellToolInvocation extends BaseToolInvocation< ToolResult > { private proactivePermissionsConfirmed?: SandboxPermissions; + private _autoPromoteTimer?: NodeJS.Timeout; constructor( private readonly context: AgentLoopContext, @@ -223,7 +226,12 @@ export class ShellToolInvocation extends BaseToolInvocation< } override getExplanation(): string { - return this.getContextualDetails().trim(); + let explanation = this.getContextualDetails().trim(); + const isAiMode = this.context.config.getInteractiveShellMode() === 'ai'; + if (this.params.wait_for_output_seconds !== undefined || isAiMode) { + explanation += ` [auto-background after ${this.params.wait_for_output_seconds ?? 5}s]`; + } + return explanation; } override getPolicyUpdateOptions( @@ -497,6 +505,21 @@ export class ShellToolInvocation extends BaseToolInvocation< }, timeoutMs); }; + let currentPid: number | undefined; + const isAiMode = this.context.config.getInteractiveShellMode() === 'ai'; + const shouldAutoPromote = + this.params.wait_for_output_seconds !== undefined || isAiMode; + const waitMs = (this.params.wait_for_output_seconds ?? 
5) * 1000; + + const resetAutoPromoteTimer = () => { + if (shouldAutoPromote && currentPid) { + if (this._autoPromoteTimer) clearTimeout(this._autoPromoteTimer); + this._autoPromoteTimer = setTimeout(() => { + ShellExecutionService.background(currentPid!); + }, waitMs); + } + }; + signal.addEventListener('abort', onAbort, { once: true }); timeoutController.signal.addEventListener('abort', onAbort, { once: true, @@ -511,6 +534,7 @@ export class ShellToolInvocation extends BaseToolInvocation< cwd, (event: ShellOutputEvent) => { resetTimeout(); // Reset timeout on any event + resetAutoPromoteTimer(); // Reset auto-promote on any event if (!updateOutput) { return; } @@ -582,6 +606,7 @@ export class ShellToolInvocation extends BaseToolInvocation< backgroundCompletionBehavior: this.context.config.getShellBackgroundCompletionBehavior(), originalCommand: strippedCommand, + autoPromoteTimeoutMs: shouldAutoPromote ? waitMs : undefined, }, ); @@ -618,6 +643,11 @@ export class ShellToolInvocation extends BaseToolInvocation< }; } } + + // In AI mode with wait_for_output_seconds, set up auto-promotion timer. + // When the timer fires, promote to background instead of cancelling. 
+ currentPid = pid; + resetAutoPromoteTimer(); } const result = await resultPromise; @@ -658,97 +688,75 @@ export class ShellToolInvocation extends BaseToolInvocation< } } - let data: BackgroundExecutionData | undefined; - - let llmContent = ''; let timeoutMessage = ''; if (result.aborted) { if (timeoutController.signal.aborted) { timeoutMessage = `Command was automatically cancelled because it exceeded the timeout of ${( timeoutMs / 60000 ).toFixed(1)} minutes without output.`; - llmContent = timeoutMessage; - } else { - llmContent = - 'Command was cancelled by user before it could complete.'; } - if (result.output.trim()) { - llmContent += ` Below is the output before it was cancelled:\n${result.output}`; - } else { - llmContent += ' There was no output before it was cancelled.'; - } - } else if (this.params.is_background || result.backgrounded) { - llmContent = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; - data = { - pid: result.pid, - command: this.params.command, - initialOutput: result.output, - }; - } else { - // Create a formatted error string for display, replacing the wrapper command - // with the user-facing command. 
- const llmContentParts = [`Output: ${result.output || '(empty)'}`]; - - if (result.error) { - const finalError = result.error.message.replaceAll( - commandToExecute, - this.params.command, - ); - llmContentParts.push(`Error: ${finalError}`); - } - - if (result.exitCode !== null && result.exitCode !== 0) { - llmContentParts.push(`Exit Code: ${result.exitCode}`); - data = { - exitCode: result.exitCode, - isError: true, - }; - } - - if (result.signal) { - llmContentParts.push(`Signal: ${result.signal}`); - } - if (backgroundPIDs.length) { - llmContentParts.push(`Background PIDs: ${backgroundPIDs.join(', ')}`); - } - if (result.pid) { - llmContentParts.push(`Process Group PGID: ${result.pid}`); - } - - llmContent = llmContentParts.join('\n'); } - let returnDisplay: string | AnsiOutput = ''; - if (this.context.config.getDebugMode()) { - returnDisplay = llmContent; - } else { - if (this.params.is_background || result.backgrounded) { - returnDisplay = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; - } else if (result.aborted) { - const cancelMsg = timeoutMessage || 'Command cancelled by user.'; - if (result.output.trim()) { - returnDisplay = `${cancelMsg}\n\nOutput before cancellation:\n${result.output}`; + const formatterOutput = formatShellOutput({ + params: this.params, + result, + debugMode: this.context.config.getDebugMode(), + backgroundPIDs, + isAiMode, + timeoutMessage, + }); + + let data: BackgroundExecutionData | undefined; + data = formatterOutput.data as BackgroundExecutionData | undefined; + let returnDisplay: string | AnsiOutput = formatterOutput.returnDisplay; + let llmContent = formatterOutput.llmContent; + + if (!this.context.config.getDebugMode()) { + if ( + !this.params.is_background && + !result.backgrounded && + !result.aborted + ) { + if (result.output.trim() || result.ansiOutput) { + returnDisplay = + result.ansiOutput && result.ansiOutput.length > 0 + ? 
result.ansiOutput + : result.output; } else { - returnDisplay = cancelMsg; + if (result.signal) { + returnDisplay = `Command terminated by signal: ${result.signal}`; + } else if (result.error) { + returnDisplay = `Command failed: ${getErrorMessage(result.error)}`; + } else if (result.exitCode !== null && result.exitCode !== 0) { + returnDisplay = `Command exited with code: ${result.exitCode}`; + } } - } else if (result.output.trim() || result.ansiOutput) { - returnDisplay = - result.ansiOutput && result.ansiOutput.length > 0 - ? result.ansiOutput - : result.output; - } else { - if (result.signal) { - returnDisplay = `Command terminated by signal: ${result.signal}`; - } else if (result.error) { - returnDisplay = `Command failed: ${getErrorMessage(result.error)}`; - } else if (result.exitCode !== null && result.exitCode !== 0) { - returnDisplay = `Command exited with code: ${result.exitCode}`; - } - // If output is empty and command succeeded (code 0, no error/signal/abort), - // returnDisplay will remain empty, which is fine. 
} } + // Replace wrapper command with actual command in error messages + if (result.error && !result.aborted) { + llmContent = llmContent.replaceAll( + commandToExecute, + this.params.command, + ); + } + + // Update data with specific things needed by ShellTool + if (this.params.is_background || result.backgrounded) { + data = { + ...data, + initialOutput: result.output, + pid: result.pid!, + command: this.params.command, + }; + } else if (result.exitCode !== null && result.exitCode !== 0) { + data = { + exitCode: result.exitCode, + isError: true, + } as BackgroundExecutionData; + } + // Heuristic Sandbox Denial Detection if ( !!result.error || @@ -929,6 +937,8 @@ export class ShellToolInvocation extends BaseToolInvocation< }; } finally { if (timeoutTimer) clearTimeout(timeoutTimer); + const autoTimer = this._autoPromoteTimer; + if (autoTimer) clearTimeout(autoTimer); signal.removeEventListener('abort', onAbort); timeoutController.signal.removeEventListener('abort', onAbort); try { @@ -1007,6 +1017,7 @@ export class ShellTool extends BaseDeclarativeTool< this.context.config.getEnableInteractiveShell(), this.context.config.getEnableShellOutputEfficiency(), this.context.config.getSandboxEnabled(), + this.context.config.getInteractiveShellMode(), ); return resolveToolDeclaration(definition, modelId); } diff --git a/packages/core/src/tools/shellOutputFormatter.ts b/packages/core/src/tools/shellOutputFormatter.ts new file mode 100644 index 0000000000..04d16fb42e --- /dev/null +++ b/packages/core/src/tools/shellOutputFormatter.ts @@ -0,0 +1,128 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { type ShellExecutionResult } from '../services/shellExecutionService.js'; +import { type ShellToolParams } from './shell.js'; + +export interface FormatShellOutputOptions { + params: ShellToolParams; + result: ShellExecutionResult; + debugMode: boolean; + timeoutMessage?: string; + backgroundPIDs: number[]; + 
summarizedOutput?: string; + isAiMode: boolean; +} + +export interface FormattedShellOutput { + llmContent: string; + returnDisplay: string; + data: Record; +} + +export function formatShellOutput( + options: FormatShellOutputOptions, +): FormattedShellOutput { + const { + params, + result, + debugMode, + timeoutMessage, + backgroundPIDs, + summarizedOutput, + } = options; + + let llmContent = ''; + let data: Record = {}; + + if (result.aborted) { + llmContent = timeoutMessage || 'Command cancelled by user.'; + if (result.output.trim()) { + llmContent += ` Below is the output before it was cancelled:\n${result.output}`; + } else { + llmContent += ' There was no output before it was cancelled.'; + } + } else if (params.is_background || result.backgrounded) { + const isAutoPromoted = result.backgrounded && !params.is_background; + if (isAutoPromoted) { + llmContent = `Command auto-promoted to background (PID: ${result.pid}). The process is still running. To check its screen state, call the read_shell tool with pid ${result.pid}. To send input or keystrokes, call the write_to_shell tool with pid ${result.pid}. If the process does not exit on its own when done, kill it with write_to_shell using special_keys=["Ctrl-C"].`; + } else { + llmContent = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; + } + data = { + pid: result.pid, + command: params.command, + directory: params.dir_path, + backgrounded: true, + }; + } else { + const llmContentParts: string[] = []; + + let content = summarizedOutput ?? 
result.output.trim(); + if (!content) { + content = '(empty)'; + } + + llmContentParts.push(`Output: ${content}`); + + if (result.error) { + llmContentParts.push(`Error: ${result.error.message}`); + } + + if (result.exitCode !== null && result.exitCode !== 0) { + llmContentParts.push(`Exit Code: ${result.exitCode}`); + } + if (result.signal !== null) { + llmContentParts.push(`Signal: ${result.signal}`); + } + if (backgroundPIDs.length) { + llmContentParts.push(`Background PIDs: ${backgroundPIDs.join(', ')}`); + } + if (result.pid) { + llmContentParts.push(`Process Group PGID: ${result.pid}`); + } + + llmContent = llmContentParts.join('\n'); + } + + let returnDisplay = ''; + if (debugMode) { + returnDisplay = llmContent; + } else { + if (params.is_background || result.backgrounded) { + const isAutoPromotedDisplay = + result.backgrounded && !params.is_background; + if (isAutoPromotedDisplay) { + returnDisplay = `Command auto-promoted to background (PID: ${result.pid}).`; + } else { + returnDisplay = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; + } + } else if (result.aborted) { + const cancelMsg = timeoutMessage || 'Command cancelled by user.'; + if (result.output.trim()) { + returnDisplay = `${cancelMsg}\n\nOutput before cancellation:\n${result.output}`; + } else { + returnDisplay = cancelMsg; + } + } else if (result.error) { + returnDisplay = `Command failed: ${result.error.message}`; + } else if (result.exitCode !== 0 && result.exitCode !== null) { + returnDisplay = `Command exited with code ${result.exitCode}`; + if (result.output.trim()) { + returnDisplay += `\n\n${result.output}`; + } + } else if (summarizedOutput) { + returnDisplay = `Command succeeded. 
Output summarized:\n${summarizedOutput}`; + } else { + returnDisplay = `Command succeeded.`; + if (result.output.trim()) { + returnDisplay += `\n\n${result.output}`; + } + } + } + + return { llmContent, returnDisplay, data }; +} diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 224f2ab0d5..47cc906c27 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -10,6 +10,8 @@ import { LS_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, + WRITE_TO_SHELL_TOOL_NAME, + READ_SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, WEB_SEARCH_TOOL_NAME, @@ -52,6 +54,12 @@ import { LS_PARAM_IGNORE, SHELL_PARAM_COMMAND, SHELL_PARAM_IS_BACKGROUND, + SHELL_PARAM_WAIT_SECONDS, + WRITE_TO_SHELL_PARAM_PID, + WRITE_TO_SHELL_PARAM_INPUT, + WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, + READ_SHELL_PARAM_PID, + READ_SHELL_PARAM_WAIT_SECONDS, WEB_SEARCH_PARAM_QUERY, WEB_FETCH_PARAM_PROMPT, READ_MANY_PARAM_INCLUDE, @@ -90,6 +98,8 @@ export { LS_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, + WRITE_TO_SHELL_TOOL_NAME, + READ_SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, WEB_SEARCH_TOOL_NAME, @@ -136,6 +146,12 @@ export { LS_PARAM_IGNORE, SHELL_PARAM_COMMAND, SHELL_PARAM_IS_BACKGROUND, + SHELL_PARAM_WAIT_SECONDS, + WRITE_TO_SHELL_PARAM_PID, + WRITE_TO_SHELL_PARAM_INPUT, + WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, + READ_SHELL_PARAM_PID, + READ_SHELL_PARAM_WAIT_SECONDS, WEB_SEARCH_PARAM_QUERY, WEB_FETCH_PARAM_PROMPT, READ_MANY_PARAM_INCLUDE, @@ -179,6 +195,7 @@ export const TOOLS_REQUIRING_NARROWING = new Set([ WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, SHELL_TOOL_NAME, + WRITE_TO_SHELL_TOOL_NAME, ]); export const TRACKER_CREATE_TASK_TOOL_NAME = 'tracker_create_task'; @@ -251,6 +268,8 @@ export const ALL_BUILTIN_TOOL_NAMES = [ WEB_FETCH_TOOL_NAME, EDIT_TOOL_NAME, SHELL_TOOL_NAME, + WRITE_TO_SHELL_TOOL_NAME, + READ_SHELL_TOOL_NAME, GREP_TOOL_NAME, READ_MANY_FILES_TOOL_NAME, READ_FILE_TOOL_NAME, diff --git 
a/packages/core/src/tools/write-to-shell.ts b/packages/core/src/tools/write-to-shell.ts new file mode 100644 index 0000000000..652cb31bf5 --- /dev/null +++ b/packages/core/src/tools/write-to-shell.ts @@ -0,0 +1,230 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + type ToolConfirmationOutcome, + BaseDeclarativeTool, + BaseToolInvocation, + Kind, + type ToolInvocation, + type ToolResult, + type ToolCallConfirmationDetails, + type ToolExecuteConfirmationDetails, +} from './tools.js'; +import { ShellExecutionService } from '../services/shellExecutionService.js'; +import { + WRITE_TO_SHELL_TOOL_NAME, + WRITE_TO_SHELL_PARAM_PID, + WRITE_TO_SHELL_PARAM_INPUT, + WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, +} from './tool-names.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; + +/** + * Mapping of named special keys to their ANSI escape sequences. + */ +const SPECIAL_KEY_MAP: Record = { + Enter: '\r', + Tab: '\t', + Up: '\x1b[A', + Down: '\x1b[B', + Left: '\x1b[D', + Right: '\x1b[C', + Escape: '\x1b', + Backspace: '\x7f', + 'Ctrl-C': '\x03', + 'Ctrl-D': '\x04', + 'Ctrl-Z': '\x1a', + Space: ' ', + Delete: '\x1b[3~', + Home: '\x1b[H', + End: '\x1b[F', +}; + +const VALID_SPECIAL_KEYS = Object.keys(SPECIAL_KEY_MAP); + +/** Delay in ms to wait after writing input for the process to react. */ +const POST_INPUT_DELAY_MS = 150; + +export interface WriteToShellParams { + pid: number; + input?: string; + special_keys?: string[]; +} + +export class WriteToShellToolInvocation extends BaseToolInvocation< + WriteToShellParams, + ToolResult +> { + constructor( + params: WriteToShellParams, + messageBus: MessageBus, + _toolName?: string, + _toolDisplayName?: string, + ) { + super(params, messageBus, _toolName, _toolDisplayName); + } + + getDescription(): string { + const parts: string[] = [`write to shell PID ${this.params.pid}`]; + if (this.params.input) { + const display = + this.params.input.length > 50 + ? 
`${this.params.input.substring(0, 50)}...` + : this.params.input; + parts.push(`input: "${display}"`); + } + if (this.params.special_keys?.length) { + parts.push(`keys: [${this.params.special_keys.join(', ')}]`); + } + return parts.join(' '); + } + + protected override async getConfirmationDetails( + _abortSignal: AbortSignal, + ): Promise { + const confirmationDetails: ToolExecuteConfirmationDetails = { + type: 'exec', + title: 'Confirm Shell Input', + command: this.getDescription(), + rootCommand: 'write_to_shell', + rootCommands: ['write_to_shell'], + onConfirm: async (_outcome: ToolConfirmationOutcome) => { + // Policy updates handled centrally + }, + }; + return confirmationDetails; + } + + async execute(_signal: AbortSignal): Promise { + const { pid, input, special_keys } = this.params; + + // Validate the PID is active + if (!ShellExecutionService.isPtyActive(pid)) { + return { + llmContent: `Error: No active process found with PID ${pid}. The process may have exited.`, + returnDisplay: `No active process with PID ${pid}.`, + }; + } + + // Validate special keys + if (special_keys?.length) { + const invalidKeys = special_keys.filter( + (k) => !VALID_SPECIAL_KEYS.includes(k), + ); + if (invalidKeys.length > 0) { + return { + llmContent: `Error: Invalid special keys: ${invalidKeys.join(', ')}. 
Valid keys are: ${VALID_SPECIAL_KEYS.join(', ')}`, + returnDisplay: `Invalid special keys: ${invalidKeys.join(', ')}`, + }; + } + } + + // Send text input + if (input) { + ShellExecutionService.writeToPty(pid, input); + } + + // Send special keys + if (special_keys?.length) { + for (const key of special_keys) { + const sequence = SPECIAL_KEY_MAP[key]; + if (sequence) { + ShellExecutionService.writeToPty(pid, sequence); + } + } + } + + // Wait briefly for the process to react + await new Promise((resolve) => setTimeout(resolve, POST_INPUT_DELAY_MS)); + + // Read the screen after writing + const screen = ShellExecutionService.readScreen(pid); + if (screen === null) { + return { + llmContent: `Input sent, but the process (PID ${pid}) has exited.`, + returnDisplay: `Process exited after input.`, + }; + } + + return { + llmContent: `Input sent to PID ${pid}. Current screen:\n${screen}`, + returnDisplay: `Input sent to PID ${pid}.`, + }; + } +} + +export class WriteToShellTool extends BaseDeclarativeTool< + WriteToShellParams, + ToolResult +> { + static readonly Name = WRITE_TO_SHELL_TOOL_NAME; + + constructor(messageBus: MessageBus) { + super( + WriteToShellTool.Name, + 'WriteToShell', + 'Sends input to a running background shell process. Use this to interact with TUI applications, REPLs, and interactive commands. After writing, the current screen state is returned. Works with processes that were auto-promoted to background via wait_for_output_seconds or started with is_background=true.', + Kind.Execute, + { + type: 'object', + properties: { + [WRITE_TO_SHELL_PARAM_PID]: { + type: 'number', + description: + 'The PID of the background process to write to. Obtained from a previous run_shell_command call that was auto-promoted to background or started with is_background=true.', + }, + [WRITE_TO_SHELL_PARAM_INPUT]: { + type: 'string', + description: + '(OPTIONAL) Text to send to the process. 
This is literal text typed into the terminal.', + }, + [WRITE_TO_SHELL_PARAM_SPECIAL_KEYS]: { + type: 'array', + items: { + type: 'string', + enum: VALID_SPECIAL_KEYS, + }, + description: + '(OPTIONAL) Named special keys to send after the input text. Each key is sent in sequence. Examples: ["Enter"], ["Tab"], ["Up", "Enter"], ["Ctrl-C"].', + }, + }, + required: [WRITE_TO_SHELL_PARAM_PID], + }, + messageBus, + false, // output is not markdown + ); + } + + protected override validateToolParamValues( + params: WriteToShellParams, + ): string | null { + if (!params.pid || params.pid <= 0) { + return 'PID must be a positive number.'; + } + if ( + !params.input && + (!params.special_keys || !params.special_keys.length) + ) { + return 'At least one of input or special_keys must be provided.'; + } + return null; + } + + protected createInvocation( + params: WriteToShellParams, + messageBus: MessageBus, + _toolName?: string, + _toolDisplayName?: string, + ): ToolInvocation { + return new WriteToShellToolInvocation( + params, + messageBus, + _toolName, + _toolDisplayName, + ); + } +} From e7f8d9cf1ac64f18d196a9b60f8dc6cd4049ed37 Mon Sep 17 00:00:00 2001 From: Gaurav Ghosh Date: Wed, 8 Apr 2026 07:31:17 -0700 Subject: [PATCH 15/39] Revert "feat: Introduce an AI-driven interactive shell mode with new" This reverts commit 651ad63ed6daf4decf9071d5aa0bc9a4e715434d. 
--- packages/cli/src/config/config.ts | 1 - packages/cli/src/config/settingsSchema.ts | 20 -- packages/cli/src/ui/hooks/shellReducer.ts | 18 +- .../src/ui/hooks/useBackgroundShellManager.ts | 101 -------- .../cli/src/ui/hooks/useExecutionLifecycle.ts | 5 - packages/cli/src/ui/hooks/useGeminiStream.ts | 3 - packages/core/src/config/config.ts | 27 +- packages/core/src/prompts/promptProvider.ts | 1 - packages/core/src/prompts/snippets.ts | 16 +- .../src/services/shellExecutionService.ts | 41 ---- .../tools/definitions/base-declarations.ts | 12 - .../core/src/tools/definitions/coreTools.ts | 11 - .../dynamic-declaration-helpers.ts | 30 --- .../model-family-sets/default-legacy.ts | 2 - .../definitions/model-family-sets/gemini-3.ts | 2 - packages/core/src/tools/definitions/types.ts | 1 - packages/core/src/tools/read-shell.ts | 148 ----------- packages/core/src/tools/shell.test.ts | 6 +- packages/core/src/tools/shell.ts | 169 ++++++------- .../core/src/tools/shellOutputFormatter.ts | 128 ---------- packages/core/src/tools/tool-names.ts | 19 -- packages/core/src/tools/write-to-shell.ts | 230 ------------------ 22 files changed, 84 insertions(+), 907 deletions(-) delete mode 100644 packages/cli/src/ui/hooks/useBackgroundShellManager.ts delete mode 100644 packages/core/src/tools/read-shell.ts delete mode 100644 packages/core/src/tools/shellOutputFormatter.ts delete mode 100644 packages/core/src/tools/write-to-shell.ts diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 499b57b522..4e7e1db6f2 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1009,7 +1009,6 @@ export async function loadCliConfig( enableInteractiveShell: settings.tools?.shell?.enableInteractiveShell, shellBackgroundCompletionBehavior: settings.tools?.shell ?.backgroundCompletionBehavior as string | undefined, - interactiveShellMode: settings.tools?.shell?.interactiveShellMode, shellToolInactivityTimeout: 
settings.tools?.shell?.inactivityTimeout, enableShellOutputEfficiency: settings.tools?.shell?.enableShellOutputEfficiency ?? true, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index e654391566..c041aaa8c3 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1512,26 +1512,6 @@ const SETTINGS_SCHEMA = { { label: 'Notify', value: 'notify' }, ], }, - interactiveShellMode: { - type: 'enum', - label: 'Interactive Shell Mode', - category: 'Tools', - requiresRestart: true, - default: undefined as 'human' | 'ai' | 'off' | undefined, - description: oneLine` - Controls who can interact with backgrounded shell processes. - "human": user can Tab-focus and type into shells (default). - "ai": model gets write_to_shell/read_shell tools for TUI interaction. - "off": no interactive shell. - When set, overrides enableInteractiveShell. - `, - showInDialog: true, - options: [ - { value: 'human', label: 'Human (Tab to focus)' }, - { value: 'ai', label: 'AI (model-driven tools)' }, - { value: 'off', label: 'Off' }, - ], - }, pager: { type: 'string', label: 'Pager', diff --git a/packages/cli/src/ui/hooks/shellReducer.ts b/packages/cli/src/ui/hooks/shellReducer.ts index ea467fc327..0e9307259d 100644 --- a/packages/cli/src/ui/hooks/shellReducer.ts +++ b/packages/cli/src/ui/hooks/shellReducer.ts @@ -92,23 +92,7 @@ export function shellReducer( nextTasks.delete(action.pid); } nextTasks.set(action.pid, updatedTask); - - // Auto-hide panel when all tasks have exited - let nextVisible = state.isBackgroundTaskVisible; - if (action.update.status === 'exited') { - const hasRunning = Array.from(nextTasks.values()).some( - (s) => s.status === 'running', - ); - if (!hasRunning) { - nextVisible = false; - } - } - - return { - ...state, - backgroundTasks: nextTasks, - isBackgroundTaskVisible: nextVisible, - }; + return { ...state, backgroundTasks: nextTasks }; } case 'APPEND_TASK_OUTPUT': { const 
task = state.backgroundTasks.get(action.pid); diff --git a/packages/cli/src/ui/hooks/useBackgroundShellManager.ts b/packages/cli/src/ui/hooks/useBackgroundShellManager.ts deleted file mode 100644 index eb43ae1cfb..0000000000 --- a/packages/cli/src/ui/hooks/useBackgroundShellManager.ts +++ /dev/null @@ -1,101 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { useState, useEffect, useMemo, useRef } from 'react'; -import { type BackgroundTask } from './shellReducer.js'; - -export interface BackgroundShellManagerProps { - backgroundTasks: Map; - backgroundTaskCount: number; - isBackgroundTaskVisible: boolean; - activePtyId: number | null | undefined; - embeddedShellFocused: boolean; - setEmbeddedShellFocused: (focused: boolean) => void; - terminalHeight: number; -} - -export function useBackgroundShellManager({ - backgroundTasks, - backgroundTaskCount, - isBackgroundTaskVisible, - activePtyId, - embeddedShellFocused, - setEmbeddedShellFocused, - terminalHeight, -}: BackgroundShellManagerProps) { - const [isBackgroundShellListOpen, setIsBackgroundShellListOpen] = - useState(false); - const [activeBackgroundShellPid, setActiveBackgroundShellPid] = useState< - number | null - >(null); - - const prevShellCountRef = useRef(backgroundTaskCount); - - useEffect(() => { - if (backgroundTasks.size === 0) { - if (activeBackgroundShellPid !== null) { - setActiveBackgroundShellPid(null); - } - if (isBackgroundShellListOpen) { - setIsBackgroundShellListOpen(false); - } - } else if ( - activeBackgroundShellPid === null || - !backgroundTasks.has(activeBackgroundShellPid) - ) { - // If active shell is closed or none selected, select the first one - setActiveBackgroundShellPid(backgroundTasks.keys().next().value ?? 
null); - } else if (backgroundTaskCount > prevShellCountRef.current) { - // A new shell was added — auto-switch to the newest one (last in the map) - const pids = Array.from(backgroundTasks.keys()); - const newestPid = pids[pids.length - 1]; - if (newestPid !== undefined && newestPid !== activeBackgroundShellPid) { - setActiveBackgroundShellPid(newestPid); - } - } - prevShellCountRef.current = backgroundTaskCount; - }, [ - backgroundTasks, - activeBackgroundShellPid, - backgroundTaskCount, - isBackgroundShellListOpen, - ]); - - useEffect(() => { - if (embeddedShellFocused) { - const hasActiveForegroundShell = !!activePtyId; - const hasVisibleBackgroundShell = - isBackgroundTaskVisible && backgroundTasks.size > 0; - - if (!hasActiveForegroundShell && !hasVisibleBackgroundShell) { - setEmbeddedShellFocused(false); - } - } - }, [ - isBackgroundTaskVisible, - backgroundTasks, - embeddedShellFocused, - backgroundTaskCount, - activePtyId, - setEmbeddedShellFocused, - ]); - - const backgroundShellHeight = useMemo( - () => - isBackgroundTaskVisible && backgroundTasks.size > 0 - ? 
Math.max(Math.floor(terminalHeight * 0.3), 5) - : 0, - [isBackgroundTaskVisible, backgroundTasks.size, terminalHeight], - ); - - return { - isBackgroundShellListOpen, - setIsBackgroundShellListOpen, - activeBackgroundShellPid, - setActiveBackgroundShellPid, - backgroundShellHeight, - }; -} diff --git a/packages/cli/src/ui/hooks/useExecutionLifecycle.ts b/packages/cli/src/ui/hooks/useExecutionLifecycle.ts index 02e9e88cf5..2e80bf8f95 100644 --- a/packages/cli/src/ui/hooks/useExecutionLifecycle.ts +++ b/packages/cli/src/ui/hooks/useExecutionLifecycle.ts @@ -661,10 +661,6 @@ export const useExecutionLifecycle = ( (s: BackgroundTask) => s.status === 'running', ).length; - const showBackgroundShell = useCallback(() => { - dispatch({ type: 'SET_VISIBILITY', visible: true }); - }, [dispatch]); - return { handleShellCommand, activeShellPtyId: state.activeShellPtyId, @@ -672,7 +668,6 @@ export const useExecutionLifecycle = ( backgroundTaskCount, isBackgroundTaskVisible: state.isBackgroundTaskVisible, toggleBackgroundTasks, - showBackgroundShell, backgroundCurrentExecution, registerBackgroundTask, dismissBackgroundTask, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index c4a9c58d5e..a2621c4546 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -390,7 +390,6 @@ export const useGeminiStream = ( backgroundTaskCount, isBackgroundTaskVisible, toggleBackgroundTasks, - showBackgroundShell, backgroundCurrentExecution, registerBackgroundTask, dismissBackgroundTask, @@ -1918,7 +1917,6 @@ export const useGeminiStream = ( backgroundedTool.command, backgroundedTool.initialOutput, ); - showBackgroundShell(); } } @@ -2058,7 +2056,6 @@ export const useGeminiStream = ( modelSwitchedFromQuotaError, addItem, registerBackgroundTask, - showBackgroundShell, consumeUserHint, isLowErrorVerbosity, maybeAddSuppressedToolErrorNote, diff --git a/packages/core/src/config/config.ts 
b/packages/core/src/config/config.ts index c82cc315b7..0edd4af7b0 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -36,8 +36,6 @@ import { GlobTool } from '../tools/glob.js'; import { ActivateSkillTool } from '../tools/activate-skill.js'; import { EditTool } from '../tools/edit.js'; import { ShellTool } from '../tools/shell.js'; -import { WriteToShellTool } from '../tools/write-to-shell.js'; -import { ReadShellTool } from '../tools/read-shell.js'; import { WriteFileTool } from '../tools/write-file.js'; import { WebFetchTool } from '../tools/web-fetch.js'; import { MemoryTool, setGeminiMdFilename } from '../tools/memoryTool.js'; @@ -658,7 +656,6 @@ export interface ConfigParameters { useRipgrep?: boolean; enableInteractiveShell?: boolean; shellBackgroundCompletionBehavior?: string; - interactiveShellMode?: 'human' | 'ai' | 'off'; skipNextSpeakerCheck?: boolean; shellExecutionConfig?: ShellExecutionConfig; extensionManagement?: boolean; @@ -871,7 +868,6 @@ export class Config implements McpContext, AgentLoopContext { | 'inject' | 'notify' | 'silent'; - private readonly interactiveShellMode: 'human' | 'ai' | 'off'; private readonly skipNextSpeakerCheck: boolean; private readonly useBackgroundColor: boolean; private readonly useAlternateBuffer: boolean; @@ -1239,14 +1235,6 @@ export class Config implements McpContext, AgentLoopContext { this.shellBackgroundCompletionBehavior = 'silent'; } - // interactiveShellMode takes precedence over enableInteractiveShell. - // If not set, derive from enableInteractiveShell for backward compat. - if (params.interactiveShellMode) { - this.interactiveShellMode = params.interactiveShellMode; - } else { - this.interactiveShellMode = this.enableInteractiveShell ? 'human' : 'off'; - } - this.skipNextSpeakerCheck = params.skipNextSpeakerCheck ?? true; this.shellExecutionConfig = { terminalWidth: params.shellExecutionConfig?.terminalWidth ?? 
80, @@ -3223,14 +3211,10 @@ export class Config implements McpContext, AgentLoopContext { return ( this.interactive && this.ptyInfo !== 'child_process' && - this.interactiveShellMode !== 'off' + this.enableInteractiveShell ); } - getInteractiveShellMode(): 'human' | 'ai' | 'off' { - return this.interactiveShellMode; - } - isSkillsSupportEnabled(): boolean { return this.skillsSupport; } @@ -3591,15 +3575,6 @@ export class Config implements McpContext, AgentLoopContext { new ReadBackgroundOutputTool(this, this.messageBus), ), ); - // Register AI-driven interactive shell tools when mode is 'ai' - if (this.getInteractiveShellMode() === 'ai') { - maybeRegister(WriteToShellTool, () => - registry.registerTool(new WriteToShellTool(this.messageBus)), - ); - maybeRegister(ReadShellTool, () => - registry.registerTool(new ReadShellTool(this.messageBus)), - ); - } if (!this.isMemoryManagerEnabled()) { maybeRegister(MemoryTool, () => registry.registerTool(new MemoryTool(this.messageBus, this.storage)), diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index c4077afc95..0036dae560 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -200,7 +200,6 @@ export class PromptProvider { enableShellEfficiency: context.config.getEnableShellOutputEfficiency(), interactiveShellEnabled: context.config.isInteractiveShellEnabled(), - interactiveShellMode: context.config.getInteractiveShellMode(), topicUpdateNarration: context.config.isTopicUpdateNarrationEnabled(), memoryManagerEnabled: context.config.isMemoryManagerEnabled(), diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index b049ddf58e..59315e1ca6 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -18,8 +18,6 @@ import { MEMORY_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, - WRITE_TO_SHELL_TOOL_NAME, - READ_SHELL_TOOL_NAME, 
WRITE_FILE_TOOL_NAME, WRITE_TODOS_TOOL_NAME, GREP_PARAM_TOTAL_MAX_MATCHES, @@ -83,7 +81,6 @@ export interface PrimaryWorkflowsOptions { export interface OperationalGuidelinesOptions { interactive: boolean; interactiveShellEnabled: boolean; - interactiveShellMode?: 'human' | 'ai' | 'off'; topicUpdateNarration: boolean; memoryManagerEnabled: boolean; } @@ -394,7 +391,7 @@ export function renderOperationalGuidelines( - **Command Execution:** Use the ${formatToolName(SHELL_TOOL_NAME)} tool for running shell commands, remembering the safety rule to explain modifying commands first.${toolUsageInteractive( options.interactive, options.interactiveShellEnabled, - )}${toolUsageRememberingFacts(options)}${toolUsageAiShell(options)} + )}${toolUsageRememberingFacts(options)} - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -803,17 +800,6 @@ function toolUsageInteractive( - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim).`; } -function toolUsageAiShell(options: OperationalGuidelinesOptions): string { - if (options.interactiveShellMode !== 'ai') return ''; - return ` -- **AI-Driven Interactive Shell:** Commands using \`wait_for_output_seconds\` auto-promote to background when they stall. Once promoted, use ${formatToolName(READ_SHELL_TOOL_NAME)} to see the terminal screen, then ${formatToolName(WRITE_TO_SHELL_TOOL_NAME)} to send text input and/or special keys (arrows, Enter, Ctrl-C, etc.). 
- - Set \`wait_for_output_seconds\` **low (2-5)** for commands that prompt for input (npx, installers, REPLs). Set **high (60+)** for long builds. Omit for instant commands. - - **Always read the screen before writing input.** The screen state tells you what the process is waiting for. - - When waiting for a command to finish (e.g. npm install), use ${formatToolName(READ_SHELL_TOOL_NAME)} with \`wait_seconds\` to delay before reading. Do NOT poll in a tight loop. - - **Clean up when done:** when your task is complete, kill background processes with ${formatToolName(WRITE_TO_SHELL_TOOL_NAME)} sending Ctrl-C, or note the PID for the user to clean up. - - You are the sole operator of promoted shells — the user cannot type into them.`; -} - function toolUsageRememberingFacts( options: OperationalGuidelinesOptions, ): string { diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index 95b3f2d17b..dfbb3a5033 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -105,7 +105,6 @@ export interface ShellExecutionConfig { backgroundCompletionBehavior?: 'inject' | 'notify' | 'silent'; originalCommand?: string; sessionId?: string; - autoPromoteTimeoutMs?: number; } /** @@ -890,21 +889,6 @@ export class ShellExecutionService { sessionId: shellExecutionConfig.sessionId, }); - let autoPromoteTimer: NodeJS.Timeout | undefined; - const resetAutoPromoteTimer = () => { - if (shellExecutionConfig.autoPromoteTimeoutMs !== undefined) { - if (autoPromoteTimer) clearTimeout(autoPromoteTimer); - autoPromoteTimer = setTimeout(() => { - ShellExecutionService.background( - ptyPid, - shellExecutionConfig.sessionId, - ); - }, shellExecutionConfig.autoPromoteTimeoutMs); - } - }; - - resetAutoPromoteTimer(); - const result = ExecutionLifecycleService.attachExecution(ptyPid, { executionMethod: ptyInfo?.name ?? 
'node-pty', writeInput: (input) => { @@ -1082,7 +1066,6 @@ export class ShellExecutionService { }); const handleOutput = (data: Buffer) => { - resetAutoPromoteTimer(); processingChain = processingChain.then( () => new Promise((resolveChunk) => { @@ -1152,7 +1135,6 @@ export class ShellExecutionService { ptyProcess.onExit( ({ exitCode, signal }: { exitCode: number; signal?: number }) => { - if (autoPromoteTimer) clearTimeout(autoPromoteTimer); exited = true; abortSignal.removeEventListener('abort', abortHandler); // Attempt to destroy the PTY to ensure FD is closed @@ -1238,7 +1220,6 @@ export class ShellExecutionService { ); const abortHandler = async () => { - if (autoPromoteTimer) clearTimeout(autoPromoteTimer); if (ptyProcess.pid && !exited) { await killProcessGroup({ pid: ptyPid, @@ -1417,28 +1398,6 @@ export class ShellExecutionService { return ExecutionLifecycleService.subscribe(pid, listener); } - /** - * Reads the current rendered screen state of a running process. - * Returns the full terminal buffer text for PTY processes, - * or the accumulated output for child processes. - * - * @param pid The process ID of the target process. - * @returns The screen text, or null if the process is not found. - */ - static readScreen(pid: number): string | null { - const activePty = this.activePtys.get(pid); - if (activePty) { - return getFullBufferText(activePty.headlessTerminal); - } - - const activeChild = this.activeChildProcesses.get(pid); - if (activeChild) { - return activeChild.state.output; - } - - return null; - } - /** * Resizes the pseudo-terminal (PTY) of a running process. 
* diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index e1575966af..89a5aa1614 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -56,18 +56,6 @@ export const READ_FILE_PARAM_END_LINE = 'end_line'; export const SHELL_TOOL_NAME = 'run_shell_command'; export const SHELL_PARAM_COMMAND = 'command'; export const SHELL_PARAM_IS_BACKGROUND = 'is_background'; -export const SHELL_PARAM_WAIT_SECONDS = 'wait_for_output_seconds'; - -// -- write_to_shell -- -export const WRITE_TO_SHELL_TOOL_NAME = 'write_to_shell'; -export const WRITE_TO_SHELL_PARAM_PID = 'pid'; -export const WRITE_TO_SHELL_PARAM_INPUT = 'input'; -export const WRITE_TO_SHELL_PARAM_SPECIAL_KEYS = 'special_keys'; - -// -- read_shell -- -export const READ_SHELL_TOOL_NAME = 'read_shell'; -export const READ_SHELL_PARAM_PID = 'pid'; -export const READ_SHELL_PARAM_WAIT_SECONDS = 'wait_seconds'; // -- write_file -- export const WRITE_FILE_TOOL_NAME = 'write_file'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index a70ed1a33c..d1b81a6e99 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -27,8 +27,6 @@ export { LS_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, - WRITE_TO_SHELL_TOOL_NAME, - READ_SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, WEB_SEARCH_TOOL_NAME, @@ -75,12 +73,6 @@ export { LS_PARAM_IGNORE, SHELL_PARAM_COMMAND, SHELL_PARAM_IS_BACKGROUND, - SHELL_PARAM_WAIT_SECONDS, - WRITE_TO_SHELL_PARAM_PID, - WRITE_TO_SHELL_PARAM_INPUT, - WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, - READ_SHELL_PARAM_PID, - READ_SHELL_PARAM_WAIT_SECONDS, WEB_SEARCH_PARAM_QUERY, WEB_FETCH_PARAM_PROMPT, READ_MANY_PARAM_INCLUDE, @@ -257,21 +249,18 @@ export function getShellDefinition( enableInteractiveShell: boolean, enableEfficiency: 
boolean, enableToolSandboxing: boolean = false, - interactiveShellMode?: string, ): ToolDefinition { return { base: getShellDeclaration( enableInteractiveShell, enableEfficiency, enableToolSandboxing, - interactiveShellMode, ), overrides: (modelId) => getToolSet(modelId).run_shell_command( enableInteractiveShell, enableEfficiency, enableToolSandboxing, - interactiveShellMode, ), }; } diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 6f001c7459..29da313bf4 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -22,7 +22,6 @@ import { PARAM_DIR_PATH, SHELL_PARAM_IS_BACKGROUND, EXIT_PLAN_PARAM_PLAN_FILENAME, - SHELL_PARAM_WAIT_SECONDS, SKILL_PARAM_NAME, PARAM_ADDITIONAL_PERMISSIONS, UPDATE_TOPIC_TOOL_NAME, @@ -37,9 +36,7 @@ import { export function getShellToolDescription( enableInteractiveShell: boolean, enableEfficiency: boolean, - interactiveShellMode?: string, ): string { - const isAiMode = interactiveShellMode === 'ai'; const efficiencyGuidelines = enableEfficiency ? ` @@ -59,11 +56,6 @@ export function getShellToolDescription( Background PIDs: Only included if background processes were started. Process Group PGID: Only included if available.`; - if (isAiMode) { - const autoPromoteInstructions = `Commands that do not complete within \`${SHELL_PARAM_WAIT_SECONDS}\` seconds are automatically promoted to background. Once promoted, use \`write_to_shell\` and \`read_shell\` to interact with the process. Do NOT use \`&\` to background commands.`; - return `This tool executes a given shell command as \`bash -c \`. ${autoPromoteInstructions} Command is executed as a subprocess that leads its own process group. 
Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${efficiencyGuidelines}${returnedInfo}`; - } - if (os.platform() === 'win32') { const backgroundInstructions = enableInteractiveShell ? `To run a command in the background, set the \`${SHELL_PARAM_IS_BACKGROUND}\` parameter to true. Do NOT use PowerShell background constructs.` @@ -94,33 +86,12 @@ export function getShellDeclaration( enableInteractiveShell: boolean, enableEfficiency: boolean, enableToolSandboxing: boolean = false, - interactiveShellMode?: string, ): FunctionDeclaration { - const isAiMode = interactiveShellMode === 'ai'; - - // In AI mode, use wait_for_output_seconds instead of is_background - const backgroundParam = isAiMode - ? { - [SHELL_PARAM_WAIT_SECONDS]: { - type: 'number' as const, - description: - 'Max seconds to wait for command to complete before auto-promoting to background (default: 5). Set low (2-5) for commands likely to prompt for input (npx, installers, REPLs). Set high (60-300) for long builds or installs. Once promoted, use write_to_shell/read_shell to interact.', - }, - } - : { - [SHELL_PARAM_IS_BACKGROUND]: { - type: 'boolean' as const, - description: - 'Set to true if this command should be run in the background (e.g. for long-running servers or watchers). The command will be started, allowed to run for a brief moment to check for immediate errors, and then moved to the background.', - }, - }; - return { name: SHELL_TOOL_NAME, description: getShellToolDescription( enableInteractiveShell, enableEfficiency, - interactiveShellMode, ), parametersJsonSchema: { type: 'object', @@ -149,7 +120,6 @@ export function getShellDeclaration( description: 'Optional. Delay in milliseconds to wait after starting the process in the background. Useful to allow the process to start and generate initial output before returning.', }, - ...backgroundParam, ...(enableToolSandboxing ? 
{ [PARAM_ADDITIONAL_PERMISSIONS]: { diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 5441c39d09..60a52fc6ad 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -337,13 +337,11 @@ export const DEFAULT_LEGACY_SET: CoreToolSet = { enableInteractiveShell, enableEfficiency, enableToolSandboxing, - interactiveShellMode, ) => getShellDeclaration( enableInteractiveShell, enableEfficiency, enableToolSandboxing, - interactiveShellMode, ), replace: { diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index f29f9e6814..a86a20378e 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -344,13 +344,11 @@ export const GEMINI_3_SET: CoreToolSet = { enableInteractiveShell, enableEfficiency, enableToolSandboxing, - interactiveShellMode, ) => getShellDeclaration( enableInteractiveShell, enableEfficiency, enableToolSandboxing, - interactiveShellMode, ), replace: { diff --git a/packages/core/src/tools/definitions/types.ts b/packages/core/src/tools/definitions/types.ts index d4f532f513..42c0cc7028 100644 --- a/packages/core/src/tools/definitions/types.ts +++ b/packages/core/src/tools/definitions/types.ts @@ -38,7 +38,6 @@ export interface CoreToolSet { enableInteractiveShell: boolean, enableEfficiency: boolean, enableToolSandboxing: boolean, - interactiveShellMode?: string, ) => FunctionDeclaration; replace: FunctionDeclaration; google_web_search: FunctionDeclaration; diff --git a/packages/core/src/tools/read-shell.ts b/packages/core/src/tools/read-shell.ts deleted file mode 100644 index 4e74cbbfa5..0000000000 --- a/packages/core/src/tools/read-shell.ts +++ /dev/null @@ 
-1,148 +0,0 @@ -/** - * @license - * Copyright 2026 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { - BaseDeclarativeTool, - BaseToolInvocation, - Kind, - type ToolInvocation, - type ToolResult, -} from './tools.js'; -import { ShellExecutionService } from '../services/shellExecutionService.js'; -import { - READ_SHELL_TOOL_NAME, - READ_SHELL_PARAM_PID, - READ_SHELL_PARAM_WAIT_SECONDS, -} from './tool-names.js'; -import type { MessageBus } from '../confirmation-bus/message-bus.js'; - -export interface ReadShellParams { - pid: number; - wait_seconds?: number; -} - -export class ReadShellToolInvocation extends BaseToolInvocation< - ReadShellParams, - ToolResult -> { - constructor( - params: ReadShellParams, - messageBus: MessageBus, - _toolName?: string, - _toolDisplayName?: string, - ) { - super(params, messageBus, _toolName, _toolDisplayName); - } - - getDescription(): string { - const waitPart = - this.params.wait_seconds !== undefined - ? ` (after ${this.params.wait_seconds}s)` - : ''; - return `read shell screen PID ${this.params.pid}${waitPart}`; - } - - async execute(signal: AbortSignal): Promise { - const { pid, wait_seconds } = this.params; - - // Wait before reading if requested - if (wait_seconds !== undefined && wait_seconds > 0) { - const waitMs = Math.min(wait_seconds, 30) * 1000; // Cap at 30s - await new Promise((resolve) => { - const timer = setTimeout(resolve, waitMs); - const onAbort = () => { - clearTimeout(timer); - resolve(); - }; - signal.addEventListener('abort', onAbort, { once: true }); - }); - } - - // Validate the PID is active - if (!ShellExecutionService.isPtyActive(pid)) { - return { - llmContent: `Error: No active process found with PID ${pid}. The process may have exited.`, - returnDisplay: `No active process with PID ${pid}.`, - }; - } - - const screen = ShellExecutionService.readScreen(pid); - if (screen === null) { - return { - llmContent: `Error: Could not read screen for PID ${pid}. 
The process may have exited.`, - returnDisplay: `Could not read screen for PID ${pid}.`, - }; - } - - return { - llmContent: screen, - returnDisplay: `Screen read from PID ${pid} (${screen.split('\n').length} lines).`, - }; - } -} - -export class ReadShellTool extends BaseDeclarativeTool< - ReadShellParams, - ToolResult -> { - static readonly Name = READ_SHELL_TOOL_NAME; - - constructor(messageBus: MessageBus) { - super( - ReadShellTool.Name, - 'ReadShell', - 'Reads the current screen state of a running background shell process. Returns the rendered terminal screen as text, preserving the visual layout. Use after write_to_shell to see updated output, or to check progress of a running command.', - Kind.Read, - { - type: 'object', - properties: { - [READ_SHELL_PARAM_PID]: { - type: 'number', - description: - 'The PID of the background process to read from. Obtained from a previous run_shell_command call that was auto-promoted to background or started with is_background=true.', - }, - [READ_SHELL_PARAM_WAIT_SECONDS]: { - type: 'number', - description: - 'Seconds to wait before reading the screen. Use this to let the process run for a while before checking output (e.g. wait for a build to finish). 
Max 30 seconds.', - }, - }, - required: [READ_SHELL_PARAM_PID], - }, - messageBus, - false, // output is not markdown - ); - } - - protected override validateToolParamValues( - params: ReadShellParams, - ): string | null { - if (!params.pid || params.pid <= 0) { - return 'PID must be a positive number.'; - } - if ( - params.wait_seconds !== undefined && - (params.wait_seconds < 0 || params.wait_seconds > 30) - ) { - return 'wait_seconds must be between 0 and 30.'; - } - return null; - } - - protected createInvocation( - params: ReadShellParams, - messageBus: MessageBus, - _toolName?: string, - _toolDisplayName?: string, - ): ToolInvocation { - return new ReadShellToolInvocation( - params, - messageBus, - _toolName, - _toolDisplayName, - ); - } -} diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 8ed78ba464..9551fd9638 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -149,8 +149,6 @@ describe('ShellTool', () => { getShellBackgroundCompletionBehavior: vi.fn().mockReturnValue('silent'), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), getSandboxEnabled: vi.fn().mockReturnValue(false), - getInteractiveShellMode: vi.fn().mockReturnValue('off'), - getSessionId: vi.fn().mockReturnValue('test-session-id'), sanitizationConfig: {}, get sandboxManager() { return mockSandboxManager; @@ -424,7 +422,7 @@ describe('ShellTool', () => { expect(mockShellBackground).toHaveBeenCalledWith( 12345, - 'test-session-id', + 'default', 'sleep 10', ); @@ -668,7 +666,7 @@ describe('ShellTool', () => { expect(mockShellBackground).toHaveBeenCalledWith( 12345, - 'test-session-id', + 'default', 'sleep 10', ); diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 0407cb99bf..3ea29474c6 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -33,7 +33,6 @@ import { import { getErrorMessage } from '../utils/errors.js'; import { 
summarizeToolOutput } from '../utils/summarizer.js'; -import { formatShellOutput } from './shellOutputFormatter.js'; import { ShellExecutionService, type ShellOutputEvent, @@ -72,7 +71,6 @@ export interface ShellToolParams { is_background?: boolean; delay_ms?: number; [PARAM_ADDITIONAL_PERMISSIONS]?: SandboxPermissions; - wait_for_output_seconds?: number; } export class ShellToolInvocation extends BaseToolInvocation< @@ -80,7 +78,6 @@ export class ShellToolInvocation extends BaseToolInvocation< ToolResult > { private proactivePermissionsConfirmed?: SandboxPermissions; - private _autoPromoteTimer?: NodeJS.Timeout; constructor( private readonly context: AgentLoopContext, @@ -226,12 +223,7 @@ export class ShellToolInvocation extends BaseToolInvocation< } override getExplanation(): string { - let explanation = this.getContextualDetails().trim(); - const isAiMode = this.context.config.getInteractiveShellMode() === 'ai'; - if (this.params.wait_for_output_seconds !== undefined || isAiMode) { - explanation += ` [auto-background after ${this.params.wait_for_output_seconds ?? 5}s]`; - } - return explanation; + return this.getContextualDetails().trim(); } override getPolicyUpdateOptions( @@ -505,21 +497,6 @@ export class ShellToolInvocation extends BaseToolInvocation< }, timeoutMs); }; - let currentPid: number | undefined; - const isAiMode = this.context.config.getInteractiveShellMode() === 'ai'; - const shouldAutoPromote = - this.params.wait_for_output_seconds !== undefined || isAiMode; - const waitMs = (this.params.wait_for_output_seconds ?? 
5) * 1000; - - const resetAutoPromoteTimer = () => { - if (shouldAutoPromote && currentPid) { - if (this._autoPromoteTimer) clearTimeout(this._autoPromoteTimer); - this._autoPromoteTimer = setTimeout(() => { - ShellExecutionService.background(currentPid!); - }, waitMs); - } - }; - signal.addEventListener('abort', onAbort, { once: true }); timeoutController.signal.addEventListener('abort', onAbort, { once: true, @@ -534,7 +511,6 @@ export class ShellToolInvocation extends BaseToolInvocation< cwd, (event: ShellOutputEvent) => { resetTimeout(); // Reset timeout on any event - resetAutoPromoteTimer(); // Reset auto-promote on any event if (!updateOutput) { return; } @@ -606,7 +582,6 @@ export class ShellToolInvocation extends BaseToolInvocation< backgroundCompletionBehavior: this.context.config.getShellBackgroundCompletionBehavior(), originalCommand: strippedCommand, - autoPromoteTimeoutMs: shouldAutoPromote ? waitMs : undefined, }, ); @@ -643,11 +618,6 @@ export class ShellToolInvocation extends BaseToolInvocation< }; } } - - // In AI mode with wait_for_output_seconds, set up auto-promotion timer. - // When the timer fires, promote to background instead of cancelling. 
- currentPid = pid; - resetAutoPromoteTimer(); } const result = await resultPromise; @@ -688,73 +658,95 @@ export class ShellToolInvocation extends BaseToolInvocation< } } + let data: BackgroundExecutionData | undefined; + + let llmContent = ''; let timeoutMessage = ''; if (result.aborted) { if (timeoutController.signal.aborted) { timeoutMessage = `Command was automatically cancelled because it exceeded the timeout of ${( timeoutMs / 60000 ).toFixed(1)} minutes without output.`; + llmContent = timeoutMessage; + } else { + llmContent = + 'Command was cancelled by user before it could complete.'; } - } - - const formatterOutput = formatShellOutput({ - params: this.params, - result, - debugMode: this.context.config.getDebugMode(), - backgroundPIDs, - isAiMode, - timeoutMessage, - }); - - let data: BackgroundExecutionData | undefined; - data = formatterOutput.data as BackgroundExecutionData | undefined; - let returnDisplay: string | AnsiOutput = formatterOutput.returnDisplay; - let llmContent = formatterOutput.llmContent; - - if (!this.context.config.getDebugMode()) { - if ( - !this.params.is_background && - !result.backgrounded && - !result.aborted - ) { - if (result.output.trim() || result.ansiOutput) { - returnDisplay = - result.ansiOutput && result.ansiOutput.length > 0 - ? 
result.ansiOutput - : result.output; - } else { - if (result.signal) { - returnDisplay = `Command terminated by signal: ${result.signal}`; - } else if (result.error) { - returnDisplay = `Command failed: ${getErrorMessage(result.error)}`; - } else if (result.exitCode !== null && result.exitCode !== 0) { - returnDisplay = `Command exited with code: ${result.exitCode}`; - } - } + if (result.output.trim()) { + llmContent += ` Below is the output before it was cancelled:\n${result.output}`; + } else { + llmContent += ' There was no output before it was cancelled.'; } - } - - // Replace wrapper command with actual command in error messages - if (result.error && !result.aborted) { - llmContent = llmContent.replaceAll( - commandToExecute, - this.params.command, - ); - } - - // Update data with specific things needed by ShellTool - if (this.params.is_background || result.backgrounded) { + } else if (this.params.is_background || result.backgrounded) { + llmContent = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; data = { - ...data, - initialOutput: result.output, - pid: result.pid!, + pid: result.pid, command: this.params.command, + initialOutput: result.output, }; - } else if (result.exitCode !== null && result.exitCode !== 0) { - data = { - exitCode: result.exitCode, - isError: true, - } as BackgroundExecutionData; + } else { + // Create a formatted error string for display, replacing the wrapper command + // with the user-facing command. 
+ const llmContentParts = [`Output: ${result.output || '(empty)'}`]; + + if (result.error) { + const finalError = result.error.message.replaceAll( + commandToExecute, + this.params.command, + ); + llmContentParts.push(`Error: ${finalError}`); + } + + if (result.exitCode !== null && result.exitCode !== 0) { + llmContentParts.push(`Exit Code: ${result.exitCode}`); + data = { + exitCode: result.exitCode, + isError: true, + }; + } + + if (result.signal) { + llmContentParts.push(`Signal: ${result.signal}`); + } + if (backgroundPIDs.length) { + llmContentParts.push(`Background PIDs: ${backgroundPIDs.join(', ')}`); + } + if (result.pid) { + llmContentParts.push(`Process Group PGID: ${result.pid}`); + } + + llmContent = llmContentParts.join('\n'); + } + + let returnDisplay: string | AnsiOutput = ''; + if (this.context.config.getDebugMode()) { + returnDisplay = llmContent; + } else { + if (this.params.is_background || result.backgrounded) { + returnDisplay = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; + } else if (result.aborted) { + const cancelMsg = timeoutMessage || 'Command cancelled by user.'; + if (result.output.trim()) { + returnDisplay = `${cancelMsg}\n\nOutput before cancellation:\n${result.output}`; + } else { + returnDisplay = cancelMsg; + } + } else if (result.output.trim() || result.ansiOutput) { + returnDisplay = + result.ansiOutput && result.ansiOutput.length > 0 + ? result.ansiOutput + : result.output; + } else { + if (result.signal) { + returnDisplay = `Command terminated by signal: ${result.signal}`; + } else if (result.error) { + returnDisplay = `Command failed: ${getErrorMessage(result.error)}`; + } else if (result.exitCode !== null && result.exitCode !== 0) { + returnDisplay = `Command exited with code: ${result.exitCode}`; + } + // If output is empty and command succeeded (code 0, no error/signal/abort), + // returnDisplay will remain empty, which is fine. 
+ } } // Heuristic Sandbox Denial Detection @@ -937,8 +929,6 @@ export class ShellToolInvocation extends BaseToolInvocation< }; } finally { if (timeoutTimer) clearTimeout(timeoutTimer); - const autoTimer = this._autoPromoteTimer; - if (autoTimer) clearTimeout(autoTimer); signal.removeEventListener('abort', onAbort); timeoutController.signal.removeEventListener('abort', onAbort); try { @@ -1017,7 +1007,6 @@ export class ShellTool extends BaseDeclarativeTool< this.context.config.getEnableInteractiveShell(), this.context.config.getEnableShellOutputEfficiency(), this.context.config.getSandboxEnabled(), - this.context.config.getInteractiveShellMode(), ); return resolveToolDeclaration(definition, modelId); } diff --git a/packages/core/src/tools/shellOutputFormatter.ts b/packages/core/src/tools/shellOutputFormatter.ts deleted file mode 100644 index 04d16fb42e..0000000000 --- a/packages/core/src/tools/shellOutputFormatter.ts +++ /dev/null @@ -1,128 +0,0 @@ -/** - * @license - * Copyright 2026 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { type ShellExecutionResult } from '../services/shellExecutionService.js'; -import { type ShellToolParams } from './shell.js'; - -export interface FormatShellOutputOptions { - params: ShellToolParams; - result: ShellExecutionResult; - debugMode: boolean; - timeoutMessage?: string; - backgroundPIDs: number[]; - summarizedOutput?: string; - isAiMode: boolean; -} - -export interface FormattedShellOutput { - llmContent: string; - returnDisplay: string; - data: Record; -} - -export function formatShellOutput( - options: FormatShellOutputOptions, -): FormattedShellOutput { - const { - params, - result, - debugMode, - timeoutMessage, - backgroundPIDs, - summarizedOutput, - } = options; - - let llmContent = ''; - let data: Record = {}; - - if (result.aborted) { - llmContent = timeoutMessage || 'Command cancelled by user.'; - if (result.output.trim()) { - llmContent += ` Below is the output before it was 
cancelled:\n${result.output}`; - } else { - llmContent += ' There was no output before it was cancelled.'; - } - } else if (params.is_background || result.backgrounded) { - const isAutoPromoted = result.backgrounded && !params.is_background; - if (isAutoPromoted) { - llmContent = `Command auto-promoted to background (PID: ${result.pid}). The process is still running. To check its screen state, call the read_shell tool with pid ${result.pid}. To send input or keystrokes, call the write_to_shell tool with pid ${result.pid}. If the process does not exit on its own when done, kill it with write_to_shell using special_keys=["Ctrl-C"].`; - } else { - llmContent = `Command moved to background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; - } - data = { - pid: result.pid, - command: params.command, - directory: params.dir_path, - backgrounded: true, - }; - } else { - const llmContentParts: string[] = []; - - let content = summarizedOutput ?? result.output.trim(); - if (!content) { - content = '(empty)'; - } - - llmContentParts.push(`Output: ${content}`); - - if (result.error) { - llmContentParts.push(`Error: ${result.error.message}`); - } - - if (result.exitCode !== null && result.exitCode !== 0) { - llmContentParts.push(`Exit Code: ${result.exitCode}`); - } - if (result.signal !== null) { - llmContentParts.push(`Signal: ${result.signal}`); - } - if (backgroundPIDs.length) { - llmContentParts.push(`Background PIDs: ${backgroundPIDs.join(', ')}`); - } - if (result.pid) { - llmContentParts.push(`Process Group PGID: ${result.pid}`); - } - - llmContent = llmContentParts.join('\n'); - } - - let returnDisplay = ''; - if (debugMode) { - returnDisplay = llmContent; - } else { - if (params.is_background || result.backgrounded) { - const isAutoPromotedDisplay = - result.backgrounded && !params.is_background; - if (isAutoPromotedDisplay) { - returnDisplay = `Command auto-promoted to background (PID: ${result.pid}).`; - } else { - returnDisplay = `Command moved to 
background (PID: ${result.pid}). Output hidden. Press Ctrl+B to view.`; - } - } else if (result.aborted) { - const cancelMsg = timeoutMessage || 'Command cancelled by user.'; - if (result.output.trim()) { - returnDisplay = `${cancelMsg}\n\nOutput before cancellation:\n${result.output}`; - } else { - returnDisplay = cancelMsg; - } - } else if (result.error) { - returnDisplay = `Command failed: ${result.error.message}`; - } else if (result.exitCode !== 0 && result.exitCode !== null) { - returnDisplay = `Command exited with code ${result.exitCode}`; - if (result.output.trim()) { - returnDisplay += `\n\n${result.output}`; - } - } else if (summarizedOutput) { - returnDisplay = `Command succeeded. Output summarized:\n${summarizedOutput}`; - } else { - returnDisplay = `Command succeeded.`; - if (result.output.trim()) { - returnDisplay += `\n\n${result.output}`; - } - } - } - - return { llmContent, returnDisplay, data }; -} diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 47cc906c27..224f2ab0d5 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -10,8 +10,6 @@ import { LS_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, - WRITE_TO_SHELL_TOOL_NAME, - READ_SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, WEB_SEARCH_TOOL_NAME, @@ -54,12 +52,6 @@ import { LS_PARAM_IGNORE, SHELL_PARAM_COMMAND, SHELL_PARAM_IS_BACKGROUND, - SHELL_PARAM_WAIT_SECONDS, - WRITE_TO_SHELL_PARAM_PID, - WRITE_TO_SHELL_PARAM_INPUT, - WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, - READ_SHELL_PARAM_PID, - READ_SHELL_PARAM_WAIT_SECONDS, WEB_SEARCH_PARAM_QUERY, WEB_FETCH_PARAM_PROMPT, READ_MANY_PARAM_INCLUDE, @@ -98,8 +90,6 @@ export { LS_TOOL_NAME, READ_FILE_TOOL_NAME, SHELL_TOOL_NAME, - WRITE_TO_SHELL_TOOL_NAME, - READ_SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, WEB_SEARCH_TOOL_NAME, @@ -146,12 +136,6 @@ export { LS_PARAM_IGNORE, SHELL_PARAM_COMMAND, SHELL_PARAM_IS_BACKGROUND, - SHELL_PARAM_WAIT_SECONDS, - 
WRITE_TO_SHELL_PARAM_PID, - WRITE_TO_SHELL_PARAM_INPUT, - WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, - READ_SHELL_PARAM_PID, - READ_SHELL_PARAM_WAIT_SECONDS, WEB_SEARCH_PARAM_QUERY, WEB_FETCH_PARAM_PROMPT, READ_MANY_PARAM_INCLUDE, @@ -195,7 +179,6 @@ export const TOOLS_REQUIRING_NARROWING = new Set([ WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, SHELL_TOOL_NAME, - WRITE_TO_SHELL_TOOL_NAME, ]); export const TRACKER_CREATE_TASK_TOOL_NAME = 'tracker_create_task'; @@ -268,8 +251,6 @@ export const ALL_BUILTIN_TOOL_NAMES = [ WEB_FETCH_TOOL_NAME, EDIT_TOOL_NAME, SHELL_TOOL_NAME, - WRITE_TO_SHELL_TOOL_NAME, - READ_SHELL_TOOL_NAME, GREP_TOOL_NAME, READ_MANY_FILES_TOOL_NAME, READ_FILE_TOOL_NAME, diff --git a/packages/core/src/tools/write-to-shell.ts b/packages/core/src/tools/write-to-shell.ts deleted file mode 100644 index 652cb31bf5..0000000000 --- a/packages/core/src/tools/write-to-shell.ts +++ /dev/null @@ -1,230 +0,0 @@ -/** - * @license - * Copyright 2026 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { - type ToolConfirmationOutcome, - BaseDeclarativeTool, - BaseToolInvocation, - Kind, - type ToolInvocation, - type ToolResult, - type ToolCallConfirmationDetails, - type ToolExecuteConfirmationDetails, -} from './tools.js'; -import { ShellExecutionService } from '../services/shellExecutionService.js'; -import { - WRITE_TO_SHELL_TOOL_NAME, - WRITE_TO_SHELL_PARAM_PID, - WRITE_TO_SHELL_PARAM_INPUT, - WRITE_TO_SHELL_PARAM_SPECIAL_KEYS, -} from './tool-names.js'; -import type { MessageBus } from '../confirmation-bus/message-bus.js'; - -/** - * Mapping of named special keys to their ANSI escape sequences. 
- */ -const SPECIAL_KEY_MAP: Record = { - Enter: '\r', - Tab: '\t', - Up: '\x1b[A', - Down: '\x1b[B', - Left: '\x1b[D', - Right: '\x1b[C', - Escape: '\x1b', - Backspace: '\x7f', - 'Ctrl-C': '\x03', - 'Ctrl-D': '\x04', - 'Ctrl-Z': '\x1a', - Space: ' ', - Delete: '\x1b[3~', - Home: '\x1b[H', - End: '\x1b[F', -}; - -const VALID_SPECIAL_KEYS = Object.keys(SPECIAL_KEY_MAP); - -/** Delay in ms to wait after writing input for the process to react. */ -const POST_INPUT_DELAY_MS = 150; - -export interface WriteToShellParams { - pid: number; - input?: string; - special_keys?: string[]; -} - -export class WriteToShellToolInvocation extends BaseToolInvocation< - WriteToShellParams, - ToolResult -> { - constructor( - params: WriteToShellParams, - messageBus: MessageBus, - _toolName?: string, - _toolDisplayName?: string, - ) { - super(params, messageBus, _toolName, _toolDisplayName); - } - - getDescription(): string { - const parts: string[] = [`write to shell PID ${this.params.pid}`]; - if (this.params.input) { - const display = - this.params.input.length > 50 - ? 
`${this.params.input.substring(0, 50)}...` - : this.params.input; - parts.push(`input: "${display}"`); - } - if (this.params.special_keys?.length) { - parts.push(`keys: [${this.params.special_keys.join(', ')}]`); - } - return parts.join(' '); - } - - protected override async getConfirmationDetails( - _abortSignal: AbortSignal, - ): Promise { - const confirmationDetails: ToolExecuteConfirmationDetails = { - type: 'exec', - title: 'Confirm Shell Input', - command: this.getDescription(), - rootCommand: 'write_to_shell', - rootCommands: ['write_to_shell'], - onConfirm: async (_outcome: ToolConfirmationOutcome) => { - // Policy updates handled centrally - }, - }; - return confirmationDetails; - } - - async execute(_signal: AbortSignal): Promise { - const { pid, input, special_keys } = this.params; - - // Validate the PID is active - if (!ShellExecutionService.isPtyActive(pid)) { - return { - llmContent: `Error: No active process found with PID ${pid}. The process may have exited.`, - returnDisplay: `No active process with PID ${pid}.`, - }; - } - - // Validate special keys - if (special_keys?.length) { - const invalidKeys = special_keys.filter( - (k) => !VALID_SPECIAL_KEYS.includes(k), - ); - if (invalidKeys.length > 0) { - return { - llmContent: `Error: Invalid special keys: ${invalidKeys.join(', ')}. 
Valid keys are: ${VALID_SPECIAL_KEYS.join(', ')}`, - returnDisplay: `Invalid special keys: ${invalidKeys.join(', ')}`, - }; - } - } - - // Send text input - if (input) { - ShellExecutionService.writeToPty(pid, input); - } - - // Send special keys - if (special_keys?.length) { - for (const key of special_keys) { - const sequence = SPECIAL_KEY_MAP[key]; - if (sequence) { - ShellExecutionService.writeToPty(pid, sequence); - } - } - } - - // Wait briefly for the process to react - await new Promise((resolve) => setTimeout(resolve, POST_INPUT_DELAY_MS)); - - // Read the screen after writing - const screen = ShellExecutionService.readScreen(pid); - if (screen === null) { - return { - llmContent: `Input sent, but the process (PID ${pid}) has exited.`, - returnDisplay: `Process exited after input.`, - }; - } - - return { - llmContent: `Input sent to PID ${pid}. Current screen:\n${screen}`, - returnDisplay: `Input sent to PID ${pid}.`, - }; - } -} - -export class WriteToShellTool extends BaseDeclarativeTool< - WriteToShellParams, - ToolResult -> { - static readonly Name = WRITE_TO_SHELL_TOOL_NAME; - - constructor(messageBus: MessageBus) { - super( - WriteToShellTool.Name, - 'WriteToShell', - 'Sends input to a running background shell process. Use this to interact with TUI applications, REPLs, and interactive commands. After writing, the current screen state is returned. Works with processes that were auto-promoted to background via wait_for_output_seconds or started with is_background=true.', - Kind.Execute, - { - type: 'object', - properties: { - [WRITE_TO_SHELL_PARAM_PID]: { - type: 'number', - description: - 'The PID of the background process to write to. Obtained from a previous run_shell_command call that was auto-promoted to background or started with is_background=true.', - }, - [WRITE_TO_SHELL_PARAM_INPUT]: { - type: 'string', - description: - '(OPTIONAL) Text to send to the process. 
This is literal text typed into the terminal.', - }, - [WRITE_TO_SHELL_PARAM_SPECIAL_KEYS]: { - type: 'array', - items: { - type: 'string', - enum: VALID_SPECIAL_KEYS, - }, - description: - '(OPTIONAL) Named special keys to send after the input text. Each key is sent in sequence. Examples: ["Enter"], ["Tab"], ["Up", "Enter"], ["Ctrl-C"].', - }, - }, - required: [WRITE_TO_SHELL_PARAM_PID], - }, - messageBus, - false, // output is not markdown - ); - } - - protected override validateToolParamValues( - params: WriteToShellParams, - ): string | null { - if (!params.pid || params.pid <= 0) { - return 'PID must be a positive number.'; - } - if ( - !params.input && - (!params.special_keys || !params.special_keys.length) - ) { - return 'At least one of input or special_keys must be provided.'; - } - return null; - } - - protected createInvocation( - params: WriteToShellParams, - messageBus: MessageBus, - _toolName?: string, - _toolDisplayName?: string, - ): ToolInvocation { - return new WriteToShellToolInvocation( - params, - messageBus, - _toolName, - _toolDisplayName, - ); - } -} From 1b3e7d674f48f08aa4e1bfa3bb525d90fc2f0b66 Mon Sep 17 00:00:00 2001 From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:06:30 -0700 Subject: [PATCH 16/39] docs: update MCP server OAuth redirect port documentation (#24844) --- docs/tools/mcp-server.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tools/mcp-server.md b/docs/tools/mcp-server.md index 9fc84d54c0..3baeb746df 100644 --- a/docs/tools/mcp-server.md +++ b/docs/tools/mcp-server.md @@ -290,7 +290,7 @@ When connecting to an OAuth-enabled server: > OAuth authentication requires that your local machine can: > > - Open a web browser for authentication -> - Receive redirects on `http://localhost:7777/oauth/callback` +> - Receive redirects on `http://localhost:/oauth/callback` (or a specific port if configured via `redirectUri`) This feature will not work in: @@ -323,8 
+323,8 @@ Use the `/mcp auth` command to manage OAuth authentication: if omitted) - **`tokenUrl`** (string): OAuth token endpoint (auto-discovered if omitted) - **`scopes`** (string[]): Required OAuth scopes -- **`redirectUri`** (string): Custom redirect URI (defaults to - `http://localhost:7777/oauth/callback`) +- **`redirectUri`** (string): Custom redirect URI (defaults to an OS-assigned + random port, e.g., `http://localhost:/oauth/callback`) - **`tokenParamName`** (string): Query parameter name for tokens in SSE URLs - **`audiences`** (string[]): Audiences the token is valid for From e77b22e638869f741d0c8d2760abcfeebf94ae35 Mon Sep 17 00:00:00 2001 From: Gaurav <39389231+gsquared94@users.noreply.github.com> Date: Wed, 8 Apr 2026 22:31:10 +0800 Subject: [PATCH 17/39] fix: isolate concurrent browser agent instances (#24794) --- .../browser-agent.concurrent.responses | 8 + integration-tests/browser-agent.test.ts | 44 ++ .../browser/browserAgentFactory.test.ts | 2 + .../src/agents/browser/browserAgentFactory.ts | 379 +++++++++--------- .../browser/browserAgentInvocation.test.ts | 6 +- .../agents/browser/browserAgentInvocation.ts | 2 + .../src/agents/browser/browserManager.test.ts | 116 ++++++ .../core/src/agents/browser/browserManager.ts | 86 ++++ 8 files changed, 458 insertions(+), 185 deletions(-) create mode 100644 integration-tests/browser-agent.concurrent.responses diff --git a/integration-tests/browser-agent.concurrent.responses b/integration-tests/browser-agent.concurrent.responses new file mode 100644 index 0000000000..f64397e02d --- /dev/null +++ b/integration-tests/browser-agent.concurrent.responses @@ -0,0 +1,8 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll launch two browser agents concurrently to check both repositories."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and get the page title"}}},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate 
to https://example.com and get the page title"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":15,"totalTokenCount":165}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":15,"totalTokenCount":165}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Page title is Example Domain."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Page title is Example 
Domain."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Both browser agents completed successfully. Agent 1 and Agent 2 both navigated to their respective pages and confirmed the page titles."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":40,"totalTokenCount":340}}]} diff --git a/integration-tests/browser-agent.test.ts b/integration-tests/browser-agent.test.ts index 09e20bcb26..325fdc1db5 100644 --- a/integration-tests/browser-agent.test.ts +++ b/integration-tests/browser-agent.test.ts @@ -307,4 +307,48 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { await run.expectText('successfully written', 15000); }); + + it('should handle concurrent browser agents with isolated session mode', async () => { + rig.setup('browser-concurrent', { + fakeResponsesPath: join(__dirname, 'browser-agent.concurrent.responses'), + settings: { + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + headless: true, + // Isolated mode supports concurrent browser agents. + // Persistent/existing modes reject concurrent calls to prevent + // Chrome profile lock conflicts. 
+ sessionMode: 'isolated', + }, + }, + }, + }); + + const result = await rig.run({ + args: 'Launch two browser agents concurrently to check example.com', + }); + + assertModelHasOutput(result); + + const toolLogs = rig.readToolLogs(); + const browserCalls = toolLogs.filter( + (t) => t.toolRequest.name === 'browser_agent', + ); + + // Both browser_agent invocations should have been called + expect(browserCalls.length).toBe(2); + + // Both should complete successfully (no errors) + for (const call of browserCalls) { + expect( + call.toolRequest.success, + `browser_agent call failed: ${JSON.stringify(call.toolRequest)}`, + ).toBe(true); + } + }); }); diff --git a/packages/core/src/agents/browser/browserAgentFactory.test.ts b/packages/core/src/agents/browser/browserAgentFactory.test.ts index 1be28e60c4..b071a420ab 100644 --- a/packages/core/src/agents/browser/browserAgentFactory.test.ts +++ b/packages/core/src/agents/browser/browserAgentFactory.test.ts @@ -38,6 +38,8 @@ const mockBrowserManager = { ]), callTool: vi.fn().mockResolvedValue({ content: [] }), close: vi.fn().mockResolvedValue(undefined), + acquire: vi.fn(), + release: vi.fn(), }; // Mock dependencies diff --git a/packages/core/src/agents/browser/browserAgentFactory.ts b/packages/core/src/agents/browser/browserAgentFactory.ts index e07f403ba7..f26dc79c69 100644 --- a/packages/core/src/agents/browser/browserAgentFactory.ts +++ b/packages/core/src/agents/browser/browserAgentFactory.ts @@ -81,207 +81,218 @@ export async function createBrowserAgentDefinition( // Get or create browser manager singleton for this session mode/profile const browserManager = BrowserManager.getInstance(config); - await browserManager.ensureConnection(); + browserManager.acquire(); - debugLogger.log('Browser connected with isolated MCP client.'); + try { + await browserManager.ensureConnection(); - // Determine if input blocker should be active (non-headless + enabled) - const shouldDisableInput = 
config.shouldDisableBrowserUserInput(); - // Inject automation overlay and input blocker if not in headless mode - const browserConfig = config.getBrowserAgentConfig(); - if (!browserConfig?.customConfig?.headless) { - debugLogger.log('Injecting automation overlay...'); - await injectAutomationOverlay(browserManager); - if (shouldDisableInput) { - debugLogger.log('Injecting input blocker...'); - await injectInputBlocker(browserManager); - } - } + debugLogger.log('Browser connected with isolated MCP client.'); - // Create declarative tools from dynamically discovered MCP tools - // These tools dispatch to browserManager's isolated client - const mcpTools = await createMcpDeclarativeTools( - browserManager, - messageBus, - shouldDisableInput, - browserConfig.customConfig.blockFileUploads, - ); - const availableToolNames = mcpTools.map((t) => t.name); - - // Register high-priority policy rules for sensitive actions which is not - // able to be overwrite by YOLO mode. - const policyEngine = config.getPolicyEngine(); - - if (policyEngine) { - const existingRules = policyEngine.getRules(); - - const restrictedTools = ['fill', 'fill_form']; - - // ASK_USER for upload_file and evaluate_script when sensitive action - // need confirmation. 
- if (browserConfig.customConfig.confirmSensitiveActions) { - restrictedTools.push('upload_file', 'evaluate_script'); - } - - for (const toolName of restrictedTools) { - const rule = generateAskUserRules(toolName); - if (!existingRules.some((r) => isRuleEqual(r, rule))) { - policyEngine.addRule(rule); + // Determine if input blocker should be active (non-headless + enabled) + const shouldDisableInput = config.shouldDisableBrowserUserInput(); + // Inject automation overlay and input blocker if not in headless mode + const browserConfig = config.getBrowserAgentConfig(); + if (!browserConfig?.customConfig?.headless) { + debugLogger.log('Injecting automation overlay...'); + await injectAutomationOverlay(browserManager); + if (shouldDisableInput) { + debugLogger.log('Injecting input blocker...'); + await injectInputBlocker(browserManager); } } - // Reduce noise for read-only tools in default mode - const readOnlyTools = (await browserManager.getDiscoveredTools()) - .filter((t) => !!t.annotations?.readOnlyHint) - .map((t) => t.name); - const allowlistedReadonlyTools = ['take_snapshot', 'take_screenshot']; + // Create declarative tools from dynamically discovered MCP tools + // These tools dispatch to browserManager's isolated client + const mcpTools = await createMcpDeclarativeTools( + browserManager, + messageBus, + shouldDisableInput, + browserConfig.customConfig.blockFileUploads, + ); + const availableToolNames = mcpTools.map((t) => t.name); - for (const toolName of [...readOnlyTools, ...allowlistedReadonlyTools]) { - if (availableToolNames.includes(toolName)) { - const rule = generateAllowRules(toolName); + // Register high-priority policy rules for sensitive actions which is not + // able to be overwrite by YOLO mode. 
+ const policyEngine = config.getPolicyEngine(); + + if (policyEngine) { + const existingRules = policyEngine.getRules(); + + const restrictedTools = ['fill', 'fill_form']; + + // ASK_USER for upload_file and evaluate_script when sensitive action + // need confirmation. + if (browserConfig.customConfig.confirmSensitiveActions) { + restrictedTools.push('upload_file', 'evaluate_script'); + } + + for (const toolName of restrictedTools) { + const rule = generateAskUserRules(toolName); if (!existingRules.some((r) => isRuleEqual(r, rule))) { policyEngine.addRule(rule); } } - } - } - function generateAskUserRules(toolName: string): PolicyRule { - return { - toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`, - decision: PolicyDecision.ASK_USER, - priority: 999, - source: 'BrowserAgent (Sensitive Actions)', - mcpName: BROWSER_AGENT_NAME, + // Reduce noise for read-only tools in default mode + const readOnlyTools = (await browserManager.getDiscoveredTools()) + .filter((t) => !!t.annotations?.readOnlyHint) + .map((t) => t.name); + const allowlistedReadonlyTools = ['take_snapshot', 'take_screenshot']; + + for (const toolName of [...readOnlyTools, ...allowlistedReadonlyTools]) { + if (availableToolNames.includes(toolName)) { + const rule = generateAllowRules(toolName); + if (!existingRules.some((r) => isRuleEqual(r, rule))) { + policyEngine.addRule(rule); + } + } + } + } + + function generateAskUserRules(toolName: string): PolicyRule { + return { + toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`, + decision: PolicyDecision.ASK_USER, + priority: 999, + source: 'BrowserAgent (Sensitive Actions)', + mcpName: BROWSER_AGENT_NAME, + }; + } + + function generateAllowRules(toolName: string): PolicyRule { + return { + toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`, + decision: PolicyDecision.ALLOW, + priority: PRIORITY_SUBAGENT_TOOL, + source: 'BrowserAgent (Read-Only)', + mcpName: BROWSER_AGENT_NAME, + }; + } + + // Check if policy rule 
the same in all the attributes that we care about + function isRuleEqual(rule1: PolicyRule, rule2: PolicyRule) { + return ( + rule1.toolName === rule2.toolName && + rule1.decision === rule2.decision && + rule1.priority === rule2.priority && + rule1.mcpName === rule2.mcpName + ); + } + + // Validate required semantic tools are available + const requiredSemanticTools = [ + 'click', + 'fill', + 'navigate_page', + 'take_snapshot', + ]; + const missingSemanticTools = requiredSemanticTools.filter( + (t) => !availableToolNames.includes(t), + ); + + const rawSessionMode = browserConfig?.customConfig?.sessionMode; + const sessionMode = + rawSessionMode === 'isolated' || rawSessionMode === 'existing' + ? rawSessionMode + : 'persistent'; + + recordBrowserAgentToolDiscovery( + config, + mcpTools.length, + missingSemanticTools, + sessionMode, + ); + + if (missingSemanticTools.length > 0) { + debugLogger.warn( + `Semantic tools missing (${missingSemanticTools.join(', ')}). ` + + 'Some browser interactions may not work correctly.', + ); + } + + // Only click_at is strictly required — text input can use press_key or fill. + const requiredVisualTools = ['click_at']; + const missingVisualTools = requiredVisualTools.filter( + (t) => !availableToolNames.includes(t), + ); + + // Check whether vision can be enabled; returns structured type with code and message. + function getVisionDisabledReason(): VisionDisabledReason { + const browserConfig = config.getBrowserAgentConfig(); + if (!browserConfig.customConfig.visualModel) { + return { + code: 'no_visual_model', + message: 'No visualModel configured.', + }; + } + if (missingVisualTools.length > 0) { + return { + code: 'missing_visual_tools', + message: + `Visual tools missing (${missingVisualTools.join(', ')}). 
` + + `The installed chrome-devtools-mcp version may be too old.`, + }; + } + const authType = config.getContentGeneratorConfig()?.authType; + const blockedAuthTypes = new Set([ + AuthType.LOGIN_WITH_GOOGLE, + AuthType.LEGACY_CLOUD_SHELL, + AuthType.COMPUTE_ADC, + ]); + if (authType && blockedAuthTypes.has(authType)) { + return { + code: 'blocked_auth_type', + message: 'Visual agent model not available for current auth type.', + }; + } + return undefined; + } + + const allTools: AnyDeclarativeTool[] = [...mcpTools]; + const visionDisabledReason = getVisionDisabledReason(); + + logBrowserAgentVisionStatus(config, { + enabled: !visionDisabledReason, + disabled_reason: visionDisabledReason?.code, + }); + + if (visionDisabledReason) { + debugLogger.log(`Vision disabled: ${visionDisabledReason.message}`); + } else { + allTools.push( + createAnalyzeScreenshotTool(browserManager, config, messageBus), + ); + } + + debugLogger.log( + `Created ${allTools.length} tools for browser agent: ` + + allTools.map((t) => t.name).join(', '), + ); + + // Create configured definition with tools + // BrowserAgentDefinition is a factory function - call it with config + const baseDefinition = BrowserAgentDefinition( + config, + !visionDisabledReason, + ); + const definition: LocalAgentDefinition = { + ...baseDefinition, + toolConfig: { + tools: allTools, + }, }; - } - function generateAllowRules(toolName: string): PolicyRule { return { - toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`, - decision: PolicyDecision.ALLOW, - priority: PRIORITY_SUBAGENT_TOOL, - source: 'BrowserAgent (Read-Only)', - mcpName: BROWSER_AGENT_NAME, + definition, + browserManager, + visionEnabled: !visionDisabledReason, + sessionMode, }; + } catch (error) { + // Release the browser manager if setup fails, so concurrent tasks can try again. 
+ browserManager.release(); + throw error; } - - // Check if policy rule the same in all the attributes that we care about - function isRuleEqual(rule1: PolicyRule, rule2: PolicyRule) { - return ( - rule1.toolName === rule2.toolName && - rule1.decision === rule2.decision && - rule1.priority === rule2.priority && - rule1.mcpName === rule2.mcpName - ); - } - - // Validate required semantic tools are available - const requiredSemanticTools = [ - 'click', - 'fill', - 'navigate_page', - 'take_snapshot', - ]; - const missingSemanticTools = requiredSemanticTools.filter( - (t) => !availableToolNames.includes(t), - ); - - const rawSessionMode = browserConfig?.customConfig?.sessionMode; - const sessionMode = - rawSessionMode === 'isolated' || rawSessionMode === 'existing' - ? rawSessionMode - : 'persistent'; - - recordBrowserAgentToolDiscovery( - config, - mcpTools.length, - missingSemanticTools, - sessionMode, - ); - - if (missingSemanticTools.length > 0) { - debugLogger.warn( - `Semantic tools missing (${missingSemanticTools.join(', ')}). ` + - 'Some browser interactions may not work correctly.', - ); - } - - // Only click_at is strictly required — text input can use press_key or fill. - const requiredVisualTools = ['click_at']; - const missingVisualTools = requiredVisualTools.filter( - (t) => !availableToolNames.includes(t), - ); - - // Check whether vision can be enabled; returns structured type with code and message. - function getVisionDisabledReason(): VisionDisabledReason { - const browserConfig = config.getBrowserAgentConfig(); - if (!browserConfig.customConfig.visualModel) { - return { - code: 'no_visual_model', - message: 'No visualModel configured.', - }; - } - if (missingVisualTools.length > 0) { - return { - code: 'missing_visual_tools', - message: - `Visual tools missing (${missingVisualTools.join(', ')}). 
` + - `The installed chrome-devtools-mcp version may be too old.`, - }; - } - const authType = config.getContentGeneratorConfig()?.authType; - const blockedAuthTypes = new Set([ - AuthType.LOGIN_WITH_GOOGLE, - AuthType.LEGACY_CLOUD_SHELL, - AuthType.COMPUTE_ADC, - ]); - if (authType && blockedAuthTypes.has(authType)) { - return { - code: 'blocked_auth_type', - message: 'Visual agent model not available for current auth type.', - }; - } - return undefined; - } - - const allTools: AnyDeclarativeTool[] = [...mcpTools]; - const visionDisabledReason = getVisionDisabledReason(); - - logBrowserAgentVisionStatus(config, { - enabled: !visionDisabledReason, - disabled_reason: visionDisabledReason?.code, - }); - - if (visionDisabledReason) { - debugLogger.log(`Vision disabled: ${visionDisabledReason.message}`); - } else { - allTools.push( - createAnalyzeScreenshotTool(browserManager, config, messageBus), - ); - } - - debugLogger.log( - `Created ${allTools.length} tools for browser agent: ` + - allTools.map((t) => t.name).join(', '), - ); - - // Create configured definition with tools - // BrowserAgentDefinition is a factory function - call it with config - const baseDefinition = BrowserAgentDefinition(config, !visionDisabledReason); - const definition: LocalAgentDefinition = { - ...baseDefinition, - toolConfig: { - tools: allTools, - }, - }; - - return { - definition, - browserManager, - visionEnabled: !visionDisabledReason, - sessionMode, - }; } /** diff --git a/packages/core/src/agents/browser/browserAgentInvocation.test.ts b/packages/core/src/agents/browser/browserAgentInvocation.test.ts index a87b88cb1b..ac90564f06 100644 --- a/packages/core/src/agents/browser/browserAgentInvocation.test.ts +++ b/packages/core/src/agents/browser/browserAgentInvocation.test.ts @@ -192,7 +192,10 @@ describe('BrowserAgentInvocation', () => { promptConfig: { query: '', systemPrompt: '' }, toolConfig: { tools: ['analyze_screenshot', 'click'] }, }, - browserManager: {} as never, + 
browserManager: { + release: vi.fn(), + callTool: vi.fn().mockResolvedValue({ content: [] }), + } as never, visionEnabled: true, sessionMode: 'persistent', }); @@ -766,6 +769,7 @@ describe('BrowserAgentInvocation', () => { } return { isError: false }; }), + release: vi.fn(), }; vi.mocked(createBrowserAgentDefinition).mockResolvedValue({ diff --git a/packages/core/src/agents/browser/browserAgentInvocation.ts b/packages/core/src/agents/browser/browserAgentInvocation.ts index 6fb05753ee..e71d82cf55 100644 --- a/packages/core/src/agents/browser/browserAgentInvocation.ts +++ b/packages/core/src/agents/browser/browserAgentInvocation.ts @@ -440,6 +440,8 @@ ${output.result}`; } } catch { // Ignore errors for removing the overlays. + } finally { + browserManager.release(); } } } diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts index baabc80bcb..65c17bfb09 100644 --- a/packages/core/src/agents/browser/browserManager.test.ts +++ b/packages/core/src/agents/browser/browserManager.test.ts @@ -873,6 +873,122 @@ describe('BrowserManager', () => { expect(instance1).not.toBe(instance2); }); + + it('should throw when acquired instance is requested in persistent mode', () => { + // mockConfig defaults to persistent mode + const instance1 = BrowserManager.getInstance(mockConfig); + instance1.acquire(); + + expect(() => BrowserManager.getInstance(mockConfig)).toThrow( + /Cannot launch a concurrent browser agent in "persistent" session mode/, + ); + }); + + it('should throw when acquired instance is requested in existing mode', () => { + const existingConfig = makeFakeConfig({ + agents: { + overrides: { browser_agent: { enabled: true } }, + browser: { sessionMode: 'existing' }, + }, + }); + + const instance1 = BrowserManager.getInstance(existingConfig); + instance1.acquire(); + + expect(() => BrowserManager.getInstance(existingConfig)).toThrow( + /Cannot launch a concurrent browser agent in "existing" session 
mode/, + ); + }); + + it('should return a different instance when the primary is acquired in isolated mode', () => { + const isolatedConfig = makeFakeConfig({ + agents: { + overrides: { browser_agent: { enabled: true } }, + browser: { sessionMode: 'isolated' }, + }, + }); + + const instance1 = BrowserManager.getInstance(isolatedConfig); + instance1.acquire(); + + const instance2 = BrowserManager.getInstance(isolatedConfig); + + expect(instance2).not.toBe(instance1); + expect(instance1.isAcquired()).toBe(true); + expect(instance2.isAcquired()).toBe(false); + }); + + it('should reuse the primary when it has been released', () => { + const instance1 = BrowserManager.getInstance(mockConfig); + instance1.acquire(); + instance1.release(); + + const instance2 = BrowserManager.getInstance(mockConfig); + + expect(instance2).toBe(instance1); + expect(instance1.isAcquired()).toBe(false); + }); + + it('should reuse a released parallel instance in isolated mode', () => { + const isolatedConfig = makeFakeConfig({ + agents: { + overrides: { browser_agent: { enabled: true } }, + browser: { sessionMode: 'isolated' }, + }, + }); + + const instance1 = BrowserManager.getInstance(isolatedConfig); + instance1.acquire(); + + const instance2 = BrowserManager.getInstance(isolatedConfig); + instance2.acquire(); + instance2.release(); + + // Primary is still acquired, parallel is released — should reuse parallel + const instance3 = BrowserManager.getInstance(isolatedConfig); + expect(instance3).toBe(instance2); + }); + + it('should create multiple parallel instances in isolated mode', () => { + const isolatedConfig = makeFakeConfig({ + agents: { + overrides: { browser_agent: { enabled: true } }, + browser: { sessionMode: 'isolated' }, + }, + }); + + const instance1 = BrowserManager.getInstance(isolatedConfig); + instance1.acquire(); + + const instance2 = BrowserManager.getInstance(isolatedConfig); + instance2.acquire(); + + const instance3 = BrowserManager.getInstance(isolatedConfig); + + 
expect(instance1).not.toBe(instance2); + expect(instance2).not.toBe(instance3); + expect(instance1).not.toBe(instance3); + }); + + it('should throw when MAX_PARALLEL_INSTANCES is reached in isolated mode', () => { + const isolatedConfig = makeFakeConfig({ + agents: { + overrides: { browser_agent: { enabled: true } }, + browser: { sessionMode: 'isolated' }, + }, + }); + + // Acquire MAX_PARALLEL_INSTANCES instances + for (let i = 0; i < BrowserManager.MAX_PARALLEL_INSTANCES; i++) { + const instance = BrowserManager.getInstance(isolatedConfig); + instance.acquire(); + } + + // Next call should throw + expect(() => BrowserManager.getInstance(isolatedConfig)).toThrow( + /Maximum number of parallel browser instances/, + ); + }); }); describe('resetAll', () => { diff --git a/packages/core/src/agents/browser/browserManager.ts b/packages/core/src/agents/browser/browserManager.ts index 89d54e9c72..ebc43bc374 100644 --- a/packages/core/src/agents/browser/browserManager.ts +++ b/packages/core/src/agents/browser/browserManager.ts @@ -114,6 +114,12 @@ export class BrowserManager { // --- Static singleton management --- private static instances = new Map(); + /** + * Maximum number of parallel browser instances allowed in isolated mode. + * Prevents unbounded resource consumption from concurrent browser_agent calls. + */ + static readonly MAX_PARALLEL_INSTANCES = 5; + /** * Returns the cache key for a given config. * Uses `sessionMode:profilePath` so different profiles get separate instances. @@ -128,14 +134,64 @@ export class BrowserManager { /** * Returns an existing BrowserManager for the current config's session mode * and profile, or creates a new one. + * + * Concurrency rules: + * - **persistent / existing mode**: Only one instance is allowed at a time. + * If the instance is already in-use, an error is thrown instructing the + * caller to run browser tasks sequentially. + * - **isolated mode**: Parallel instances are allowed up to + * MAX_PARALLEL_INSTANCES. 
Each isolated instance gets its own temp profile. */ static getInstance(config: Config): BrowserManager { const key = BrowserManager.getInstanceKey(config); + const sessionMode = + config.getBrowserAgentConfig().customConfig.sessionMode ?? 'persistent'; let instance = BrowserManager.instances.get(key); if (!instance) { instance = new BrowserManager(config); BrowserManager.instances.set(key, instance); debugLogger.log(`Created new BrowserManager singleton (key: ${key})`); + } else if (instance.inUse) { + // Persistent and existing modes share a browser profile directory. + // Chrome prevents multiple instances from using the same profile, so + // concurrent usage would cause "profile locked" errors. + if (sessionMode === 'persistent' || sessionMode === 'existing') { + throw new Error( + `Cannot launch a concurrent browser agent in "${sessionMode}" session mode. ` + + `The browser instance is already in use by another task. ` + + `Please run browser tasks sequentially, or switch to "isolated" session mode for concurrent browser usage.`, + ); + } + + // Isolated mode: allow parallel instances up to the limit. + let inUseCount = 1; // primary is already in-use + let suffix = 1; + let parallelKey = `${key}:${suffix}`; + let parallel = BrowserManager.instances.get(parallelKey); + while (parallel?.inUse) { + inUseCount++; + if (inUseCount >= BrowserManager.MAX_PARALLEL_INSTANCES) { + throw new Error( + `Maximum number of parallel browser instances (${BrowserManager.MAX_PARALLEL_INSTANCES}) reached. 
` + + `Please wait for an existing browser task to complete before starting a new one.`, + ); + } + suffix++; + parallelKey = `${key}:${suffix}`; + parallel = BrowserManager.instances.get(parallelKey); + } + if (!parallel) { + parallel = new BrowserManager(config); + BrowserManager.instances.set(parallelKey, parallel); + debugLogger.log( + `Created parallel BrowserManager (key: ${parallelKey})`, + ); + } else { + debugLogger.log( + `Reusing released parallel BrowserManager (key: ${parallelKey})`, + ); + } + instance = parallel; } else { debugLogger.log( `Reusing existing BrowserManager singleton (key: ${key})`, @@ -180,6 +236,36 @@ export class BrowserManager { private isClosing = false; private connectionPromise: Promise | undefined; + /** + * Whether this instance is currently acquired by an active invocation. + * Used by getInstance() to avoid handing the same browser to concurrent + * browser_agent calls. + */ + private inUse = false; + + /** + * Marks this instance as in-use. Call this when starting a browser agent + * invocation so concurrent calls get a separate instance. + */ + acquire(): void { + this.inUse = true; + } + + /** + * Marks this instance as available for reuse. Call this in the finally + * block of a browser agent invocation. + */ + release(): void { + this.inUse = false; + } + + /** + * Returns whether this instance is currently acquired by an active invocation. 
+ */ + isAcquired(): boolean { + return this.inUse; + } + /** State for action rate limiting */ private actionCounter = 0; private readonly maxActionsPerTask: number; From 34b4f1c6e4f2468cd35caac8bde87011f2691063 Mon Sep 17 00:00:00 2001 From: ruomeng Date: Wed, 8 Apr 2026 11:58:29 -0400 Subject: [PATCH 18/39] refactor(plan): simplify policy priorities and consolidate read-only rules (#24849) --- .../config/policy-engine.integration.test.ts | 8 +-- packages/core/src/agents/registry.test.ts | 4 +- packages/core/src/policy/config.ts | 5 +- packages/core/src/policy/policies/plan.toml | 46 +++++---------- .../core/src/policy/policies/read-only.toml | 59 +++++++------------ .../core/src/policy/policies/tracker.toml | 34 ----------- .../core/src/policy/policy-engine.test.ts | 4 +- packages/core/src/policy/toml-loader.test.ts | 27 ++++++--- packages/core/src/policy/types.ts | 6 +- 9 files changed, 71 insertions(+), 122 deletions(-) delete mode 100644 packages/core/src/policy/policies/tracker.toml diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index edc06bfbf0..b7b9be1193 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -520,8 +520,8 @@ describe('Policy Engine Integration Tests', () => { const readOnlyToolRule = rules.find( (r) => r.toolName === 'glob' && !r.subagent, ); - // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny) - expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5); + // Priority 50 in default tier → 1.05 (Overriding Plan Mode Deny) + expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5); // Verify the engine applies these priorities correctly expect( @@ -677,8 +677,8 @@ describe('Policy Engine Integration Tests', () => { expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier) const globRule = rules.find((r) => r.toolName === 'glob' && !r.subagent); - // 
Priority 70 in default tier → 1.07 - expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only + // Priority 50 in default tier → 1.05 + expect(globRule?.priority).toBeCloseTo(1.05, 5); // Auto-accept read-only // The PolicyEngine will sort these by priority when it's created const engine = new PolicyEngine(config); diff --git a/packages/core/src/agents/registry.test.ts b/packages/core/src/agents/registry.test.ts index 55517a20d5..22ac42e6ed 100644 --- a/packages/core/src/agents/registry.test.ts +++ b/packages/core/src/agents/registry.test.ts @@ -1075,7 +1075,7 @@ describe('AgentRegistry', () => { expect.objectContaining({ toolName: 'PolicyTestAgent', decision: PolicyDecision.ALLOW, - priority: 1.05, + priority: 1.03, }), ); }); @@ -1102,7 +1102,7 @@ describe('AgentRegistry', () => { expect.objectContaining({ toolName: 'RemotePolicyAgent', decision: PolicyDecision.ASK_USER, - priority: 1.05, + priority: 1.03, }), ); }); diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 9147a66a9d..359054add3 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -398,9 +398,10 @@ export async function createPolicyEngineConfig( // TOML policy priorities (before transformation): // 10: Write tools default to ASK_USER (becomes 1.010 in default tier) // 15: Auto-edit tool override (becomes 1.015 in default tier) + // 30: Unknown subagents (blocked by Plan Mode's 40) + // 40: Plan mode catch-all DENY override (becomes 1.040 in default tier) // 50: Read-only tools (becomes 1.050 in default tier) - // 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) - // 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier) + // 70: Mode transition overrides (becomes 1.070 in default tier) // 999: YOLO mode allow-all (becomes 1.999 in default tier) // MCP servers that are explicitly excluded in settings.mcp.excluded diff --git a/packages/core/src/policy/policies/plan.toml 
b/packages/core/src/policy/policies/plan.toml index 80b59ba2d5..eaf1f9471b 100644 --- a/packages/core/src/policy/policies/plan.toml +++ b/packages/core/src/policy/policies/plan.toml @@ -23,8 +23,10 @@ # # TOML policy priorities (before transformation): # 10: Write tools default to ASK_USER (becomes 1.010 in default tier) -# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) -# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier) +# 30: Unknown subagents (blocked by Plan Mode's 40) +# 40: Plan mode catch-all DENY override (becomes 1.040 in default tier) +# 50: Read-only tools / Plan mode explicit ALLOW (becomes 1.050 in default tier) +# 70: Mode transition overrides (into/out of Plan Mode) # 999: YOLO mode allow-all (becomes 1.999 in default tier) # Mode Transitions (into/out of Plan Mode) @@ -59,6 +61,7 @@ interactive = true toolName = "exit_plan_mode" decision = "allow" priority = 70 +modes = ["plan"] interactive = false [[rule]] @@ -73,18 +76,23 @@ denyMessage = "You are not currently in Plan Mode. Use enter_plan_mode first to [[rule]] toolName = "*" decision = "deny" -priority = 60 +priority = 40 modes = ["plan"] denyMessage = "You are in Plan Mode with access to read-only tools. Execution of scripts (including those from skills) is blocked." # Explicitly Allow Read-Only Tools in Plan mode. 
+[[rule]] +toolName = ["activate_skill"] +decision = "allow" +priority = 50 +modes = ["plan"] [[rule]] toolName = "*" mcpName = "*" toolAnnotations = { readOnlyHint = true } decision = "ask_user" -priority = 70 +priority = 50 modes = ["plan"] interactive = true @@ -93,45 +101,21 @@ toolName = "*" mcpName = "*" toolAnnotations = { readOnlyHint = true } decision = "deny" -priority = 70 +priority = 50 modes = ["plan"] interactive = false -[[rule]] -toolName = [ - "glob", - "grep_search", - "list_directory", - "read_file", - "google_web_search", - "activate_skill", - "codebase_investigator", - "cli_help", - "get_internal_docs", - "complete_task" -] -decision = "allow" -priority = 70 -modes = ["plan"] - -# Topic grouping tool is innocuous and used for UI organization. -[[rule]] -toolName = "update_topic" -decision = "allow" -priority = 70 -modes = ["plan"] - [[rule]] toolName = ["ask_user", "save_memory", "web_fetch"] decision = "ask_user" -priority = 70 +priority = 50 modes = ["plan"] interactive = true [[rule]] toolName = ["ask_user", "save_memory", "web_fetch"] decision = "deny" -priority = 70 +priority = 50 modes = ["plan"] interactive = false diff --git a/packages/core/src/policy/policies/read-only.toml b/packages/core/src/policy/policies/read-only.toml index c56984b522..0a8b465fe8 100644 --- a/packages/core/src/policy/policies/read-only.toml +++ b/packages/core/src/policy/policies/read-only.toml @@ -28,43 +28,26 @@ # 999: YOLO mode allow-all (becomes 1.999 in default tier) [[rule]] -toolName = "glob" +toolName = [ + "glob", + "grep_search", + "list_directory", + "read_file", + "google_web_search", + "codebase_investigator", + "cli_help", + "get_internal_docs", + # Tracker tools for task management (safe as they only modify internal state) + "tracker_create_task", + "tracker_update_task", + "tracker_get_task", + "tracker_list_tasks", + "tracker_add_dependency", + "tracker_visualize", + # Topic grouping tool is innocuous and used for UI organization. 
+ "update_topic", + # Core agent lifecycle tool + "complete_task" +] decision = "allow" priority = 50 - -[[rule]] -toolName = "grep_search" -decision = "allow" -priority = 50 - -[[rule]] -toolName = "list_directory" -decision = "allow" -priority = 50 - -[[rule]] -toolName = "read_file" -decision = "allow" -priority = 50 - -[[rule]] -toolName = "google_web_search" -decision = "allow" -priority = 50 - -[[rule]] -toolName = ["codebase_investigator", "cli_help", "get_internal_docs"] -decision = "allow" -priority = 50 - -# Topic grouping tool is innocuous and used for UI organization. -[[rule]] -toolName = "update_topic" -decision = "allow" -priority = 50 - -# Core agent lifecycle tool -[[rule]] -toolName = "complete_task" -decision = "allow" -priority = 50 \ No newline at end of file diff --git a/packages/core/src/policy/policies/tracker.toml b/packages/core/src/policy/policies/tracker.toml deleted file mode 100644 index e17c4fc387..0000000000 --- a/packages/core/src/policy/policies/tracker.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Priority system for policy rules: -# - Higher priority numbers win over lower priority numbers -# - When multiple rules match, the highest priority rule is applied -# - Rules are evaluated in order of priority (highest first) -# -# Priority bands (tiers): -# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100) -# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100) -# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100) -# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100) -# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100) -# -# Settings-based and dynamic rules (all in user tier 4.x): -# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI -# 4.9: MCP servers excluded list (security: persistent server blocks) -# 4.4: Command line flag --exclude-tools (explicit temporary blocks) -# 4.3: Command line flag 
--allowed-tools (explicit temporary allows) -# 4.2: MCP servers with trust=true (persistent trusted servers) -# 4.1: MCP servers allowed list (persistent general server allows) - -# Allow tracker tools to execute without asking the user. -# These tools are only registered when the tracker feature is enabled, -# so this rule is a no-op when the feature is disabled. -[[rule]] -toolName = [ - "tracker_create_task", - "tracker_update_task", - "tracker_get_task", - "tracker_list_tasks", - "tracker_add_dependency", - "tracker_visualize" -] -decision = "allow" -priority = 50 diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index 0299000f73..1d27107ee2 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -1715,13 +1715,13 @@ describe('PolicyEngine', () => { describe('Plan Mode vs Subagent Priority (Regression)', () => { it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => { - // Plan Mode Deny (1.06) > Subagent Allow (1.05) + // Plan Mode Deny (1.04) > Subagent Allow (1.03) const fixedRules: PolicyRule[] = [ { toolName: '*', decision: PolicyDecision.DENY, - priority: 1.06, + priority: 1.04, modes: [ApprovalMode.PLAN], }, { diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index 6835e200b4..9c1e424c60 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -890,8 +890,8 @@ priority = 100 readOnlyHint: true, }); expect(annotationRule!.decision).toBe(PolicyDecision.ASK_USER); - // Priority 70 in tier 1 => 1.070 - expect(annotationRule!.priority).toBe(1.07); + // Priority 50 in tier 1 => 1.050 + expect(annotationRule!.priority).toBe(1.05); // Verify deny rule was loaded correctly const denyRule = result.rules.find( @@ -904,8 +904,8 @@ priority = 100 denyRule, 'Should have loaded the catch-all deny rule', 
).toBeDefined(); - // Priority 60 in tier 1 => 1.060 - expect(denyRule!.priority).toBe(1.06); + // Priority 40 in tier 1 => 1.040 + expect(denyRule!.priority).toBe(1.04); // 2. Initialize Policy Engine in Plan Mode const engine = new PolicyEngine({ @@ -974,12 +974,23 @@ priority = 100 it('should override default subagent rules when in Plan Mode for unknown subagents', async () => { const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml'); - const fileContent = await fs.readFile(planTomlPath, 'utf-8'); + const readOnlyTomlPath = path.resolve( + __dirname, + 'policies', + 'read-only.toml', + ); + const planContent = await fs.readFile(planTomlPath, 'utf-8'); + const readOnlyContent = await fs.readFile(readOnlyTomlPath, 'utf-8'); + const tempPolicyDir = await fs.mkdtemp( path.join(os.tmpdir(), 'plan-policy-test-'), ); try { - await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent); + await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), planContent); + await fs.writeFile( + path.join(tempPolicyDir, 'read-only.toml'), + readOnlyContent, + ); const getPolicyTier = () => 1; // Default tier // 1. Load the actual Plan Mode policies @@ -1004,6 +1015,7 @@ priority = 100 // 4. Verify Behavior: // The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow + // Plan Mode Deny (1.04) > Subagent Allow (1.03) const checkResult = await engine.check( { name: 'unknown_subagent' }, undefined, @@ -1015,7 +1027,7 @@ priority = 100 ).toBe(PolicyDecision.DENY); // 5. Verify Explicit Allows still work - // e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60) + // e.g. 'read_file' should be allowed because its priority in read-only.toml (50) is higher than the deny (40) const readResult = await engine.check({ name: 'read_file' }, undefined); expect( readResult.decision, @@ -1023,6 +1035,7 @@ priority = 100 ).toBe(PolicyDecision.ALLOW); // 6. 
Verify Built-in Research Subagents are ALLOWED + // codebase_investigator is priority 50 in read-only.toml const codebaseResult = await engine.check( { name: 'codebase_investigator' }, undefined, diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 622cde0abd..b843129c99 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -354,9 +354,11 @@ export interface CheckResult { /** * Priority for subagent tools (registered dynamically). - * Effective priority matching Tier 1 (Default) read-only tools. + * Effective priority matching Tier 1 (Default) at priority 30. + * This ensures they are blocked by Plan Mode (priority 40) while + * remaining above directive write tools (priority 10). */ -export const PRIORITY_SUBAGENT_TOOL = 1.05; +export const PRIORITY_SUBAGENT_TOOL = 1.03; /** * The fractional priority of "Always allow" rules (e.g., 950/1000). From 4ebc43bc668962c3ac7e3aa933e7751c57e14ef0 Mon Sep 17 00:00:00 2001 From: Sri Pasumarthi <111310667+sripasg@users.noreply.github.com> Date: Wed, 8 Apr 2026 10:42:18 -0700 Subject: [PATCH 19/39] feat(test-utils): add memory usage integration test harness (#24876) --- .github/workflows/memory-nightly.yml | 33 ++ GEMINI.md | 2 + docs/integration-tests.md | 40 ++ memory-tests/baselines.json | 30 ++ memory-tests/globalSetup.ts | 71 +++ memory-tests/memory-usage.test.ts | 185 +++++++ memory-tests/memory.idle-startup.responses | 2 + .../memory.multi-function-call.responses | 4 + memory-tests/memory.multi-turn.responses | 10 + memory-tests/memory.simple-prompt.responses | 2 + memory-tests/tsconfig.json | 12 + memory-tests/vitest.config.ts | 28 + package-lock.json | 41 +- package.json | 2 + packages/test-utils/package.json | 1 + packages/test-utils/src/index.ts | 2 + packages/test-utils/src/memory-baselines.ts | 76 +++ .../test-utils/src/memory-test-harness.ts | 483 ++++++++++++++++++ 18 files changed, 1021 insertions(+), 3 deletions(-) create mode 100644 
.github/workflows/memory-nightly.yml create mode 100644 memory-tests/baselines.json create mode 100644 memory-tests/globalSetup.ts create mode 100644 memory-tests/memory-usage.test.ts create mode 100644 memory-tests/memory.idle-startup.responses create mode 100644 memory-tests/memory.multi-function-call.responses create mode 100644 memory-tests/memory.multi-turn.responses create mode 100644 memory-tests/memory.simple-prompt.responses create mode 100644 memory-tests/tsconfig.json create mode 100644 memory-tests/vitest.config.ts create mode 100644 packages/test-utils/src/memory-baselines.ts create mode 100644 packages/test-utils/src/memory-test-harness.ts diff --git a/.github/workflows/memory-nightly.yml b/.github/workflows/memory-nightly.yml new file mode 100644 index 0000000000..ee4e5e589c --- /dev/null +++ b/.github/workflows/memory-nightly.yml @@ -0,0 +1,33 @@ +name: 'Memory Tests: Nightly' + +on: + schedule: + - cron: '0 2 * * *' # Runs at 2 AM every day + workflow_dispatch: # Allow manual trigger + +permissions: + contents: 'read' + +jobs: + memory-test: + name: 'Run Memory Usage Tests' + runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" + steps: + - name: 'Checkout' + uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Run Memory Tests' + run: 'npm run test:memory' diff --git a/GEMINI.md b/GEMINI.md index c08e486b22..60824972d3 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -44,6 +44,8 @@ powerful tool for developers. 
- **Test Commands:** - **Unit (All):** `npm run test` - **Integration (E2E):** `npm run test:e2e` + - **Memory (Nightly):** `npm run test:memory` (Runs memory regression tests + against baselines. Excluded from `preflight`, run nightly.) - **Workspace-Specific:** `npm test -w -- ` (Note: `` must be relative to the workspace root, e.g., `-w @google/gemini-cli-core -- src/routing/modelRouterService.test.ts`) diff --git a/docs/integration-tests.md b/docs/integration-tests.md index f5784c344b..bfed813ebc 100644 --- a/docs/integration-tests.md +++ b/docs/integration-tests.md @@ -117,6 +117,46 @@ npm run test:integration:sandbox:docker npm run test:integration:sandbox:podman ``` +## Memory regression tests + +Memory regression tests are designed to detect heap growth and leaks across key +CLI scenarios. They are located in the `memory-tests` directory. + +These tests are distinct from standard integration tests because they measure +memory usage and compare it against committed baselines. + +### Running memory tests + +Memory tests are not run as part of the default `npm run test` or +`npm run test:e2e` commands. They are run nightly in CI but can be run manually: + +```bash +npm run test:memory +``` + +### Updating baselines + +If you intentionally change behavior that affects memory usage, you may need to +update the baselines. Set the `UPDATE_MEMORY_BASELINES` environment variable to +`true`: + +```bash +UPDATE_MEMORY_BASELINES=true npm run test:memory +``` + +This will run the tests, take median snapshots, and overwrite +`memory-tests/baselines.json`. You should review the changes and commit the +updated baseline file. + +### How it works + +The harness (`MemoryTestHarness` in `packages/test-utils`): + +- Forces garbage collection multiple times to reduce noise. +- Takes median snapshots to filter spikes. +- Compares against baselines with a 10% tolerance. +- Can analyze sustained leaks across 3 snapshots using `analyzeSnapshots()`. 
+ ## Diagnostics The integration test runner provides several options for diagnostics to help diff --git a/memory-tests/baselines.json b/memory-tests/baselines.json new file mode 100644 index 0000000000..0fcab5dc02 --- /dev/null +++ b/memory-tests/baselines.json @@ -0,0 +1,30 @@ +{ + "version": 1, + "updatedAt": "2026-04-08T01:21:58.770Z", + "scenarios": { + "multi-turn-conversation": { + "heapUsedBytes": 120082704, + "heapTotalBytes": 177586176, + "rssBytes": 269172736, + "timestamp": "2026-04-08T01:21:57.127Z" + }, + "multi-function-call-repo-search": { + "heapUsedBytes": 104644984, + "heapTotalBytes": 111575040, + "rssBytes": 204079104, + "timestamp": "2026-04-08T01:21:58.770Z" + }, + "idle-session-startup": { + "heapUsedBytes": 119813672, + "heapTotalBytes": 177061888, + "rssBytes": 267943936, + "timestamp": "2026-04-08T01:21:53.855Z" + }, + "simple-prompt-response": { + "heapUsedBytes": 119722064, + "heapTotalBytes": 177324032, + "rssBytes": 268812288, + "timestamp": "2026-04-08T01:21:55.491Z" + } + } +} diff --git a/memory-tests/globalSetup.ts b/memory-tests/globalSetup.ts new file mode 100644 index 0000000000..3f52501838 --- /dev/null +++ b/memory-tests/globalSetup.ts @@ -0,0 +1,71 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { mkdir, readdir, rm } from 'node:fs/promises'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const rootDir = join(__dirname, '..'); +const memoryTestsDir = join(rootDir, '.memory-tests'); +let runDir = ''; + +export async function setup() { + runDir = join(memoryTestsDir, `${Date.now()}`); + await mkdir(runDir, { recursive: true }); + + // Set the home directory to the test run directory to avoid conflicts + // with the user's local config. 
+ process.env['HOME'] = runDir; + if (process.platform === 'win32') { + process.env['USERPROFILE'] = runDir; + } + process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini'); + + // Download ripgrep to avoid race conditions + const available = await canUseRipgrep(); + if (!available) { + throw new Error('Failed to download ripgrep binary'); + } + + // Clean up old test runs, keeping the latest few for debugging + try { + const testRuns = await readdir(memoryTestsDir); + if (testRuns.length > 3) { + const oldRuns = testRuns.sort().slice(0, testRuns.length - 3); + await Promise.all( + oldRuns.map((oldRun) => + rm(join(memoryTestsDir, oldRun), { + recursive: true, + force: true, + }), + ), + ); + } + } catch (e) { + console.error('Error cleaning up old memory test runs:', e); + } + + process.env['INTEGRATION_TEST_FILE_DIR'] = runDir; + process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true'; + process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true'; + process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log'); + process.env['VERBOSE'] = process.env['VERBOSE'] ?? 
'false'; + + console.log(`\nMemory test output directory: ${runDir}`); +} + +export async function teardown() { + // Cleanup unless KEEP_OUTPUT is set + if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) { + try { + await rm(runDir, { recursive: true, force: true }); + } catch (e) { + console.warn('Failed to clean up memory test directory:', e); + } + } +} diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts new file mode 100644 index 0000000000..6455eec632 --- /dev/null +++ b/memory-tests/memory-usage.test.ts @@ -0,0 +1,185 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, beforeAll, afterAll, afterEach } from 'vitest'; +import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const BASELINES_PATH = join(__dirname, 'baselines.json'); +const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true'; +const TOLERANCE_PERCENT = 10; + +// Fake API key for tests using fake responses +const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' }; + +describe('Memory Usage Tests', () => { + let harness: MemoryTestHarness; + let rig: TestRig; + + beforeAll(() => { + harness = new MemoryTestHarness({ + baselinesPath: BASELINES_PATH, + defaultTolerancePercent: TOLERANCE_PERCENT, + gcCycles: 3, + gcDelayMs: 100, + sampleCount: 3, + }); + }); + + afterEach(async () => { + await rig.cleanup(); + }); + + afterAll(async () => { + // Generate the summary report after all tests + await harness.generateReport(); + }); + + it('idle-session-startup: memory usage within baseline', async () => { + rig = new TestRig(); + rig.setup('memory-idle-startup', { + fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'), + }); + + const result = await harness.runScenario( + 'idle-session-startup', + async 
(recordSnapshot) => { + await rig.run({ + args: ['hello'], + timeout: 120000, + env: TEST_ENV, + }); + + await recordSnapshot('after-startup'); + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for idle-session-startup: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('simple-prompt-response: memory usage within baseline', async () => { + rig = new TestRig(); + rig.setup('memory-simple-prompt', { + fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'), + }); + + const result = await harness.runScenario( + 'simple-prompt-response', + async (recordSnapshot) => { + await rig.run({ + args: ['What is the capital of France?'], + timeout: 120000, + env: TEST_ENV, + }); + + await recordSnapshot('after-response'); + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for simple-prompt-response: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('multi-turn-conversation: memory remains stable over turns', async () => { + rig = new TestRig(); + rig.setup('memory-multi-turn', { + fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'), + }); + + const prompts = [ + 'Hello, what can you help me with?', + 'Tell me about JavaScript', + 'How is TypeScript different?', + 'Can you write a simple TypeScript function?', + 'What are some TypeScript best practices?', + ]; + + const result = await harness.runScenario( + 'multi-turn-conversation', + async (recordSnapshot) => { + // Run through all turns as a piped sequence + const stdinContent = prompts.join('\n'); + await rig.run({ + stdin: stdinContent, + timeout: 120000, + env: TEST_ENV, + }); + + // Take snapshots after the conversation completes + await recordSnapshot('after-all-turns'); + }, + ); + + if (UPDATE_BASELINES) { + 
harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for multi-turn-conversation: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('multi-function-call-repo-search: memory after tool use', async () => { + rig = new TestRig(); + rig.setup('memory-multi-func-call', { + fakeResponsesPath: join( + __dirname, + 'memory.multi-function-call.responses', + ), + }); + + // Create directories first, then files in the workspace so the tools have targets + rig.mkdir('packages/core/src/telemetry'); + rig.createFile( + 'packages/core/src/telemetry/memory-monitor.ts', + 'export class MemoryMonitor { constructor() {} }', + ); + rig.createFile( + 'packages/core/src/telemetry/metrics.ts', + 'export function recordMemoryUsage() {}', + ); + + const result = await harness.runScenario( + 'multi-function-call-repo-search', + async (recordSnapshot) => { + await rig.run({ + args: [ + 'Search this repository for MemoryMonitor and tell me what it does', + ], + timeout: 120000, + env: TEST_ENV, + }); + + await recordSnapshot('after-tool-calls'); + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for multi-function-call-repo-search: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); +}); diff --git a/memory-tests/memory.idle-startup.responses b/memory-tests/memory.idle-startup.responses new file mode 100644 index 0000000000..7a5703e3d2 --- /dev/null +++ b/memory-tests/memory.idle-startup.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. 
What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} diff --git a/memory-tests/memory.multi-function-call.responses b/memory-tests/memory.multi-function-call.responses new file mode 100644 index 0000000000..8bdf75afc9 --- /dev/null +++ b/memory-tests/memory.multi-function-call.responses @@ -0,0 +1,4 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. 
**High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]} diff --git a/memory-tests/memory.multi-turn.responses b/memory-tests/memory.multi-turn.responses new file mode 100644 index 0000000000..df428b56db --- /dev/null +++ b/memory-tests/memory.multi-turn.responses @@ -0,0 +1,10 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. 
It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]} diff --git 
a/memory-tests/memory.simple-prompt.responses b/memory-tests/memory.simple-prompt.responses new file mode 100644 index 0000000000..ad3f20c9a1 --- /dev/null +++ b/memory-tests/memory.simple-prompt.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]} diff --git a/memory-tests/tsconfig.json b/memory-tests/tsconfig.json new file mode 100644 index 0000000000..7f2c199703 --- /dev/null +++ b/memory-tests/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "allowJs": true + }, + "include": ["**/*.ts"], + "references": [ + { "path": "../packages/core" }, + { "path": "../packages/test-utils" } + ] +} diff --git a/memory-tests/vitest.config.ts b/memory-tests/vitest.config.ts new file mode 100644 index 0000000000..c69af28826 --- /dev/null +++ b/memory-tests/vitest.config.ts @@ -0,0 +1,28 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + testTimeout: 600000, // 10 minutes — memory profiling is slow + globalSetup: './globalSetup.ts', + reporters: ['default'], + include: ['**/*.test.ts'], + retry: 0, // No retries for memory tests — noise is handled by tolerance + fileParallelism: false, // Must run 
serially to avoid memory interference + pool: 'forks', // Use forks pool for --expose-gc support + poolOptions: { + forks: { + singleFork: true, // Single process for accurate per-test memory readings + execArgv: ['--expose-gc'], // Enable global.gc() for forced GC + }, + }, + env: { + GEMINI_TEST_TYPE: 'memory', + }, + }, +}); diff --git a/package-lock.json b/package-lock.json index 2c8a4b64b8..7ec397323e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -446,7 +446,8 @@ "version": "2.11.0", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", - "license": "(Apache-2.0 AND BSD-3-Clause)" + "license": "(Apache-2.0 AND BSD-3-Clause)", + "peer": true }, "node_modules/@bundled-es-modules/cookie": { "version": "2.0.1", @@ -1449,6 +1450,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" @@ -2155,6 +2157,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2335,6 +2338,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -2384,6 +2388,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", + 
"peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2758,6 +2763,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2791,6 +2797,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2845,6 +2852,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4081,6 +4089,7 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4355,6 +4364,7 @@ "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.35.0", "@typescript-eslint/types": "8.35.0", @@ -5228,6 +5238,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -5569,6 +5580,12 @@ "dev": true, "license": "MIT" }, + 
"node_modules/asciichart": { + "version": "1.5.25", + "resolved": "https://registry.npmjs.org/asciichart/-/asciichart-1.5.25.tgz", + "integrity": "sha512-PNxzXIPPOtWq8T7bgzBtk9cI2lgS4SJZthUHEiQ1aoIc3lNzGfUvIvo9LiAnq26TACo9t1/4qP6KTGAUbzX9Xg==", + "license": "MIT" + }, "node_modules/assertion-error": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", @@ -7362,7 +7379,8 @@ "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/dezalgo": { "version": "1.0.4", @@ -7946,6 +7964,7 @@ "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -8463,6 +8482,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9775,6 +9795,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz", "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -10053,6 +10074,7 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.7.tgz", "integrity": "sha512-bDzQLpLzK/dn9Ur/Ku88ZZR9totVcMGrGYAgPHidsAAbe9NKztU1fggj/iu0wRp5g1kBeALb3cfagFGdDxAU1w==", "license": "MIT", + "peer": true, "dependencies": { "ansi-escapes": "^7.0.0", "ansi-styles": "^6.2.3", @@ -13826,6 +13848,7 @@ "resolved": 
"https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -13836,6 +13859,7 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -15985,6 +16009,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16207,7 +16232,8 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" + "license": "0BSD", + "peer": true }, "node_modules/tsx": { "version": "4.20.3", @@ -16215,6 +16241,7 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16380,6 +16407,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16602,6 +16630,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16715,6 +16744,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": 
"sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16727,6 +16757,7 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", + "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17374,6 +17405,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -17817,6 +17849,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -17920,6 +17953,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -17979,6 +18013,7 @@ "dependencies": { "@google/gemini-cli-core": "file:../core", "@lydell/node-pty": "1.1.0", + "asciichart": "^1.5.25", "strip-ansi": "^7.1.2", "vitest": "^3.2.4" }, diff --git a/package.json b/package.json index e24f6a20b5..9f67253ccc 100644 --- a/package.json +++ b/package.json @@ -51,6 +51,8 @@ "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman", "test:integration:flaky": "cross-env RUN_FLAKY_INTEGRATION=1 npm run test:integration:sandbox:none", "test:integration:sandbox:none": "cross-env 
GEMINI_SANDBOX=false vitest run --root ./integration-tests", + "test:memory": "vitest run --root ./memory-tests", + "test:memory:update-baselines": "cross-env UPDATE_MEMORY_BASELINES=true vitest run --root ./memory-tests", "test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests", "lint": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" eslint . --cache --max-warnings 0", diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index caedd907e4..b16497da3c 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -12,6 +12,7 @@ "dependencies": { "@google/gemini-cli-core": "file:../core", "@lydell/node-pty": "1.1.0", + "asciichart": "^1.5.25", "strip-ansi": "^7.1.2", "vitest": "^3.2.4" }, diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts index 7bae818040..49eaec66d3 100644 --- a/packages/test-utils/src/index.ts +++ b/packages/test-utils/src/index.ts @@ -6,6 +6,8 @@ export * from './file-system-test-helpers.js'; export * from './fixtures/agents.js'; +export * from './memory-baselines.js'; +export * from './memory-test-harness.js'; export * from './mock-utils.js'; export * from './test-mcp-server.js'; export * from './test-rig.js'; diff --git a/packages/test-utils/src/memory-baselines.ts b/packages/test-utils/src/memory-baselines.ts new file mode 100644 index 0000000000..295e80f61b --- /dev/null +++ b/packages/test-utils/src/memory-baselines.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { readFileSync, writeFileSync, existsSync } from 'node:fs'; + +/** + * Baseline entry for a single memory test scenario. 
+ */ +export interface MemoryBaseline { + heapUsedBytes: number; + heapTotalBytes: number; + rssBytes: number; + timestamp: string; +} + +/** + * Top-level structure of the baselines JSON file. + */ +export interface MemoryBaselineFile { + version: number; + updatedAt: string; + scenarios: Record; +} + +/** + * Load baselines from a JSON file. + * Returns an empty baseline file if the file does not exist yet. + */ +export function loadBaselines(path: string): MemoryBaselineFile { + if (!existsSync(path)) { + return { + version: 1, + updatedAt: new Date().toISOString(), + scenarios: {}, + }; + } + + const content = readFileSync(path, 'utf-8'); + return JSON.parse(content) as MemoryBaselineFile; +} + +/** + * Save baselines to a JSON file. + */ +export function saveBaselines( + path: string, + baselines: MemoryBaselineFile, +): void { + baselines.updatedAt = new Date().toISOString(); + writeFileSync(path, JSON.stringify(baselines, null, 2) + '\n'); +} + +/** + * Update (or create) a single scenario baseline in the file. 
+ */ +export function updateBaseline( + path: string, + scenarioName: string, + measured: { + heapUsedBytes: number; + heapTotalBytes: number; + rssBytes: number; + }, +): void { + const baselines = loadBaselines(path); + baselines.scenarios[scenarioName] = { + heapUsedBytes: measured.heapUsedBytes, + heapTotalBytes: measured.heapTotalBytes, + rssBytes: measured.rssBytes, + timestamp: new Date().toISOString(), + }; + saveBaselines(path, baselines); +} diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts new file mode 100644 index 0000000000..7dfb259453 --- /dev/null +++ b/packages/test-utils/src/memory-test-harness.ts @@ -0,0 +1,483 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import v8 from 'node:v8'; +import { setTimeout as sleep } from 'node:timers/promises'; +import { loadBaselines, updateBaseline } from './memory-baselines.js'; +import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js'; + +/** Configuration for asciichart plot function. */ +interface PlotConfig { + height?: number; + format?: (x: number) => string; +} + +/** Type for the asciichart plot function. */ +type PlotFn = (series: number[], config?: PlotConfig) => string; + +/** + * A single memory snapshot at a point in time. + */ +export interface MemorySnapshot { + timestamp: number; + label: string; + heapUsed: number; + heapTotal: number; + rss: number; + external: number; + arrayBuffers: number; + heapSizeLimit: number; + heapSpaces: any[]; +} + +/** + * Result from running a memory test scenario. + */ +export interface MemoryTestResult { + scenarioName: string; + snapshots: MemorySnapshot[]; + peakHeapUsed: number; + peakRss: number; + finalHeapUsed: number; + finalRss: number; + baseline: MemoryBaseline | undefined; + withinTolerance: boolean; + deltaPercent: number; +} + +/** + * Options for the MemoryTestHarness. 
+ */ +export interface MemoryTestHarnessOptions { + /** Path to the baselines JSON file */ + baselinesPath: string; + /** Default tolerance percentage (0-100). Default: 10 */ + defaultTolerancePercent?: number; + /** Number of GC cycles to run before each snapshot. Default: 3 */ + gcCycles?: number; + /** Delay in ms between GC cycles. Default: 100 */ + gcDelayMs?: number; + /** Number of samples to take for median calculation. Default: 3 */ + sampleCount?: number; + /** Pause in ms between samples. Default: 50 */ + samplePauseMs?: number; +} + +/** + * MemoryTestHarness provides infrastructure for running memory usage tests. + * + * It handles: + * - Forcing V8 garbage collection to reduce noise + * - Taking V8 heap snapshots for accurate memory measurement + * - Comparing against baselines with configurable tolerance + * - Generating ASCII chart reports of memory trends + */ +export class MemoryTestHarness { + private baselines: MemoryBaselineFile; + private readonly baselinesPath: string; + private readonly defaultTolerancePercent: number; + private readonly gcCycles: number; + private readonly gcDelayMs: number; + private readonly sampleCount: number; + private readonly samplePauseMs: number; + private allResults: MemoryTestResult[] = []; + + constructor(options: MemoryTestHarnessOptions) { + this.baselinesPath = options.baselinesPath; + this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10; + this.gcCycles = options.gcCycles ?? 3; + this.gcDelayMs = options.gcDelayMs ?? 100; + this.sampleCount = options.sampleCount ?? 3; + this.samplePauseMs = options.samplePauseMs ?? 50; + this.baselines = loadBaselines(this.baselinesPath); + } + + /** + * Force garbage collection multiple times and take a V8 heap snapshot. + * Forces GC multiple times with delays to allow weak references and + * FinalizationRegistry callbacks to run, reducing measurement noise. 
+ */ + async takeSnapshot(label: string = 'snapshot'): Promise { + await this.forceGC(); + + const memUsage = process.memoryUsage(); + const heapStats = v8.getHeapStatistics(); + + return { + timestamp: Date.now(), + label, + heapUsed: memUsage.heapUsed, + heapTotal: memUsage.heapTotal, + rss: memUsage.rss, + external: memUsage.external, + arrayBuffers: memUsage.arrayBuffers, + heapSizeLimit: heapStats.heap_size_limit, + heapSpaces: v8.getHeapSpaceStatistics(), + }; + } + + /** + * Take multiple snapshot samples and return the median to reduce noise. + */ + async takeMedianSnapshot( + label: string = 'median', + count?: number, + ): Promise { + const samples: MemorySnapshot[] = []; + const numSamples = count ?? this.sampleCount; + + for (let i = 0; i < numSamples; i++) { + samples.push(await this.takeSnapshot(`${label}_sample_${i}`)); + if (i < numSamples - 1) { + await sleep(this.samplePauseMs); + } + } + + // Sort by heapUsed and take the median + samples.sort((a, b) => a.heapUsed - b.heapUsed); + const medianIdx = Math.floor(samples.length / 2); + const median = samples[medianIdx]!; + + return { + ...median, + label, + timestamp: Date.now(), + }; + } + + /** + * Run a memory test scenario. + * + * Takes before/after snapshots around the scenario function, collects + * intermediate snapshots if the scenario provides them, and compares + * the result against the stored baseline. + * + * @param name - Scenario name (must match baseline key) + * @param fn - Async function that executes the scenario. Receives a + * `recordSnapshot` callback for recording intermediate snapshots. + * @param tolerancePercent - Override default tolerance for this scenario + */ + async runScenario( + name: string, + fn: ( + recordSnapshot: (label: string) => Promise, + ) => Promise, + tolerancePercent?: number, + ): Promise { + const tolerance = tolerancePercent ?? 
this.defaultTolerancePercent; + const snapshots: MemorySnapshot[] = []; + + // Record a callback for intermediate snapshots + const recordSnapshot = async (label: string): Promise => { + const snap = await this.takeMedianSnapshot(label); + snapshots.push(snap); + return snap; + }; + + // Before snapshot + const beforeSnap = await this.takeMedianSnapshot('before'); + snapshots.push(beforeSnap); + + // Run the scenario + await fn(recordSnapshot); + + // After snapshot (median of multiple samples) + const afterSnap = await this.takeMedianSnapshot('after'); + snapshots.push(afterSnap); + + // Calculate peak values + const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed)); + const peakRss = Math.max(...snapshots.map((s) => s.rss)); + + // Get baseline + const baseline = this.baselines.scenarios[name]; + + // Determine if within tolerance + let deltaPercent = 0; + let withinTolerance = true; + + if (baseline) { + deltaPercent = + ((afterSnap.heapUsed - baseline.heapUsedBytes) / + baseline.heapUsedBytes) * + 100; + withinTolerance = deltaPercent <= tolerance; + } + + const result: MemoryTestResult = { + scenarioName: name, + snapshots, + peakHeapUsed, + peakRss, + finalHeapUsed: afterSnap.heapUsed, + finalRss: afterSnap.rss, + baseline, + withinTolerance, + deltaPercent, + }; + + this.allResults.push(result); + return result; + } + + /** + * Assert that a scenario result is within the baseline tolerance. + * Throws an assertion error with details if it exceeds the threshold. + */ + assertWithinBaseline( + result: MemoryTestResult, + tolerancePercent?: number, + ): void { + const tolerance = tolerancePercent ?? this.defaultTolerancePercent; + + if (!result.baseline) { + console.warn( + `⚠ No baseline found for "${result.scenarioName}". ` + + `Run with UPDATE_MEMORY_BASELINES=true to create one. 
` + + `Measured: ${formatMB(result.finalHeapUsed)} heap used.`, + ); + return; // Don't fail if no baseline exists yet + } + + const deltaPercent = + ((result.finalHeapUsed - result.baseline.heapUsedBytes) / + result.baseline.heapUsedBytes) * + 100; + + if (deltaPercent > tolerance) { + throw new Error( + `Memory regression detected for "${result.scenarioName}"!\n` + + ` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` + + ` Baseline: ${formatMB(result.baseline.heapUsedBytes)} heap used\n` + + ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` + + ` Peak heap: ${formatMB(result.peakHeapUsed)}\n` + + ` Peak RSS: ${formatMB(result.peakRss)}`, + ); + } + } + + /** + * Update the baseline for a scenario with the current measured values. + */ + updateScenarioBaseline(result: MemoryTestResult): void { + updateBaseline(this.baselinesPath, result.scenarioName, { + heapUsedBytes: result.finalHeapUsed, + heapTotalBytes: + result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0, + rssBytes: result.finalRss, + }); + // Reload baselines after update + this.baselines = loadBaselines(this.baselinesPath); + } + + /** + * Analyze snapshots to detect sustained leaks across 3 snapshots. + * A leak is flagged if growth is observed in both phases for any heap space. 
+ */ + analyzeSnapshots( + snapshots: MemorySnapshot[], + thresholdBytes: number = 1024 * 1024, // 1 MB + ): { leaked: boolean; message: string } { + if (snapshots.length < 3) { + return { leaked: false, message: 'Not enough snapshots to analyze' }; + } + + const snap1 = snapshots[snapshots.length - 3]; + const snap2 = snapshots[snapshots.length - 2]; + const snap3 = snapshots[snapshots.length - 1]; + + if (!snap1 || !snap2 || !snap3) { + return { leaked: false, message: 'Missing snapshots' }; + } + + const spaceNames = new Set(); + snap1.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name)); + snap2.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name)); + snap3.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name)); + + let hasSustainedGrowth = false; + const growthDetails: string[] = []; + + for (const name of spaceNames) { + const size1 = + snap1.heapSpaces.find((s: any) => s.space_name === name) + ?.space_used_size ?? 0; + const size2 = + snap2.heapSpaces.find((s: any) => s.space_name === name) + ?.space_used_size ?? 0; + const size3 = + snap3.heapSpaces.find((s: any) => s.space_name === name) + ?.space_used_size ?? 0; + + const growth1 = size2 - size1; + const growth2 = size3 - size2; + + if (growth1 > thresholdBytes && growth2 > thresholdBytes) { + hasSustainedGrowth = true; + growthDetails.push( + `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`, + ); + } + } + + let message = ''; + if (hasSustainedGrowth) { + message = + `Memory bloat detected in heap spaces:\n ` + + growthDetails.join('\n '); + } else { + message = `No sustained growth detected in any heap space above threshold.`; + } + + return { leaked: hasSustainedGrowth, message }; + } + + /** + * Assert that memory returns to a baseline level after a peak. + * Useful for verifying that large tool outputs are not retained. 
+ */ + assertMemoryReturnsToBaseline( + snapshots: MemorySnapshot[], + tolerancePercent: number = 10, + ): void { + if (snapshots.length < 3) { + throw new Error('Need at least 3 snapshots to check return to baseline'); + } + + const baseline = snapshots[0]; // Assume first is baseline + const peak = snapshots.reduce( + (max, s) => (s.heapUsed > max.heapUsed ? s : max), + snapshots[0], + ); + const final = snapshots[snapshots.length - 1]; + + if (!baseline || !peak || !final) { + throw new Error('Missing snapshots for return to baseline check'); + } + + const tolerance = baseline.heapUsed * (tolerancePercent / 100); + const delta = final.heapUsed - baseline.heapUsed; + + if (delta > tolerance) { + throw new Error( + `Memory did not return to baseline!\n` + + ` Baseline: ${formatMB(baseline.heapUsed)}\n` + + ` Peak: ${formatMB(peak.heapUsed)}\n` + + ` Final: ${formatMB(final.heapUsed)}\n` + + ` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`, + ); + } + } + + /** + * Generate a report with ASCII charts and summary table. + * Uses the `asciichart` library for terminal visualization. + */ + async generateReport(results?: MemoryTestResult[]): Promise { + const resultsToReport = results ?? this.allResults; + const lines: string[] = []; + + lines.push(''); + lines.push('═══════════════════════════════════════════════════'); + lines.push(' MEMORY USAGE TEST REPORT'); + lines.push('═══════════════════════════════════════════════════'); + lines.push(''); + + for (const result of resultsToReport) { + const measured = formatMB(result.finalHeapUsed); + const baseline = result.baseline + ? formatMB(result.baseline.heapUsedBytes) + : 'N/A'; + const delta = result.baseline + ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%` + : 'N/A'; + const status = !result.baseline + ? 'NEW' + : result.withinTolerance + ? 
'✅' + : '❌'; + + lines.push( + `${result.scenarioName}: ${measured} (Baseline: ${baseline}, Delta: ${delta}) ${status}`, + ); + } + lines.push(''); + + // Generate ASCII chart for each scenario with multiple snapshots + try { + // @ts-expect-error - asciichart may not have types + const asciichart = (await import('asciichart')) as { + default?: { plot?: PlotFn }; + plot?: PlotFn; + }; + const plot: PlotFn | undefined = + asciichart.default?.plot ?? asciichart.plot; + + for (const result of resultsToReport) { + if (result.snapshots.length > 2) { + lines.push(`📈 Memory trend: ${result.scenarioName}`); + lines.push('─'.repeat(60)); + + const heapDataMB = result.snapshots.map( + (s) => s.heapUsed / (1024 * 1024), + ); + + if (plot) { + const chart = plot(heapDataMB, { + height: 10, + format: (x: number) => `${x.toFixed(1)} MB`.padStart(10), + }); + lines.push(chart); + } + + // Label the x-axis with snapshot labels + const labels = result.snapshots.map((s) => s.label); + lines.push(' ' + labels.join(' → ')); + lines.push(''); + } + } + } catch { + lines.push( + '(asciichart not available — install with: npm install --save-dev asciichart)', + ); + lines.push(''); + } + + lines.push('═══════════════════════════════════════════════════'); + lines.push(''); + + const report = lines.join('\n'); + console.log(report); + return report; + } + + /** + * Force V8 garbage collection. + * Runs multiple GC cycles with delays to allow weak references + * and FinalizationRegistry callbacks to run. + */ + private async forceGC(): Promise { + if (typeof globalThis.gc !== 'function') { + throw new Error( + 'global.gc() not available. Run with --expose-gc for accurate measurements.', + ); + } + + for (let i = 0; i < this.gcCycles; i++) { + globalThis.gc(); + if (i < this.gcCycles - 1) { + await sleep(this.gcDelayMs); + } + } + } +} + +/** + * Format bytes as a human-readable MB string. 
+ */ +function formatMB(bytes: number): string { + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} From a837b39f8d6d4c0eccc05229f7b6ed182bff4340 Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Wed, 8 Apr 2026 11:08:49 -0700 Subject: [PATCH 20/39] feat(memory): add /memory inbox command for reviewing extracted skills (#24544) --- .gemini/settings.json | 1 - packages/cli/src/acp/commands/memory.ts | 38 ++ .../cli/src/ui/commands/memoryCommand.test.ts | 74 ++++ packages/cli/src/ui/commands/memoryCommand.ts | 43 ++ .../ui/components/SkillInboxDialog.test.tsx | 187 +++++++++ .../src/ui/components/SkillInboxDialog.tsx | 378 ++++++++++++++++++ packages/core/src/commands/memory.test.ts | 326 +++++++++++++++ packages/core/src/commands/memory.ts | 188 +++++++++ .../core/src/services/memoryService.test.ts | 106 +++++ packages/core/src/services/memoryService.ts | 6 + 10 files changed, 1346 insertions(+), 1 deletion(-) create mode 100644 packages/cli/src/ui/components/SkillInboxDialog.test.tsx create mode 100644 packages/cli/src/ui/components/SkillInboxDialog.tsx diff --git a/.gemini/settings.json b/.gemini/settings.json index eb7741997b..6a0121df17 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -2,7 +2,6 @@ "experimental": { "extensionReloading": true, "modelSteering": true, - "memoryManager": false, "topicUpdateNarration": true }, "general": { diff --git a/packages/cli/src/acp/commands/memory.ts b/packages/cli/src/acp/commands/memory.ts index ac919f2a9b..4d704cc8dd 100644 --- a/packages/cli/src/acp/commands/memory.ts +++ b/packages/cli/src/acp/commands/memory.ts @@ -6,6 +6,7 @@ import { addMemory, + listInboxSkills, listMemoryFiles, refreshMemory, showMemory, @@ -30,6 +31,7 @@ export class MemoryCommand implements Command { new RefreshMemoryCommand(), new ListMemoryCommand(), new AddMemoryCommand(), + new InboxMemoryCommand(), ]; readonly requiresWorkspace = true; @@ -122,3 +124,39 @@ export class AddMemoryCommand implements Command { } } } + +export 
class InboxMemoryCommand implements Command { + readonly name = 'memory inbox'; + readonly description = + 'Lists skills extracted from past sessions that are pending review.'; + + async execute( + context: CommandContext, + _: string[], + ): Promise { + if (!context.agentContext.config.isMemoryManagerEnabled()) { + return { + name: this.name, + data: 'The memory inbox requires the experimental memory manager. Enable it with: experimental.memoryManager = true in settings.', + }; + } + + const skills = await listInboxSkills(context.agentContext.config); + + if (skills.length === 0) { + return { name: this.name, data: 'No extracted skills in inbox.' }; + } + + const lines = skills.map((s) => { + const date = s.extractedAt + ? ` (extracted: ${new Date(s.extractedAt).toLocaleDateString()})` + : ''; + return `- **${s.name}**: ${s.description}${date}`; + }); + + return { + name: this.name, + data: `Skill inbox (${skills.length}):\n${lines.join('\n')}`, + }; + } +} diff --git a/packages/cli/src/ui/commands/memoryCommand.test.ts b/packages/cli/src/ui/commands/memoryCommand.test.ts index f02393bef2..c0fdb62ba2 100644 --- a/packages/cli/src/ui/commands/memoryCommand.test.ts +++ b/packages/cli/src/ui/commands/memoryCommand.test.ts @@ -457,4 +457,78 @@ describe('memoryCommand', () => { ); }); }); + + describe('/memory inbox', () => { + let inboxCommand: SlashCommand; + + beforeEach(() => { + inboxCommand = memoryCommand.subCommands!.find( + (cmd) => cmd.name === 'inbox', + )!; + expect(inboxCommand).toBeDefined(); + }); + + it('should return custom_dialog when config is available and flag is enabled', () => { + if (!inboxCommand.action) throw new Error('Command has no action'); + + const mockConfig = { + reloadSkills: vi.fn(), + isMemoryManagerEnabled: vi.fn().mockReturnValue(true), + }; + const context = createMockCommandContext({ + services: { + agentContext: { config: mockConfig }, + }, + ui: { + removeComponent: vi.fn(), + reloadCommands: vi.fn(), + }, + }); + + const 
result = inboxCommand.action(context, ''); + + expect(result).toHaveProperty('type', 'custom_dialog'); + expect(result).toHaveProperty('component'); + }); + + it('should return info message when memory manager is disabled', () => { + if (!inboxCommand.action) throw new Error('Command has no action'); + + const mockConfig = { + isMemoryManagerEnabled: vi.fn().mockReturnValue(false), + }; + const context = createMockCommandContext({ + services: { + agentContext: { config: mockConfig }, + }, + }); + + const result = inboxCommand.action(context, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'info', + content: + 'The memory inbox requires the experimental memory manager. Enable it with: experimental.memoryManager = true in settings.', + }); + }); + + it('should return error when config is not loaded', () => { + if (!inboxCommand.action) throw new Error('Command has no action'); + + const context = createMockCommandContext({ + services: { + agentContext: null, + }, + }); + + const result = inboxCommand.action(context, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: 'Config not loaded.', + }); + }); + }); }); diff --git a/packages/cli/src/ui/commands/memoryCommand.ts b/packages/cli/src/ui/commands/memoryCommand.ts index 145fbae9c3..1cb4f27958 100644 --- a/packages/cli/src/ui/commands/memoryCommand.ts +++ b/packages/cli/src/ui/commands/memoryCommand.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import React from 'react'; import { addMemory, listMemoryFiles, @@ -13,9 +14,11 @@ import { import { MessageType } from '../types.js'; import { CommandKind, + type OpenCustomDialogActionReturn, type SlashCommand, type SlashCommandActionReturn, } from './types.js'; +import { SkillInboxDialog } from '../components/SkillInboxDialog.js'; export const memoryCommand: SlashCommand = { name: 'memory', @@ -124,5 +127,45 @@ export const memoryCommand: SlashCommand = { ); }, }, + { + name: 'inbox', + description: + 
'Review skills extracted from past sessions and move them to global or project skills', + kind: CommandKind.BUILT_IN, + autoExecute: true, + action: ( + context, + ): OpenCustomDialogActionReturn | SlashCommandActionReturn | void => { + const config = context.services.agentContext?.config; + if (!config) { + return { + type: 'message', + messageType: 'error', + content: 'Config not loaded.', + }; + } + + if (!config.isMemoryManagerEnabled()) { + return { + type: 'message', + messageType: 'info', + content: + 'The memory inbox requires the experimental memory manager. Enable it with: experimental.memoryManager = true in settings.', + }; + } + + return { + type: 'custom_dialog', + component: React.createElement(SkillInboxDialog, { + config, + onClose: () => context.ui.removeComponent(), + onReloadSkills: async () => { + await config.reloadSkills(); + context.ui.reloadCommands(); + }, + }), + }; + }, + }, ], }; diff --git a/packages/cli/src/ui/components/SkillInboxDialog.test.tsx b/packages/cli/src/ui/components/SkillInboxDialog.test.tsx new file mode 100644 index 0000000000..e3c1aa9c91 --- /dev/null +++ b/packages/cli/src/ui/components/SkillInboxDialog.test.tsx @@ -0,0 +1,187 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { act } from 'react'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { Config, InboxSkill } from '@google/gemini-cli-core'; +import { + dismissInboxSkill, + listInboxSkills, + moveInboxSkill, +} from '@google/gemini-cli-core'; +import { waitFor } from '../../test-utils/async.js'; +import { renderWithProviders } from '../../test-utils/render.js'; +import { SkillInboxDialog } from './SkillInboxDialog.js'; + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const original = + await importOriginal(); + + return { + ...original, + dismissInboxSkill: vi.fn(), + listInboxSkills: vi.fn(), + moveInboxSkill: vi.fn(), + getErrorMessage: vi.fn((error: 
unknown) => + error instanceof Error ? error.message : String(error), + ), + }; +}); + +const mockListInboxSkills = vi.mocked(listInboxSkills); +const mockMoveInboxSkill = vi.mocked(moveInboxSkill); +const mockDismissInboxSkill = vi.mocked(dismissInboxSkill); + +const inboxSkill: InboxSkill = { + dirName: 'inbox-skill', + name: 'Inbox Skill', + description: 'A test skill', + extractedAt: '2025-01-15T10:00:00Z', +}; + +describe('SkillInboxDialog', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockListInboxSkills.mockResolvedValue([inboxSkill]); + mockMoveInboxSkill.mockResolvedValue({ + success: true, + message: 'Moved "inbox-skill" to ~/.gemini/skills.', + }); + mockDismissInboxSkill.mockResolvedValue({ + success: true, + message: 'Dismissed "inbox-skill" from inbox.', + }); + }); + + it('disables the project destination when the workspace is untrusted', async () => { + const config = { + isTrustedFolder: vi.fn().mockReturnValue(false), + } as unknown as Config; + const onReloadSkills = vi.fn().mockResolvedValue(undefined); + const { lastFrame, stdin, unmount, waitUntilReady } = await act(async () => + renderWithProviders( + , + ), + ); + + await waitFor(() => { + expect(lastFrame()).toContain('Inbox Skill'); + }); + + await act(async () => { + stdin.write('\r'); + await waitUntilReady(); + }); + + await waitFor(() => { + const frame = lastFrame(); + expect(frame).toContain('Project'); + expect(frame).toContain('unavailable until this workspace is trusted'); + }); + + await act(async () => { + stdin.write('\x1b[B'); + await waitUntilReady(); + }); + + await act(async () => { + stdin.write('\r'); + await waitUntilReady(); + }); + + await waitFor(() => { + expect(mockDismissInboxSkill).toHaveBeenCalledWith(config, 'inbox-skill'); + }); + expect(mockMoveInboxSkill).not.toHaveBeenCalled(); + expect(onReloadSkills).not.toHaveBeenCalled(); + + unmount(); + }); + + it('shows inline feedback when moving a skill throws', async () => { + 
mockMoveInboxSkill.mockRejectedValue(new Error('permission denied')); + + const config = { + isTrustedFolder: vi.fn().mockReturnValue(true), + } as unknown as Config; + const { lastFrame, stdin, unmount, waitUntilReady } = await act(async () => + renderWithProviders( + , + ), + ); + + await waitFor(() => { + expect(lastFrame()).toContain('Inbox Skill'); + }); + + await act(async () => { + stdin.write('\r'); + await waitUntilReady(); + }); + + await act(async () => { + stdin.write('\r'); + await waitUntilReady(); + }); + + await waitFor(() => { + const frame = lastFrame(); + expect(frame).toContain('Move "Inbox Skill"'); + expect(frame).toContain('Failed to install skill: permission denied'); + }); + + unmount(); + }); + + it('shows inline feedback when reloading skills fails after a move', async () => { + const config = { + isTrustedFolder: vi.fn().mockReturnValue(true), + } as unknown as Config; + const onReloadSkills = vi + .fn() + .mockRejectedValue(new Error('reload hook failed')); + const { lastFrame, stdin, unmount, waitUntilReady } = await act(async () => + renderWithProviders( + , + ), + ); + + await waitFor(() => { + expect(lastFrame()).toContain('Inbox Skill'); + }); + + await act(async () => { + stdin.write('\r'); + await waitUntilReady(); + }); + + await act(async () => { + stdin.write('\r'); + await waitUntilReady(); + }); + + await waitFor(() => { + expect(lastFrame()).toContain( + 'Moved "inbox-skill" to ~/.gemini/skills. 
Failed to reload skills: reload hook failed', + ); + }); + expect(onReloadSkills).toHaveBeenCalledTimes(1); + + unmount(); + }); +}); diff --git a/packages/cli/src/ui/components/SkillInboxDialog.tsx b/packages/cli/src/ui/components/SkillInboxDialog.tsx new file mode 100644 index 0000000000..ff2d75527f --- /dev/null +++ b/packages/cli/src/ui/components/SkillInboxDialog.tsx @@ -0,0 +1,378 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useState, useMemo, useCallback, useEffect } from 'react'; +import { Box, Text } from 'ink'; +import { theme } from '../semantic-colors.js'; +import { useKeypress } from '../hooks/useKeypress.js'; +import { Command } from '../key/keyMatchers.js'; +import { useKeyMatchers } from '../hooks/useKeyMatchers.js'; +import { BaseSelectionList } from './shared/BaseSelectionList.js'; +import type { SelectionListItem } from '../hooks/useSelectionList.js'; +import { DialogFooter } from './shared/DialogFooter.js'; +import { + type Config, + type InboxSkill, + type InboxSkillDestination, + getErrorMessage, + listInboxSkills, + moveInboxSkill, + dismissInboxSkill, +} from '@google/gemini-cli-core'; + +type Phase = 'list' | 'action'; + +interface DestinationChoice { + destination: InboxSkillDestination | 'dismiss'; + label: string; + description: string; +} + +const DESTINATION_CHOICES: DestinationChoice[] = [ + { + destination: 'global', + label: 'Global', + description: '~/.gemini/skills — available in all projects', + }, + { + destination: 'project', + label: 'Project', + description: '.gemini/skills — available in this workspace', + }, + { + destination: 'dismiss', + label: 'Dismiss', + description: 'Delete from inbox', + }, +]; + +function formatDate(isoString: string): string { + try { + const date = new Date(isoString); + return date.toLocaleDateString(undefined, { + year: 'numeric', + month: 'short', + day: 'numeric', + }); + } catch { + return 
isoString; + } +} + +interface SkillInboxDialogProps { + config: Config; + onClose: () => void; + onReloadSkills: () => Promise; +} + +export const SkillInboxDialog: React.FC = ({ + config, + onClose, + onReloadSkills, +}) => { + const keyMatchers = useKeyMatchers(); + const isTrustedFolder = config.isTrustedFolder(); + const [phase, setPhase] = useState('list'); + const [skills, setSkills] = useState([]); + const [loading, setLoading] = useState(true); + const [selectedSkill, setSelectedSkill] = useState(null); + const [feedback, setFeedback] = useState<{ + text: string; + isError: boolean; + } | null>(null); + + // Load inbox skills on mount + useEffect(() => { + let cancelled = false; + void (async () => { + try { + const result = await listInboxSkills(config); + if (!cancelled) { + setSkills(result); + setLoading(false); + } + } catch { + if (!cancelled) { + setSkills([]); + setLoading(false); + } + } + })(); + return () => { + cancelled = true; + }; + }, [config]); + + const skillItems: Array> = useMemo( + () => + skills.map((skill) => ({ + key: skill.dirName, + value: skill, + })), + [skills], + ); + + const destinationItems: Array> = useMemo( + () => + DESTINATION_CHOICES.map((choice) => { + if (choice.destination === 'project' && !isTrustedFolder) { + return { + key: choice.destination, + value: { + ...choice, + description: + '.gemini/skills — unavailable until this workspace is trusted', + }, + disabled: true, + }; + } + + return { + key: choice.destination, + value: choice, + }; + }), + [isTrustedFolder], + ); + + const handleSelectSkill = useCallback((skill: InboxSkill) => { + setSelectedSkill(skill); + setFeedback(null); + setPhase('action'); + }, []); + + const handleSelectDestination = useCallback( + (choice: DestinationChoice) => { + if (!selectedSkill) return; + + if (choice.destination === 'project' && !config.isTrustedFolder()) { + setFeedback({ + text: 'Project skills are unavailable until this workspace is trusted.', + isError: true, + }); + 
return; + } + + setFeedback(null); + + void (async () => { + try { + let result: { success: boolean; message: string }; + if (choice.destination === 'dismiss') { + result = await dismissInboxSkill(config, selectedSkill.dirName); + } else { + result = await moveInboxSkill( + config, + selectedSkill.dirName, + choice.destination, + ); + } + + setFeedback({ text: result.message, isError: !result.success }); + + if (!result.success) { + return; + } + + // Remove the skill from the local list. + setSkills((prev) => + prev.filter((skill) => skill.dirName !== selectedSkill.dirName), + ); + setSelectedSkill(null); + setPhase('list'); + + if (choice.destination === 'dismiss') { + return; + } + + try { + await onReloadSkills(); + } catch (error) { + setFeedback({ + text: `${result.message} Failed to reload skills: ${getErrorMessage(error)}`, + isError: true, + }); + } + } catch (error) { + const operation = + choice.destination === 'dismiss' + ? 'dismiss skill' + : 'install skill'; + setFeedback({ + text: `Failed to ${operation}: ${getErrorMessage(error)}`, + isError: true, + }); + } + })(); + }, + [config, selectedSkill, onReloadSkills], + ); + + useKeypress( + (key) => { + if (keyMatchers[Command.ESCAPE](key)) { + if (phase === 'action') { + setPhase('list'); + setSelectedSkill(null); + setFeedback(null); + } else { + onClose(); + } + return true; + } + return false; + }, + { isActive: true, priority: true }, + ); + + if (loading) { + return ( + + Loading inbox… + + ); + } + + if (skills.length === 0 && !feedback) { + return ( + + Skill Inbox + + + No extracted skills in inbox. + + + + + ); + } + + return ( + + {phase === 'list' ? ( + <> + + Skill Inbox ({skills.length} skill{skills.length !== 1 ? 's' : ''}) + + + Skills extracted from past sessions. Select one to move or dismiss. 
+ + + + + items={skillItems} + onSelect={handleSelectSkill} + isFocused={true} + showNumbers={true} + showScrollArrows={true} + maxItemsToShow={8} + renderItem={(item, { titleColor }) => ( + + + {item.value.name} + + + + {item.value.description} + + {item.value.extractedAt && ( + + {' · '} + {formatDate(item.value.extractedAt)} + + )} + + + )} + /> + + + {feedback && ( + + + {feedback.isError ? '✗ ' : '✓ '} + {feedback.text} + + + )} + + + + ) : ( + <> + Move "{selectedSkill?.name}" + + Choose where to install this skill. + + + + + items={destinationItems} + onSelect={handleSelectDestination} + isFocused={true} + showNumbers={true} + renderItem={(item, { titleColor }) => ( + + + {item.value.label} + + + {item.value.description} + + + )} + /> + + + {feedback && ( + + + {feedback.isError ? '✗ ' : '✓ '} + {feedback.text} + + + )} + + + + )} + + ); +}; diff --git a/packages/core/src/commands/memory.test.ts b/packages/core/src/commands/memory.test.ts index 37ff15052f..113d1b1ec5 100644 --- a/packages/core/src/commands/memory.test.ts +++ b/packages/core/src/commands/memory.test.ts @@ -4,11 +4,18 @@ * SPDX-License-Identifier: Apache-2.0 */ +import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; +import * as path from 'node:path'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { Config } from '../config/config.js'; +import { Storage } from '../config/storage.js'; import { addMemory, + dismissInboxSkill, + listInboxSkills, listMemoryFiles, + moveInboxSkill, refreshMemory, showMemory, } from './memory.js'; @@ -18,6 +25,12 @@ vi.mock('../utils/memoryDiscovery.js', () => ({ refreshServerHierarchicalMemory: vi.fn(), })); +vi.mock('../config/storage.js', () => ({ + Storage: { + getUserSkillsDir: vi.fn(), + }, +})); + const mockRefresh = vi.mocked(memoryDiscovery.refreshServerHierarchicalMemory); describe('memory commands', () => { @@ -202,4 +215,317 @@ describe('memory commands', () => { } }); }); + + 
describe('listInboxSkills', () => { + let tmpDir: string; + let skillsDir: string; + let memoryTempDir: string; + let inboxConfig: Config; + + async function writeSkillMd( + dirName: string, + name: string, + description: string, + ): Promise { + const dir = path.join(skillsDir, dirName); + await fs.mkdir(dir, { recursive: true }); + await fs.writeFile( + path.join(dir, 'SKILL.md'), + `---\nname: ${name}\ndescription: ${description}\n---\nBody content here\n`, + ); + } + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'inbox-test-')); + skillsDir = path.join(tmpDir, 'skills-memory'); + memoryTempDir = path.join(tmpDir, 'memory-temp'); + await fs.mkdir(skillsDir, { recursive: true }); + await fs.mkdir(memoryTempDir, { recursive: true }); + + inboxConfig = { + storage: { + getProjectSkillsMemoryDir: () => skillsDir, + getProjectMemoryTempDir: () => memoryTempDir, + getProjectSkillsDir: () => path.join(tmpDir, 'project-skills'), + }, + } as unknown as Config; + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('should return inbox skills with name, description, and extractedAt', async () => { + await writeSkillMd('my-skill', 'my-skill', 'A test skill'); + await writeSkillMd('other-skill', 'other-skill', 'Another skill'); + + const stateContent = JSON.stringify({ + runs: [ + { + runAt: '2025-01-15T10:00:00Z', + sessionIds: ['sess-1'], + skillsCreated: ['my-skill'], + }, + { + runAt: '2025-01-16T12:00:00Z', + sessionIds: ['sess-2'], + skillsCreated: ['other-skill'], + }, + ], + }); + await fs.writeFile( + path.join(memoryTempDir, '.extraction-state.json'), + stateContent, + ); + + const skills = await listInboxSkills(inboxConfig); + + expect(skills).toHaveLength(2); + const mySkill = skills.find((s) => s.dirName === 'my-skill'); + expect(mySkill).toBeDefined(); + expect(mySkill!.name).toBe('my-skill'); + expect(mySkill!.description).toBe('A test skill'); + 
expect(mySkill!.extractedAt).toBe('2025-01-15T10:00:00Z'); + + const otherSkill = skills.find((s) => s.dirName === 'other-skill'); + expect(otherSkill).toBeDefined(); + expect(otherSkill!.name).toBe('other-skill'); + expect(otherSkill!.description).toBe('Another skill'); + expect(otherSkill!.extractedAt).toBe('2025-01-16T12:00:00Z'); + }); + + it('should return an empty array when the inbox is empty', async () => { + const skills = await listInboxSkills(inboxConfig); + expect(skills).toEqual([]); + }); + + it('should return an empty array when the inbox directory does not exist', async () => { + const missingConfig = { + storage: { + getProjectSkillsMemoryDir: () => path.join(tmpDir, 'nonexistent-dir'), + getProjectMemoryTempDir: () => memoryTempDir, + }, + } as unknown as Config; + + const skills = await listInboxSkills(missingConfig); + expect(skills).toEqual([]); + }); + }); + + describe('moveInboxSkill', () => { + let tmpDir: string; + let skillsDir: string; + let globalSkillsDir: string; + let projectSkillsDir: string; + let moveConfig: Config; + + async function writeSkillMd( + dirName: string, + name: string, + description: string, + ): Promise { + const dir = path.join(skillsDir, dirName); + await fs.mkdir(dir, { recursive: true }); + await fs.writeFile( + path.join(dir, 'SKILL.md'), + `---\nname: ${name}\ndescription: ${description}\n---\nBody content here\n`, + ); + } + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'move-test-')); + skillsDir = path.join(tmpDir, 'skills-memory'); + globalSkillsDir = path.join(tmpDir, 'global-skills'); + projectSkillsDir = path.join(tmpDir, 'project-skills'); + await fs.mkdir(skillsDir, { recursive: true }); + + moveConfig = { + storage: { + getProjectSkillsMemoryDir: () => skillsDir, + getProjectSkillsDir: () => projectSkillsDir, + }, + } as unknown as Config; + + vi.mocked(Storage.getUserSkillsDir).mockReturnValue(globalSkillsDir); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { 
recursive: true, force: true }); + }); + + it('should move a skill to global skills directory', async () => { + await writeSkillMd('my-skill', 'my-skill', 'A test skill'); + + const result = await moveInboxSkill(moveConfig, 'my-skill', 'global'); + + expect(result.success).toBe(true); + expect(result.message).toBe('Moved "my-skill" to ~/.gemini/skills.'); + + // Verify the skill was copied to global + const targetSkill = await fs.readFile( + path.join(globalSkillsDir, 'my-skill', 'SKILL.md'), + 'utf-8', + ); + expect(targetSkill).toContain('name: my-skill'); + + // Verify the skill was removed from inbox + await expect( + fs.access(path.join(skillsDir, 'my-skill')), + ).rejects.toThrow(); + }); + + it('should move a skill to project skills directory', async () => { + await writeSkillMd('my-skill', 'my-skill', 'A test skill'); + + const result = await moveInboxSkill(moveConfig, 'my-skill', 'project'); + + expect(result.success).toBe(true); + expect(result.message).toBe('Moved "my-skill" to .gemini/skills.'); + + // Verify the skill was copied to project + const targetSkill = await fs.readFile( + path.join(projectSkillsDir, 'my-skill', 'SKILL.md'), + 'utf-8', + ); + expect(targetSkill).toContain('name: my-skill'); + + // Verify the skill was removed from inbox + await expect( + fs.access(path.join(skillsDir, 'my-skill')), + ).rejects.toThrow(); + }); + + it('should return an error when the source skill does not exist', async () => { + const result = await moveInboxSkill(moveConfig, 'nonexistent', 'global'); + + expect(result.success).toBe(false); + expect(result.message).toBe('Skill "nonexistent" not found in inbox.'); + }); + + it('should reject invalid skill directory names', async () => { + const result = await moveInboxSkill(moveConfig, '../escape', 'global'); + + expect(result.success).toBe(false); + expect(result.message).toBe('Invalid skill name.'); + }); + + it('should return an error when the target already exists', async () => { + await 
writeSkillMd('my-skill', 'my-skill', 'A test skill'); + + // Pre-create the target + const targetDir = path.join(globalSkillsDir, 'my-skill'); + await fs.mkdir(targetDir, { recursive: true }); + await fs.writeFile(path.join(targetDir, 'SKILL.md'), 'existing content'); + + const result = await moveInboxSkill(moveConfig, 'my-skill', 'global'); + + expect(result.success).toBe(false); + expect(result.message).toBe( + 'A skill named "my-skill" already exists in global skills.', + ); + }); + + it('should detect conflicts based on the normalized skill name', async () => { + await writeSkillMd( + 'inbox-skill', + 'gke:prs-troubleshooter', + 'A test skill', + ); + await fs.mkdir( + path.join(globalSkillsDir, 'existing-gke-prs-troubleshooter'), + { recursive: true }, + ); + await fs.writeFile( + path.join( + globalSkillsDir, + 'existing-gke-prs-troubleshooter', + 'SKILL.md', + ), + [ + '---', + 'name: gke-prs-troubleshooter', + 'description: Existing skill', + '---', + 'Existing body content', + '', + ].join('\n'), + ); + + const result = await moveInboxSkill(moveConfig, 'inbox-skill', 'global'); + + expect(result.success).toBe(false); + expect(result.message).toBe( + 'A skill named "gke-prs-troubleshooter" already exists in global skills.', + ); + await expect( + fs.access(path.join(skillsDir, 'inbox-skill', 'SKILL.md')), + ).resolves.toBeUndefined(); + await expect( + fs.access(path.join(globalSkillsDir, 'inbox-skill')), + ).rejects.toThrow(); + }); + }); + + describe('dismissInboxSkill', () => { + let tmpDir: string; + let skillsDir: string; + let dismissConfig: Config; + + async function writeSkillMd( + dirName: string, + name: string, + description: string, + ): Promise { + const dir = path.join(skillsDir, dirName); + await fs.mkdir(dir, { recursive: true }); + await fs.writeFile( + path.join(dir, 'SKILL.md'), + `---\nname: ${name}\ndescription: ${description}\n---\nBody content here\n`, + ); + } + + beforeEach(async () => { + tmpDir = await 
fs.mkdtemp(path.join(os.tmpdir(), 'dismiss-test-')); + skillsDir = path.join(tmpDir, 'skills-memory'); + await fs.mkdir(skillsDir, { recursive: true }); + + dismissConfig = { + storage: { + getProjectSkillsMemoryDir: () => skillsDir, + }, + } as unknown as Config; + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('should remove a skill from the inbox', async () => { + await writeSkillMd('my-skill', 'my-skill', 'A test skill'); + + const result = await dismissInboxSkill(dismissConfig, 'my-skill'); + + expect(result.success).toBe(true); + expect(result.message).toBe('Dismissed "my-skill" from inbox.'); + + // Verify the skill directory was removed + await expect( + fs.access(path.join(skillsDir, 'my-skill')), + ).rejects.toThrow(); + }); + + it('should return an error when the skill does not exist', async () => { + const result = await dismissInboxSkill(dismissConfig, 'nonexistent'); + + expect(result.success).toBe(false); + expect(result.message).toBe('Skill "nonexistent" not found in inbox.'); + }); + + it('should reject invalid skill directory names', async () => { + const result = await dismissInboxSkill(dismissConfig, 'nested\\skill'); + + expect(result.success).toBe(false); + expect(result.message).toBe('Invalid skill name.'); + }); + }); }); diff --git a/packages/core/src/commands/memory.ts b/packages/core/src/commands/memory.ts index d8857469bd..fd34601690 100644 --- a/packages/core/src/commands/memory.ts +++ b/packages/core/src/commands/memory.ts @@ -4,8 +4,13 @@ * SPDX-License-Identifier: Apache-2.0 */ +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; import type { Config } from '../config/config.js'; +import { Storage } from '../config/storage.js'; import { flattenMemory } from '../config/memory.js'; +import { loadSkillFromFile, loadSkillsFromDir } from '../skills/skillLoader.js'; +import { readExtractionState } from '../services/memoryService.js'; import { 
refreshServerHierarchicalMemory } from '../utils/memoryDiscovery.js'; import type { MessageActionReturn, ToolActionReturn } from './types.js'; @@ -95,3 +100,186 @@ export function listMemoryFiles(config: Config): MessageActionReturn { content, }; } + +/** + * Represents a skill found in the extraction inbox. + */ +export interface InboxSkill { + /** Directory name in the inbox. */ + dirName: string; + /** Skill name from SKILL.md frontmatter. */ + name: string; + /** Skill description from SKILL.md frontmatter. */ + description: string; + /** When the skill was extracted (ISO string), if known. */ + extractedAt?: string; +} + +/** + * Scans the skill extraction inbox and returns structured data + * for each extracted skill. + */ +export async function listInboxSkills(config: Config): Promise { + const skillsDir = config.storage.getProjectSkillsMemoryDir(); + + let entries: Array; + try { + entries = await fs.readdir(skillsDir, { withFileTypes: true }); + } catch { + return []; + } + + const dirs = entries.filter((e) => e.isDirectory()); + if (dirs.length === 0) { + return []; + } + + // Load extraction state to get dates + const memoryDir = config.storage.getProjectMemoryTempDir(); + const statePath = path.join(memoryDir, '.extraction-state.json'); + const state = await readExtractionState(statePath); + + // Build a map: skillDirName → extractedAt + const skillDateMap = new Map(); + for (const run of state.runs) { + for (const skillName of run.skillsCreated) { + skillDateMap.set(skillName, run.runAt); + } + } + + const skills: InboxSkill[] = []; + for (const dir of dirs) { + const skillPath = path.join(skillsDir, dir.name, 'SKILL.md'); + const skillDef = await loadSkillFromFile(skillPath); + if (!skillDef) continue; + + skills.push({ + dirName: dir.name, + name: skillDef.name, + description: skillDef.description, + extractedAt: skillDateMap.get(dir.name), + }); + } + + return skills; +} + +export type InboxSkillDestination = 'global' | 'project'; + +function 
isValidInboxSkillDirName(dirName: string): boolean { + return ( + dirName.length > 0 && + dirName !== '.' && + dirName !== '..' && + !dirName.includes('/') && + !dirName.includes('\\') + ); +} + +async function getSkillNameForConflictCheck( + skillDir: string, + fallbackName: string, +): Promise { + const skill = await loadSkillFromFile(path.join(skillDir, 'SKILL.md')); + return skill?.name ?? fallbackName; +} + +/** + * Moves an inbox skill to the target skills directory (copies it, then removes it from the inbox). + */ +export async function moveInboxSkill( + config: Config, + dirName: string, + destination: InboxSkillDestination, +): Promise<{ success: boolean; message: string }> { + if (!isValidInboxSkillDirName(dirName)) { + return { + success: false, + message: 'Invalid skill name.', + }; + } + + const skillsDir = config.storage.getProjectSkillsMemoryDir(); + const sourcePath = path.join(skillsDir, dirName); + + try { + await fs.access(sourcePath); + } catch { + return { + success: false, + message: `Skill "${dirName}" not found in inbox.`, + }; + } + + const targetBase = + destination === 'global' + ? 
Storage.getUserSkillsDir() + : config.storage.getProjectSkillsDir(); + const targetPath = path.join(targetBase, dirName); + const skillName = await getSkillNameForConflictCheck(sourcePath, dirName); + + try { + await fs.access(targetPath); + return { + success: false, + message: `A skill named "${skillName}" already exists in ${destination} skills.`, + }; + } catch { + // Target doesn't exist — good + } + + const existingTargetSkills = await loadSkillsFromDir(targetBase); + if (existingTargetSkills.some((skill) => skill.name === skillName)) { + return { + success: false, + message: `A skill named "${skillName}" already exists in ${destination} skills.`, + }; + } + + await fs.mkdir(targetBase, { recursive: true }); + await fs.cp(sourcePath, targetPath, { recursive: true }); + + // Remove from inbox after successful copy + await fs.rm(sourcePath, { recursive: true, force: true }); + + const label = + destination === 'global' ? '~/.gemini/skills' : '.gemini/skills'; + return { + success: true, + message: `Moved "${dirName}" to ${label}.`, + }; +} + +/** + * Removes a skill from the extraction inbox. 
+ */ +export async function dismissInboxSkill( + config: Config, + dirName: string, +): Promise<{ success: boolean; message: string }> { + if (!isValidInboxSkillDirName(dirName)) { + return { + success: false, + message: 'Invalid skill name.', + }; + } + + const skillsDir = config.storage.getProjectSkillsMemoryDir(); + const sourcePath = path.join(skillsDir, dirName); + + try { + await fs.access(sourcePath); + } catch { + return { + success: false, + message: `Skill "${dirName}" not found in inbox.`, + }; + } + + await fs.rm(sourcePath, { recursive: true, force: true }); + + return { + success: true, + message: `Dismissed "${dirName}" from inbox.`, + }; +} diff --git a/packages/core/src/services/memoryService.test.ts b/packages/core/src/services/memoryService.test.ts index 65f1e74f55..b6084b6627 100644 --- a/packages/core/src/services/memoryService.test.ts +++ b/packages/core/src/services/memoryService.test.ts @@ -13,6 +13,7 @@ import { type ConversationRecord, } from './chatRecordingService.js'; import type { ExtractionState, ExtractionRun } from './memoryService.js'; +import { coreEvents } from '../utils/events.js'; // Mock external modules used by startMemoryService vi.mock('../agents/local-executor.js', () => ({ @@ -29,6 +30,7 @@ vi.mock('../agents/skill-extraction-agent.js', () => ({ promptConfig: { systemPrompt: 'test' }, tools: [], outputSchema: {}, + modelConfig: { model: 'test-model' }, }), })); @@ -51,6 +53,33 @@ vi.mock('../resources/resource-registry.js', () => ({ ResourceRegistry: vi.fn(), })); +vi.mock('../policy/policy-engine.js', () => ({ + PolicyEngine: vi.fn(), +})); + +vi.mock('../policy/types.js', () => ({ + PolicyDecision: { ALLOW: 'ALLOW' }, +})); + +vi.mock('../confirmation-bus/message-bus.js', () => ({ + MessageBus: vi.fn(), +})); + +vi.mock('../agents/registry.js', () => ({ + getModelConfigAlias: vi.fn().mockReturnValue('skill-extraction-config'), +})); + +vi.mock('../config/storage.js', () => ({ + Storage: { + getUserSkillsDir: 
vi.fn().mockReturnValue('/tmp/fake-user-skills'), + }, +})); + +vi.mock('../skills/skillLoader.js', () => ({ + FRONTMATTER_REGEX: /^---\n([\s\S]*?)\n---/, + parseFrontmatter: vi.fn().mockReturnValue(null), +})); + vi.mock('../utils/debugLogger.js', () => ({ debugLogger: { debug: vi.fn(), @@ -59,6 +88,12 @@ vi.mock('../utils/debugLogger.js', () => ({ }, })); +vi.mock('../utils/events.js', () => ({ + coreEvents: { + emitFeedback: vi.fn(), + }, +})); + // Helper to create a minimal ConversationRecord function createConversation( overrides: Partial & { messageCount?: number } = {}, @@ -427,6 +462,77 @@ describe('memoryService', () => { }), ); }); + + it('emits feedback when new skills are created during extraction', async () => { + const { startMemoryService } = await import('./memoryService.js'); + const { LocalAgentExecutor } = await import( + '../agents/local-executor.js' + ); + + // Reset mocks that may carry state from prior tests + vi.mocked(coreEvents.emitFeedback).mockClear(); + vi.mocked(LocalAgentExecutor.create).mockReset(); + + const memoryDir = path.join(tmpDir, 'memory4'); + const skillsDir = path.join(tmpDir, 'skills4'); + const projectTempDir = path.join(tmpDir, 'temp4'); + const chatsDir = path.join(projectTempDir, 'chats'); + await fs.mkdir(memoryDir, { recursive: true }); + await fs.mkdir(skillsDir, { recursive: true }); + await fs.mkdir(chatsDir, { recursive: true }); + + // Write a valid session with enough messages to pass the filter + const conversation = createConversation({ + sessionId: 'skill-session', + messageCount: 20, + }); + await fs.writeFile( + path.join(chatsDir, 'session-2025-01-01T00-00-skill001.json'), + JSON.stringify(conversation), + ); + + // Override LocalAgentExecutor.create to return an executor whose run + // creates a new skill directory with a SKILL.md in the skillsDir + vi.mocked(LocalAgentExecutor.create).mockResolvedValueOnce({ + run: vi.fn().mockImplementation(async () => { + const newSkillDir = path.join(skillsDir, 
'my-new-skill'); + await fs.mkdir(newSkillDir, { recursive: true }); + await fs.writeFile( + path.join(newSkillDir, 'SKILL.md'), + '# My New Skill', + ); + return undefined; + }), + } as never); + + const mockConfig = { + storage: { + getProjectMemoryDir: vi.fn().mockReturnValue(memoryDir), + getProjectMemoryTempDir: vi.fn().mockReturnValue(memoryDir), + getProjectSkillsMemoryDir: vi.fn().mockReturnValue(skillsDir), + getProjectTempDir: vi.fn().mockReturnValue(projectTempDir), + }, + getToolRegistry: vi.fn(), + getMessageBus: vi.fn(), + getGeminiClient: vi.fn(), + getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }), + modelConfigService: { + registerRuntimeModelConfig: vi.fn(), + }, + sandboxManager: undefined, + } as unknown as Parameters[0]; + + await startMemoryService(mockConfig); + + expect(coreEvents.emitFeedback).toHaveBeenCalledWith( + 'info', + expect.stringContaining('my-new-skill'), + ); + expect(coreEvents.emitFeedback).toHaveBeenCalledWith( + 'info', + expect.stringContaining('/memory inbox'), + ); + }); }); describe('getProcessedSessionIds', () => { diff --git a/packages/core/src/services/memoryService.ts b/packages/core/src/services/memoryService.ts index 495cbdc5ef..7b91047dba 100644 --- a/packages/core/src/services/memoryService.ts +++ b/packages/core/src/services/memoryService.ts @@ -14,6 +14,7 @@ import { type ConversationRecord, } from './chatRecordingService.js'; import { debugLogger } from '../utils/debugLogger.js'; +import { coreEvents } from '../utils/events.js'; import { isNodeError } from '../utils/errors.js'; import { FRONTMATTER_REGEX, parseFrontmatter } from '../skills/skillLoader.js'; import { LocalAgentExecutor } from '../agents/local-executor.js'; @@ -640,6 +641,11 @@ export async function startMemoryService(config: Config): Promise { debugLogger.log( `[MemoryService] Completed in ${elapsed}s. 
Created ${skillsCreated.length} skill(s): ${skillsCreated.join(', ')}`, ); + const skillList = skillsCreated.join(', '); + coreEvents.emitFeedback( + 'info', + `${skillsCreated.length} new skill${skillsCreated.length > 1 ? 's' : ''} extracted from past sessions: ${skillList}. Use /memory inbox to review.`, + ); } else { debugLogger.log( `[MemoryService] Completed in ${elapsed}s. No new skills created (processed ${newSessionIds.length} session(s))`, From 3df99d8bcbef76d677f54662a25904a8d1168ba9 Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Wed, 8 Apr 2026 11:10:28 -0700 Subject: [PATCH 21/39] chore(release): bump version to 0.39.0-nightly.20260408.e77b22e63 (#24939) --- package-lock.json | 18 +++++++++--------- package.json | 4 ++-- packages/a2a-server/package.json | 2 +- packages/cli/package.json | 4 ++-- packages/core/package.json | 2 +- packages/devtools/package.json | 2 +- packages/sdk/package.json | 2 +- packages/test-utils/package.json | 2 +- packages/vscode-ide-companion/package.json | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7ec397323e..e849a895fe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@google/gemini-cli", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@google/gemini-cli", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "workspaces": [ "packages/*" ], @@ -17421,7 +17421,7 @@ }, "packages/a2a-server": { "name": "@google/gemini-cli-a2a-server", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "dependencies": { "@a2a-js/sdk": "0.3.11", "@google-cloud/storage": "^7.16.0", @@ -17536,7 +17536,7 @@ }, "packages/cli": { "name": "@google/gemini-cli", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": 
"0.39.0-nightly.20260408.e77b22e63", "license": "Apache-2.0", "dependencies": { "@agentclientprotocol/sdk": "^0.16.1", @@ -17708,7 +17708,7 @@ }, "packages/core": { "name": "@google/gemini-cli-core", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "license": "Apache-2.0", "dependencies": { "@a2a-js/sdk": "0.3.11", @@ -17976,7 +17976,7 @@ }, "packages/devtools": { "name": "@google/gemini-cli-devtools", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "license": "Apache-2.0", "dependencies": { "ws": "^8.16.0" @@ -17991,7 +17991,7 @@ }, "packages/sdk": { "name": "@google/gemini-cli-sdk", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "license": "Apache-2.0", "dependencies": { "@google/gemini-cli-core": "file:../core", @@ -18008,7 +18008,7 @@ }, "packages/test-utils": { "name": "@google/gemini-cli-test-utils", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "license": "Apache-2.0", "dependencies": { "@google/gemini-cli-core": "file:../core", @@ -18026,7 +18026,7 @@ }, "packages/vscode-ide-companion": { "name": "gemini-cli-vscode-ide-companion", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "license": "LICENSE", "dependencies": { "@modelcontextprotocol/sdk": "^1.23.0", diff --git a/package.json b/package.json index 9f67253ccc..948b98f9c5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "engines": { "node": ">=20.0.0" }, @@ -14,7 +14,7 @@ "url": "git+https://github.com/google-gemini/gemini-cli.git" }, "config": { - "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.36.0-nightly.20260317.2f90b4653" + "sandboxImageUri": 
"us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.39.0-nightly.20260408.e77b22e63" }, "scripts": { "start": "cross-env NODE_ENV=development node scripts/start.js", diff --git a/packages/a2a-server/package.json b/packages/a2a-server/package.json index 5257e56240..51e0450c97 100644 --- a/packages/a2a-server/package.json +++ b/packages/a2a-server/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-a2a-server", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "description": "Gemini CLI A2A Server", "repository": { "type": "git", diff --git a/packages/cli/package.json b/packages/cli/package.json index 52ae182dca..5a128ea130 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "description": "Gemini CLI", "license": "Apache-2.0", "repository": { @@ -27,7 +27,7 @@ "dist" ], "config": { - "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.36.0-nightly.20260317.2f90b4653" + "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.39.0-nightly.20260408.e77b22e63" }, "dependencies": { "@agentclientprotocol/sdk": "^0.16.1", diff --git a/packages/core/package.json b/packages/core/package.json index de105d4389..53619d94c7 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-core", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "description": "Gemini CLI Core", "license": "Apache-2.0", "repository": { diff --git a/packages/devtools/package.json b/packages/devtools/package.json index ed3160b7f1..60eba8c1a6 100644 --- a/packages/devtools/package.json +++ b/packages/devtools/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-devtools", - "version": "0.36.0-nightly.20260317.2f90b4653", + 
"version": "0.39.0-nightly.20260408.e77b22e63", "license": "Apache-2.0", "type": "module", "main": "dist/src/index.js", diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 7bd9c62d51..225b60ce2d 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-sdk", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "description": "Gemini CLI SDK", "license": "Apache-2.0", "repository": { diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index b16497da3c..8a1d11000f 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-test-utils", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "private": true, "main": "src/index.ts", "license": "Apache-2.0", diff --git a/packages/vscode-ide-companion/package.json b/packages/vscode-ide-companion/package.json index b2a2912c7e..da5931edd3 100644 --- a/packages/vscode-ide-companion/package.json +++ b/packages/vscode-ide-companion/package.json @@ -2,7 +2,7 @@ "name": "gemini-cli-vscode-ide-companion", "displayName": "Gemini CLI Companion", "description": "Enable Gemini CLI with direct access to your IDE workspace.", - "version": "0.36.0-nightly.20260317.2f90b4653", + "version": "0.39.0-nightly.20260408.e77b22e63", "publisher": "google", "icon": "assets/icon.png", "repository": { From a39461718c46869e7db746845f22e66194fcf763 Mon Sep 17 00:00:00 2001 From: Emily Hedlund Date: Wed, 8 Apr 2026 12:03:36 -0700 Subject: [PATCH 22/39] fix(core): ensure robust sandbox cleanup in all process execution paths (#24763) Co-authored-by: Spencer --- .../services/sandboxedFileSystemService.ts | 160 +++++----- .../src/services/shellExecutionService.ts | 38 ++- packages/core/src/tools/grep.ts | 36 ++- packages/core/src/tools/tool-registry.ts | 300 +++++++++--------- 
packages/core/src/utils/shell-utils.ts | 240 +++++++------- 5 files changed, 412 insertions(+), 362 deletions(-) diff --git a/packages/core/src/services/sandboxedFileSystemService.ts b/packages/core/src/services/sandboxedFileSystemService.ts index 03907657f3..d5e6dd4b4a 100644 --- a/packages/core/src/services/sandboxedFileSystemService.ts +++ b/packages/core/src/services/sandboxedFileSystemService.ts @@ -59,52 +59,56 @@ export class SandboxedFileSystemService implements FileSystemService { }, }); - return new Promise((resolve, reject) => { - // Direct spawn is necessary here for streaming large file contents. + try { + return await new Promise((resolve, reject) => { + // Direct spawn is necessary here for streaming large file contents. - const child = spawn(prepared.program, prepared.args, { - cwd: this.cwd, - env: prepared.env, - }); + const child = spawn(prepared.program, prepared.args, { + cwd: this.cwd, + env: prepared.env, + }); - let output = ''; - let error = ''; + let output = ''; + let error = ''; - child.stdout?.on('data', (data) => { - output += data.toString(); - }); + child.stdout?.on('data', (data) => { + output += data.toString(); + }); - child.stderr?.on('data', (data) => { - error += data.toString(); - }); + child.stderr?.on('data', (data) => { + error += data.toString(); + }); - child.on('close', (code) => { - if (code === 0) { - resolve(output); - } else { - const isEnoent = - error.toLowerCase().includes('no such file or directory') || - error.toLowerCase().includes('enoent') || - error.toLowerCase().includes('could not find file') || - error.toLowerCase().includes('could not find a part of the path'); - const err = new Error( - `Sandbox Error: read_file failed for '${filePath}'. Exit code ${code}. ${error ? 
'Details: ' + error : ''}`, - ); - if (isEnoent) { - Object.assign(err, { code: 'ENOENT' }); + child.on('close', (code) => { + if (code === 0) { + resolve(output); + } else { + const isEnoent = + error.toLowerCase().includes('no such file or directory') || + error.toLowerCase().includes('enoent') || + error.toLowerCase().includes('could not find file') || + error.toLowerCase().includes('could not find a part of the path'); + const err = new Error( + `Sandbox Error: read_file failed for '${filePath}'. Exit code ${code}. ${error ? 'Details: ' + error : ''}`, + ); + if (isEnoent) { + Object.assign(err, { code: 'ENOENT' }); + } + reject(err); } - reject(err); - } - }); + }); - child.on('error', (err) => { - reject( - new Error( - `Sandbox Error: Failed to spawn read_file for '${filePath}': ${err.message}`, - ), - ); + child.on('error', (err) => { + reject( + new Error( + `Sandbox Error: Failed to spawn read_file for '${filePath}': ${err.message}`, + ), + ); + }); }); - }); + } finally { + prepared.cleanup?.(); + } } async writeTextFile(filePath: string, content: string): Promise { @@ -124,53 +128,57 @@ export class SandboxedFileSystemService implements FileSystemService { }, }); - return new Promise((resolve, reject) => { - // Direct spawn is necessary here for streaming large file contents. + try { + return await new Promise((resolve, reject) => { + // Direct spawn is necessary here for streaming large file contents. - const child = spawn(prepared.program, prepared.args, { - cwd: this.cwd, - env: prepared.env, - }); + const child = spawn(prepared.program, prepared.args, { + cwd: this.cwd, + env: prepared.env, + }); - child.stdin?.on('error', (err) => { - // Silently ignore EPIPE errors on stdin, they will be caught by the process error/close listeners - if (isNodeError(err) && err.code === 'EPIPE') { - return; - } - debugLogger.error( - `Sandbox Error: stdin error for '${filePath}': ${ - err instanceof Error ? 
err.message : String(err) - }`, - ); - }); + child.stdin?.on('error', (err) => { + // Silently ignore EPIPE errors on stdin, they will be caught by the process error/close listeners + if (isNodeError(err) && err.code === 'EPIPE') { + return; + } + debugLogger.error( + `Sandbox Error: stdin error for '${filePath}': ${ + err instanceof Error ? err.message : String(err) + }`, + ); + }); - child.stdin?.write(content); - child.stdin?.end(); + child.stdin?.write(content); + child.stdin?.end(); - let error = ''; - child.stderr?.on('data', (data) => { - error += data.toString(); - }); + let error = ''; + child.stderr?.on('data', (data) => { + error += data.toString(); + }); - child.on('close', (code) => { - if (code === 0) { - resolve(); - } else { + child.on('close', (code) => { + if (code === 0) { + resolve(); + } else { + reject( + new Error( + `Sandbox Error: write_file failed for '${filePath}'. Exit code ${code}. ${error ? 'Details: ' + error : ''}`, + ), + ); + } + }); + + child.on('error', (err) => { reject( new Error( - `Sandbox Error: write_file failed for '${filePath}'. Exit code ${code}. ${error ? 
'Details: ' + error : ''}`, + `Sandbox Error: Failed to spawn write_file for '${filePath}': ${err.message}`, ), ); - } + }); }); - - child.on('error', (err) => { - reject( - new Error( - `Sandbox Error: Failed to spawn write_file for '${filePath}': ${err.message}`, - ), - ); - }); - }); + } finally { + prepared.cleanup?.(); + } } } diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index dfbb3a5033..46b894426f 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -510,21 +510,24 @@ export class ShellExecutionService { shellExecutionConfig: ShellExecutionConfig, isInteractive: boolean, ): Promise { + let cmdCleanup: (() => void) | undefined; try { const isWindows = os.platform() === 'win32'; + const prepared = await this.prepareExecution( + commandToExecute, + cwd, + shellExecutionConfig, + isInteractive, + ); + cmdCleanup = prepared.cleanup; + const { program: finalExecutable, args: finalArgs, env: finalEnv, cwd: finalCwd, - cleanup: cmdCleanup, - } = await this.prepareExecution( - commandToExecute, - cwd, - shellExecutionConfig, - isInteractive, - ); + } = prepared; const child = cpSpawn(finalExecutable, finalArgs, { cwd: finalCwd, @@ -811,6 +814,7 @@ export class ShellExecutionService { } catch (e) { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const error = e as Error; + cmdCleanup?.(); return { pid: undefined, result: Promise.resolve({ @@ -826,7 +830,6 @@ export class ShellExecutionService { }; } } - private static async executeWithPty( commandToExecute: string, cwd: string, @@ -840,23 +843,26 @@ export class ShellExecutionService { throw new Error('PTY implementation not found'); } let spawnedPty: IPty | undefined; + let cmdCleanup: (() => void) | undefined; try { const cols = shellExecutionConfig.terminalWidth ?? 80; const rows = shellExecutionConfig.terminalHeight ?? 
30; + const prepared = await this.prepareExecution( + commandToExecute, + cwd, + shellExecutionConfig, + true, + ); + cmdCleanup = prepared.cleanup; + const { program: finalExecutable, args: finalArgs, env: finalEnv, cwd: finalCwd, - cleanup: cmdCleanup, - } = await this.prepareExecution( - commandToExecute, - cwd, - shellExecutionConfig, - true, - ); + } = prepared; // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment const ptyProcess = ptyInfo.module.spawn(finalExecutable, finalArgs, { @@ -1237,6 +1243,7 @@ export class ShellExecutionService { } catch (e) { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const error = e as Error; + cmdCleanup?.(); if (spawnedPty) { try { @@ -1270,7 +1277,6 @@ export class ShellExecutionService { } } } - /** * Writes a string to the pseudo-terminal (PTY) of a running process. * diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index ac7dc6cf02..3f6fd08ff3 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -326,6 +326,7 @@ class GrepToolInvocation extends BaseToolInvocation< let finalCommand = checkCommand; let finalArgs = checkArgs; let finalEnv = process.env; + let cleanup: (() => void) | undefined; if (sandboxManager) { try { @@ -338,6 +339,7 @@ class GrepToolInvocation extends BaseToolInvocation< finalCommand = prepared.program; finalArgs = prepared.args; finalEnv = prepared.env; + cleanup = prepared.cleanup; } catch (err) { debugLogger.debug( `[GrepTool] Sandbox preparation failed for '${command}':`, @@ -346,21 +348,27 @@ class GrepToolInvocation extends BaseToolInvocation< } } - return await new Promise((resolve) => { - const child = spawn(finalCommand, finalArgs, { - stdio: 'ignore', - shell: true, - env: finalEnv, + try { + return await new Promise((resolve) => { + const child = spawn(finalCommand, finalArgs, { + stdio: 'ignore', + shell: true, + env: finalEnv, + }); + child.on('close', (code) => { + resolve(code === 0); + 
}); + child.on('error', (err) => { + debugLogger.debug( + `[GrepTool] Failed to start process for '${command}':`, + err.message, + ); + resolve(false); + }); }); - child.on('close', (code) => resolve(code === 0)); - child.on('error', (err) => { - debugLogger.debug( - `[GrepTool] Failed to start process for '${command}':`, - err.message, - ); - resolve(false); - }); - }); + } finally { + cleanup?.(); + } } catch { return false; } diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index f9551d75da..5b174a97d7 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -65,6 +65,7 @@ class DiscoveredToolInvocation extends BaseToolInvocation< let finalCommand = callCommand; let finalArgs = args; let finalEnv = process.env; + let cleanupFunc: (() => void) | undefined; const sandboxManager = this.config.sandboxManager; if (sandboxManager) { @@ -77,58 +78,63 @@ class DiscoveredToolInvocation extends BaseToolInvocation< finalCommand = prepared.program; finalArgs = prepared.args; finalEnv = prepared.env; + cleanupFunc = prepared.cleanup; } - const child = spawn(finalCommand, finalArgs, { - env: finalEnv, - }); - child.stdin.write(JSON.stringify(this.params)); - child.stdin.end(); - let stdout = ''; let stderr = ''; let error: Error | null = null; let code: number | null = null; let signal: NodeJS.Signals | null = null; - await new Promise((resolve) => { - const onStdout = (data: Buffer) => { - stdout += data?.toString(); - }; + try { + const child = spawn(finalCommand, finalArgs, { + env: finalEnv, + }); + child.stdin.write(JSON.stringify(this.params)); + child.stdin.end(); - const onStderr = (data: Buffer) => { - stderr += data?.toString(); - }; + await new Promise((resolve) => { + const onStdout = (data: Buffer) => { + stdout += data?.toString(); + }; - const onError = (err: Error) => { - error = err; - }; + const onStderr = (data: Buffer) => { + stderr += data?.toString(); + }; - 
const onClose = ( - _code: number | null, - _signal: NodeJS.Signals | null, - ) => { - code = _code; - signal = _signal; - cleanup(); - resolve(); - }; + const onError = (err: Error) => { + error = err; + }; - const cleanup = () => { - child.stdout.removeListener('data', onStdout); - child.stderr.removeListener('data', onStderr); - child.removeListener('error', onError); - child.removeListener('close', onClose); - if (child.connected) { - child.disconnect(); - } - }; + const onClose = ( + _code: number | null, + _signal: NodeJS.Signals | null, + ) => { + code = _code; + signal = _signal; + cleanup(); + resolve(); + }; - child.stdout.on('data', onStdout); - child.stderr.on('data', onStderr); - child.on('error', onError); - child.on('close', onClose); - }); + const cleanup = () => { + child.stdout.removeListener('data', onStdout); + child.stderr.removeListener('data', onStderr); + child.removeListener('error', onError); + child.removeListener('close', onClose); + if (child.connected) { + child.disconnect(); + } + }; + + child.stdout.on('data', onStdout); + child.stderr.on('data', onStderr); + child.on('error', onError); + child.on('close', onClose); + }); + } finally { + cleanupFunc?.(); + } // if there is any error, non-zero exit code, signal, or stderr, return error details instead of stdout if (error || code !== 0 || signal || stderr) { @@ -374,6 +380,7 @@ export class ToolRegistry { .slice(1) .filter((p): p is string => typeof p === 'string'); let finalEnv = process.env; + let cleanupFunc: (() => void) | undefined; const sandboxManager = this.config.sandboxManager; if (sandboxManager) { @@ -386,118 +393,127 @@ export class ToolRegistry { finalCommand = prepared.program; finalArgs = prepared.args; finalEnv = prepared.env; + cleanupFunc = prepared.cleanup; } - const proc = spawn(finalCommand, finalArgs, { - env: finalEnv, - }); - let stdout = ''; - const stdoutDecoder = new StringDecoder('utf8'); - let stderr = ''; - const stderrDecoder = new StringDecoder('utf8'); 
- let sizeLimitExceeded = false; - const MAX_STDOUT_SIZE = 10 * 1024 * 1024; // 10MB limit - const MAX_STDERR_SIZE = 10 * 1024 * 1024; // 10MB limit - - let stdoutByteLength = 0; - let stderrByteLength = 0; - - proc.stdout.on('data', (data) => { - if (sizeLimitExceeded) return; - if (stdoutByteLength + data.length > MAX_STDOUT_SIZE) { - sizeLimitExceeded = true; - proc.kill(); - return; - } - stdoutByteLength += data.length; - stdout += stdoutDecoder.write(data); - }); - - proc.stderr.on('data', (data) => { - if (sizeLimitExceeded) return; - if (stderrByteLength + data.length > MAX_STDERR_SIZE) { - sizeLimitExceeded = true; - proc.kill(); - return; - } - stderrByteLength += data.length; - stderr += stderrDecoder.write(data); - }); - - await new Promise((resolve, reject) => { - proc.on('error', reject); - proc.on('close', (code) => { - stdout += stdoutDecoder.end(); - stderr += stderrDecoder.end(); - - if (sizeLimitExceeded) { - return reject( - new Error( - `Tool discovery command output exceeded size limit of ${MAX_STDOUT_SIZE} bytes.`, - ), - ); - } - - if (code !== 0) { - coreEvents.emitFeedback( - 'error', - `Tool discovery command failed with code ${code}.`, - stderr, - ); - return reject( - new Error(`Tool discovery command failed with exit code ${code}`), - ); - } - resolve(); + try { + const proc = spawn(finalCommand, finalArgs, { + env: finalEnv, }); - }); + let stdout = ''; + const stdoutDecoder = new StringDecoder('utf8'); + let stderr = ''; + const stderrDecoder = new StringDecoder('utf8'); + let sizeLimitExceeded = false; + const MAX_STDOUT_SIZE = 10 * 1024 * 1024; // 10MB limit + const MAX_STDERR_SIZE = 10 * 1024 * 1024; // 10MB limit - // execute discovery command and extract function declarations (w/ or w/o "tool" wrappers) - const functions: FunctionDeclaration[] = []; - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const discoveredItems = JSON.parse(stdout.trim()); + let stdoutByteLength = 0; + let stderrByteLength = 0; - 
if (!discoveredItems || !Array.isArray(discoveredItems)) { - throw new Error( - 'Tool discovery command did not return a JSON array of tools.', - ); - } + proc.stdout.on('data', (data) => { + if (sizeLimitExceeded) return; + if (stdoutByteLength + data.length > MAX_STDOUT_SIZE) { + sizeLimitExceeded = true; + proc.kill(); + return; + } + stdoutByteLength += data.length; + stdout += stdoutDecoder.write(data); + }); - for (const tool of discoveredItems) { - if (tool && typeof tool === 'object') { - if (Array.isArray(tool['function_declarations'])) { - functions.push(...tool['function_declarations']); - } else if (Array.isArray(tool['functionDeclarations'])) { - functions.push(...tool['functionDeclarations']); - } else if (tool['name']) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - functions.push(tool as FunctionDeclaration); + proc.stderr.on('data', (data) => { + if (sizeLimitExceeded) return; + if (stderrByteLength + data.length > MAX_STDERR_SIZE) { + sizeLimitExceeded = true; + proc.kill(); + return; + } + stderrByteLength += data.length; + stderr += stderrDecoder.write(data); + }); + + await new Promise((resolve, reject) => { + proc.on('error', (err) => { + reject(err); + }); + proc.on('close', (code) => { + stdout += stdoutDecoder.end(); + stderr += stderrDecoder.end(); + + if (sizeLimitExceeded) { + return reject( + new Error( + `Tool discovery command output exceeded size limit of ${MAX_STDOUT_SIZE} bytes.`, + ), + ); + } + + if (code !== 0) { + coreEvents.emitFeedback( + 'error', + `Tool discovery command failed with code ${code}.`, + stderr, + ); + return reject( + new Error( + `Tool discovery command failed with exit code ${code}`, + ), + ); + } + resolve(); + }); + }); + + // execute discovery command and extract function declarations (w/ or w/o "tool" wrappers) + const functions: FunctionDeclaration[] = []; + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const discoveredItems = 
JSON.parse(stdout.trim()); + + if (!discoveredItems || !Array.isArray(discoveredItems)) { + throw new Error( + 'Tool discovery command did not return a JSON array of tools.', + ); + } + + for (const tool of discoveredItems) { + if (tool && typeof tool === 'object') { + if (Array.isArray(tool['function_declarations'])) { + functions.push(...tool['function_declarations']); + } else if (Array.isArray(tool['functionDeclarations'])) { + functions.push(...tool['functionDeclarations']); + } else if (tool['name']) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + functions.push(tool as FunctionDeclaration); + } } } - } - // register each function as a tool - for (const func of functions) { - if (!func.name) { - debugLogger.warn('Discovered a tool with no name. Skipping.'); - continue; + // register each function as a tool + for (const func of functions) { + if (!func.name) { + debugLogger.warn('Discovered a tool with no name. Skipping.'); + continue; + } + const parameters = + func.parametersJsonSchema && + typeof func.parametersJsonSchema === 'object' && + !Array.isArray(func.parametersJsonSchema) + ? func.parametersJsonSchema + : {}; + this.registerTool( + new DiscoveredTool( + this.config, + func.name, + DISCOVERED_TOOL_PREFIX + func.name, + func.description ?? '', + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + parameters as Record, + this.messageBus, + ), + ); } - const parameters = - func.parametersJsonSchema && - typeof func.parametersJsonSchema === 'object' && - !Array.isArray(func.parametersJsonSchema) - ? func.parametersJsonSchema - : {}; - this.registerTool( - new DiscoveredTool( - this.config, - func.name, - DISCOVERED_TOOL_PREFIX + func.name, - func.description ?? 
'', - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - parameters as Record, - this.messageBus, - ), - ); + } finally { + cleanupFunc?.(); } } catch (e) { debugLogger.error(`Tool discovery command "${discoveryCmd}" failed:`, e); diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts index 8486be0de9..46cffa1d35 100644 --- a/packages/core/src/utils/shell-utils.ts +++ b/packages/core/src/utils/shell-utils.ts @@ -847,34 +847,40 @@ export const spawnAsync = async ( const { program: finalCommand, args: finalArgs, env: finalEnv } = prepared; - return new Promise((resolve, reject) => { - const child = spawn(finalCommand, finalArgs, { - ...options, - env: finalEnv, - }); - let stdout = ''; - let stderr = ''; + try { + return await new Promise((resolve, reject) => { + const child = spawn(finalCommand, finalArgs, { + ...options, + env: finalEnv, + }); + let stdout = ''; + let stderr = ''; - child.stdout.on('data', (data) => { - stdout += data.toString(); - }); + child.stdout.on('data', (data) => { + stdout += data.toString(); + }); - child.stderr.on('data', (data) => { - stderr += data.toString(); - }); + child.stderr.on('data', (data) => { + stderr += data.toString(); + }); - child.on('close', (code) => { - if (code === 0) { - resolve({ stdout, stderr }); - } else { - reject(new Error(`Command failed with exit code ${code}:\n${stderr}`)); - } - }); + child.on('close', (code) => { + if (code === 0) { + resolve({ stdout, stderr }); + } else { + reject( + new Error(`Command failed with exit code ${code}:\n${stderr}`), + ); + } + }); - child.on('error', (err) => { - reject(err); + child.on('error', (err) => { + reject(err); + }); }); - }); + } finally { + prepared.cleanup?.(); + } }; /** @@ -902,109 +908,115 @@ export async function* execStreaming( env: options?.env ?? 
process.env, }); - const { program: finalCommand, args: finalArgs, env: finalEnv } = prepared; - - const child = spawn(finalCommand, finalArgs, { - ...options, - env: finalEnv, - // ensure we don't open a window on windows if possible/relevant - windowsHide: true, - }); - - const rl = readline.createInterface({ - input: child.stdout, - terminal: false, - }); - - const errorChunks: Buffer[] = []; - let stderrTotalBytes = 0; - const MAX_STDERR_BYTES = 20 * 1024; // 20KB limit - - child.stderr.on('data', (chunk) => { - if (stderrTotalBytes < MAX_STDERR_BYTES) { - errorChunks.push(chunk); - stderrTotalBytes += chunk.length; - } - }); - - let error: Error | null = null; - child.on('error', (err) => { - error = err; - }); - - const onAbort = () => { - // If manually aborted by signal, we kill immediately. - if (!child.killed) child.kill(); - }; - - if (options?.signal?.aborted) { - onAbort(); - } else { - options?.signal?.addEventListener('abort', onAbort); - } - - let finished = false; try { - for await (const line of rl) { - if (options?.signal?.aborted) break; - yield line; - } - finished = true; - } finally { - rl.close(); - options?.signal?.removeEventListener('abort', onAbort); + const { program: finalCommand, args: finalArgs, env: finalEnv } = prepared; - // Ensure process is killed when the generator is closed (consumer breaks loop) - let killedByGenerator = false; - if (!finished && child.exitCode === null && !child.killed) { - try { - child.kill(); - } catch { - // ignore error if process is already dead + const child = spawn(finalCommand, finalArgs, { + ...options, + env: finalEnv, + // ensure we don't open a window on windows if possible/relevant + windowsHide: true, + }); + + const rl = readline.createInterface({ + input: child.stdout, + terminal: false, + }); + + const errorChunks: Buffer[] = []; + let stderrTotalBytes = 0; + const MAX_STDERR_BYTES = 20 * 1024; // 20KB limit + + child.stderr.on('data', (chunk) => { + if (stderrTotalBytes < MAX_STDERR_BYTES) 
{ + errorChunks.push(chunk); + stderrTotalBytes += chunk.length; } - killedByGenerator = true; + }); + + let error: Error | null = null; + child.on('error', (err) => { + error = err; + }); + + const onAbort = () => { + // If manually aborted by signal, we kill immediately. + if (!child.killed) child.kill(); + }; + + if (options?.signal?.aborted) { + onAbort(); + } else { + options?.signal?.addEventListener('abort', onAbort); } - // Ensure we wait for the process to exit to check codes - await new Promise((resolve, reject) => { - // If an error occurred before we got here (e.g. spawn failure), reject immediately. - if (error) { - reject(error); - return; + let finished = false; + try { + for await (const line of rl) { + if (options?.signal?.aborted) break; + yield line; + } + finished = true; + } finally { + rl.close(); + options?.signal?.removeEventListener('abort', onAbort); + + // Ensure process is killed when the generator is closed (consumer breaks loop) + let killedByGenerator = false; + if (!finished && child.exitCode === null && !child.killed) { + try { + child.kill(); + } catch { + // ignore error if process is already dead + } + killedByGenerator = true; } - function checkExit(code: number | null) { - // If we aborted or killed it manually, we treat it as success (stop waiting) - if (options?.signal?.aborted || killedByGenerator) { - resolve(); + // Ensure we wait for the process to exit to check codes + await new Promise((resolve, reject) => { + // If an error occurred before we got here (e.g. spawn failure), reject immediately. + if (error) { + reject(error); return; } - const allowed = options?.allowedExitCodes ?? [0]; - if (code !== null && allowed.includes(code)) { - resolve(); - } else { - // If we have an accumulated error or explicit error event - if (error) reject(error); - else { - const stderr = Buffer.concat(errorChunks).toString('utf8'); - const truncatedMsg = - stderrTotalBytes >= MAX_STDERR_BYTES ? 
'...[truncated]' : ''; - reject( - new Error( - `Process exited with code ${code}: ${stderr}${truncatedMsg}`, - ), - ); + function checkExit(code: number | null) { + // If we aborted or killed it manually, we treat it as success (stop waiting) + if (options?.signal?.aborted || killedByGenerator) { + resolve(); + return; + } + + const allowed = options?.allowedExitCodes ?? [0]; + if (code !== null && allowed.includes(code)) { + resolve(); + } else { + // If we have an accumulated error or explicit error event + if (error) reject(error); + else { + const stderr = Buffer.concat(errorChunks).toString('utf8'); + const truncatedMsg = + stderrTotalBytes >= MAX_STDERR_BYTES ? '...[truncated]' : ''; + reject( + new Error( + `Process exited with code ${code}: ${stderr}${truncatedMsg}`, + ), + ); + } } } - } - if (child.exitCode !== null) { - checkExit(child.exitCode); - } else { - child.on('close', (code) => checkExit(code)); - child.on('error', (err) => reject(err)); - } - }); + if (child.exitCode !== null) { + checkExit(child.exitCode); + } else { + child.on('close', (code) => checkExit(code)); + child.on('error', (err) => { + reject(err); + }); + } + }); + } + } finally { + prepared.cleanup?.(); } } From d1a062bcc47c715a5dc7af69d515796c375111ac Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Wed, 8 Apr 2026 13:02:03 -0700 Subject: [PATCH 23/39] chore: update ink version to 6.6.8 (#24934) --- package-lock.json | 10 +++++----- package.json | 4 ++-- packages/cli/package.json | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index e849a895fe..2d3e670b74 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "packages/*" ], "dependencies": { - "ink": "npm:@jrichman/ink@6.6.7", + "ink": "npm:@jrichman/ink@6.6.8", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", @@ -10070,9 +10070,9 @@ }, "node_modules/ink": { "name": "@jrichman/ink", - "version": "6.6.7", - 
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.7.tgz", - "integrity": "sha512-bDzQLpLzK/dn9Ur/Ku88ZZR9totVcMGrGYAgPHidsAAbe9NKztU1fggj/iu0wRp5g1kBeALb3cfagFGdDxAU1w==", + "version": "6.6.8", + "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.8.tgz", + "integrity": "sha512-099iGdvWVIM2ivc3NEWyMF7FT06aLmrx1gMGI02ZYB4wLIFn0v/KQl6+20xEwcM6gyzj8Y8842Sf0UH2z0oTDw==", "license": "MIT", "peer": true, "dependencies": { @@ -17558,7 +17558,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.6.7", + "ink": "npm:@jrichman/ink@6.6.8", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/package.json b/package.json index 948b98f9c5..f531b41dbc 100644 --- a/package.json +++ b/package.json @@ -71,7 +71,7 @@ "pre-commit": "node scripts/pre-commit.js" }, "overrides": { - "ink": "npm:@jrichman/ink@6.6.7", + "ink": "npm:@jrichman/ink@6.6.8", "wrap-ansi": "9.0.2", "cliui": { "wrap-ansi": "7.0.0" @@ -139,7 +139,7 @@ "yargs": "^17.7.2" }, "dependencies": { - "ink": "npm:@jrichman/ink@6.6.7", + "ink": "npm:@jrichman/ink@6.6.8", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", diff --git a/packages/cli/package.json b/packages/cli/package.json index 5a128ea130..82ff74b08e 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -49,7 +49,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.6.7", + "ink": "npm:@jrichman/ink@6.6.8", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", From 56c2397e784dee3e25015215b8380f80904749c4 Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Wed, 8 Apr 2026 13:45:59 -0700 Subject: [PATCH 24/39] Changelog for v0.38.0-preview.0 (#24938) Co-authored-by: gemini-cli-robot <224641728+gemini-cli-robot@users.noreply.github.com> Co-authored-by: g-samroberts --- docs/changelogs/preview.md | 654 
++++++++++++++----------------------- 1 file changed, 248 insertions(+), 406 deletions(-) diff --git a/docs/changelogs/preview.md b/docs/changelogs/preview.md index 95feee1e2a..cf43e62c45 100644 --- a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.37.0-preview.2 +# Preview release: v0.38.0-preview.0 -Released: April 07, 2026 +Released: April 08, 2026 Our preview release includes the latest, new, and experimental features. This release may not be as stable as our [latest weekly release](latest.md). @@ -13,414 +13,256 @@ npm install -g @google/gemini-cli@preview ## Highlights -- **Plan Mode Enhancements**: Plan now includes support for untrusted folders, - prioritized pre-approval discussions, and a resolve for sandbox-related - deadlocks during file creation. -- **Browser Agent Evolved**: Significant updates to the browser agent, including - persistent session management, dynamic discovery of read-only tools, - sandbox-aware initialization, and automated reclamation of stale snapshots to - optimize context window usage. -- **Advanced Sandbox Security**: Implementation of dynamic sandbox expansion for - both Linux and Windows, alongside secret visibility lockdown for environment - files and OS-specific forbidden path support. -- **Unified Core Architecture**: Centralized context management and a new - `ModelConfigService` for unified model discovery, complemented by the - introduction of `AgentHistoryProvider` and tool-based topic grouping - (Chapters). -- **UI/UX & Performance Improvements**: New Tokyo Night theme, "tab to queue" - message support, and compact tool output formatting, plus optimized build - scripts and improved layout stability for TUI components. +- **Context Management:** Introduced a Context Compression Service to optimize + context window usage and landed a background memory service for skill + extraction. 
+- **Enhanced Security:** Implemented context-aware persistent policy approvals + for smarter tool permissions and enabled `web_fetch` in plan mode with user + confirmation. +- **Workflow Monitoring:** Added background process monitoring and inspection + tools for better visibility into long-running tasks. +- **UI/UX Refinements:** Enhanced the tool confirmation UI, selection layout, + and added support for selective topic expansion and click-to-expand. +- **Core Stability:** Improved sandbox reliability on Linux and Windows, + resolved shebang compatibility issues, and fixed various crashes in the CLI + and core services. ## What's Changed -- fix(patch): cherry-pick cb7f7d6 to release/v0.37.0-preview.1-pr-24342 to patch - version v0.37.0-preview.1 and create version 0.37.0-preview.2 by - @gemini-cli-robot in - [#24842](https://github.com/google-gemini/gemini-cli/pull/24842) -- fix(patch): cherry-pick 64c928f to release/v0.37.0-preview.0-pr-23257 to patch - version v0.37.0-preview.0 and create version 0.37.0-preview.1 by - @gemini-cli-robot in - [#24561](https://github.com/google-gemini/gemini-cli/pull/24561) -- feat(evals): centralize test agents into test-utils for reuse by @Samee24 in - [#23616](https://github.com/google-gemini/gemini-cli/pull/23616) -- revert: chore(config): disable agents by default by @abhipatel12 in - [#23672](https://github.com/google-gemini/gemini-cli/pull/23672) -- fix(plan): update telemetry attribute keys and add timestamp by @Adib234 in - [#23685](https://github.com/google-gemini/gemini-cli/pull/23685) -- fix(core): prevent premature MCP discovery completion by @jackwotherspoon in - [#23637](https://github.com/google-gemini/gemini-cli/pull/23637) -- feat(browser): add maxActionsPerTask for browser agent setting by - @cynthialong0-0 in - [#23216](https://github.com/google-gemini/gemini-cli/pull/23216) -- fix(core): improve agent loader error formatting for empty paths by - @adamfweidman in - 
[#23690](https://github.com/google-gemini/gemini-cli/pull/23690) -- fix(cli): only show updating spinner when auto-update is in progress by - @scidomino in [#23709](https://github.com/google-gemini/gemini-cli/pull/23709) -- Refine onboarding metrics to log the duration explicitly and use the tier - name. by @yunaseoul in - [#23678](https://github.com/google-gemini/gemini-cli/pull/23678) -- chore(tools): add toJSON to tools and invocations to reduce logging verbosity - by @alisa-alisa in - [#22899](https://github.com/google-gemini/gemini-cli/pull/22899) -- fix(cli): stabilize copy mode to prevent flickering and cursor resets by - @mattKorwel in - [#22584](https://github.com/google-gemini/gemini-cli/pull/22584) -- fix(test): move flaky ctrl-c-exit test to non-blocking suite by @mattKorwel in - [#23732](https://github.com/google-gemini/gemini-cli/pull/23732) -- feat(skills): add ci skill for automated failure replication by @mattKorwel in - [#23720](https://github.com/google-gemini/gemini-cli/pull/23720) -- feat(sandbox): implement forbiddenPaths for OS-specific sandbox managers by - @ehedlund in [#23282](https://github.com/google-gemini/gemini-cli/pull/23282) -- fix(core): conditionally expose additional_permissions in shell tool by - @galz10 in [#23729](https://github.com/google-gemini/gemini-cli/pull/23729) -- refactor(core): standardize OS-specific sandbox tests and extract linux helper - methods by @ehedlund in - [#23715](https://github.com/google-gemini/gemini-cli/pull/23715) -- format recently added script by @scidomino in - [#23739](https://github.com/google-gemini/gemini-cli/pull/23739) -- fix(ui): prevent over-eager slash subcommand completion by @keithguerin in - [#20136](https://github.com/google-gemini/gemini-cli/pull/20136) -- Fix dynamic model routing for gemini 3.1 pro to customtools model by - @kevinjwang1 in - [#23641](https://github.com/google-gemini/gemini-cli/pull/23641) -- feat(core): support inline agentCardJson for remote agents by 
@adamfweidman in - [#23743](https://github.com/google-gemini/gemini-cli/pull/23743) -- fix(cli): skip console log/info in headless mode by @cynthialong0-0 in - [#22739](https://github.com/google-gemini/gemini-cli/pull/22739) -- test(core): install bubblewrap on Linux CI for sandbox integration tests by - @ehedlund in [#23583](https://github.com/google-gemini/gemini-cli/pull/23583) -- docs(reference): split tools table into category sections by @sheikhlimon in - [#21516](https://github.com/google-gemini/gemini-cli/pull/21516) -- fix(browser): detect embedded URLs in query params to prevent allowedDomains - bypass by @tony-shi in - [#23225](https://github.com/google-gemini/gemini-cli/pull/23225) -- fix(browser): add proxy bypass constraint to domain restriction system prompt - by @tony-shi in - [#23229](https://github.com/google-gemini/gemini-cli/pull/23229) -- fix(policy): relax write_file argsPattern in plan mode to allow paths without - session ID by @Adib234 in - [#23695](https://github.com/google-gemini/gemini-cli/pull/23695) -- docs: fix grammar in CONTRIBUTING and numbering in sandbox docs by - @splint-disk-8i in - [#23448](https://github.com/google-gemini/gemini-cli/pull/23448) -- fix(acp): allow attachments by adding a permission prompt by @sripasg in - [#23680](https://github.com/google-gemini/gemini-cli/pull/23680) -- fix(core): thread AbortSignal to chat compression requests (#20405) by - @SH20RAJ in [#20778](https://github.com/google-gemini/gemini-cli/pull/20778) -- feat(core): implement Windows sandbox dynamic expansion Phase 1 and 2.1 by - @scidomino in [#23691](https://github.com/google-gemini/gemini-cli/pull/23691) -- Add note about root privileges in sandbox docs by @diodesign in - [#23314](https://github.com/google-gemini/gemini-cli/pull/23314) -- docs(core): document agent_card_json string literal options for remote agents - by @adamfweidman in - [#23797](https://github.com/google-gemini/gemini-cli/pull/23797) -- fix(cli): resolve TTY hang on 
headless environments by unconditionally - resuming process.stdin before React Ink launch by @cocosheng-g in - [#23673](https://github.com/google-gemini/gemini-cli/pull/23673) -- fix(ui): cleanup estimated string length hacks in composer by @keithguerin in - [#23694](https://github.com/google-gemini/gemini-cli/pull/23694) -- feat(browser): dynamically discover read-only tools by @cynthialong0-0 in - [#23805](https://github.com/google-gemini/gemini-cli/pull/23805) -- docs: clarify policy requirement for `general.plan.directory` in settings - schema by @jerop in - [#23784](https://github.com/google-gemini/gemini-cli/pull/23784) -- Revert "perf(cli): optimize --version startup time (#23671)" by @scidomino in - [#23812](https://github.com/google-gemini/gemini-cli/pull/23812) -- don't silence errors from wombat by @scidomino in - [#23822](https://github.com/google-gemini/gemini-cli/pull/23822) -- fix(ui): prevent escape key from cancelling requests in shell mode by - @PrasannaPal21 in - [#21245](https://github.com/google-gemini/gemini-cli/pull/21245) -- Changelog for v0.36.0-preview.0 by @gemini-cli-robot in - [#23702](https://github.com/google-gemini/gemini-cli/pull/23702) -- feat(core,ui): Add experiment-gated support for gemini flash 3.1 lite by - @chrstnb in [#23794](https://github.com/google-gemini/gemini-cli/pull/23794) -- Changelog for v0.36.0-preview.3 by @gemini-cli-robot in - [#23827](https://github.com/google-gemini/gemini-cli/pull/23827) -- new linting check: github-actions-pinning by @alisa-alisa in - [#23808](https://github.com/google-gemini/gemini-cli/pull/23808) -- fix(cli): show helpful guidance when no skills are available by @Niralisj in - [#23785](https://github.com/google-gemini/gemini-cli/pull/23785) -- fix: Chat logs and errors handle tail tool calls correctly by @googlestrobe in - [#22460](https://github.com/google-gemini/gemini-cli/pull/22460) -- Don't try removing a tag from a non-existent release. 
by @scidomino in - [#23830](https://github.com/google-gemini/gemini-cli/pull/23830) -- fix(cli): allow ask question dialog to take full window height by @jacob314 in - [#23693](https://github.com/google-gemini/gemini-cli/pull/23693) -- fix(core): strip leading underscores from error types in telemetry by - @yunaseoul in [#23824](https://github.com/google-gemini/gemini-cli/pull/23824) -- Changelog for v0.35.0 by @gemini-cli-robot in - [#23819](https://github.com/google-gemini/gemini-cli/pull/23819) -- feat(evals): add reliability harvester and 500/503 retry support by - @alisa-alisa in - [#23626](https://github.com/google-gemini/gemini-cli/pull/23626) -- feat(sandbox): dynamic Linux sandbox expansion and worktree support by @galz10 - in [#23692](https://github.com/google-gemini/gemini-cli/pull/23692) -- Merge examples of use into quickstart documentation by @diodesign in - [#23319](https://github.com/google-gemini/gemini-cli/pull/23319) -- fix(cli): prioritize primary name matches in slash command search by @sehoon38 - in [#23850](https://github.com/google-gemini/gemini-cli/pull/23850) -- Changelog for v0.35.1 by @gemini-cli-robot in - [#23840](https://github.com/google-gemini/gemini-cli/pull/23840) -- fix(browser): keep input blocker active across navigations by @kunal-10-cloud - in [#22562](https://github.com/google-gemini/gemini-cli/pull/22562) -- feat(core): new skill to look for duplicated code while reviewing PRs by - @devr0306 in [#23704](https://github.com/google-gemini/gemini-cli/pull/23704) -- fix(core): replace hardcoded non-interactive ASK_USER denial with explicit - policy rules by @ruomengz in - [#23668](https://github.com/google-gemini/gemini-cli/pull/23668) -- fix(plan): after exiting plan mode switches model to a flash model by @Adib234 - in [#23885](https://github.com/google-gemini/gemini-cli/pull/23885) -- feat(gcp): add development worker infrastructure by @mattKorwel in - [#23814](https://github.com/google-gemini/gemini-cli/pull/23814) -- 
fix(a2a-server): A2A server should execute ask policies in interactive mode by - @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) -- feat(core): define TrajectoryProvider interface by @sehoon38 in - [#23050](https://github.com/google-gemini/gemini-cli/pull/23050) -- Docs: Update quotas and pricing by @jkcinouye in - [#23835](https://github.com/google-gemini/gemini-cli/pull/23835) -- fix(core): allow disabling environment variable redaction by @galz10 in - [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) -- feat(cli): enable notifications cross-platform via terminal bell fallback by - @genneth in [#21618](https://github.com/google-gemini/gemini-cli/pull/21618) -- feat(sandbox): implement secret visibility lockdown for env files by - @DavidAPierce in - [#23712](https://github.com/google-gemini/gemini-cli/pull/23712) -- fix(core): remove shell outputChunks buffer caching to prevent memory bloat - and sanitize prompt input by @spencer426 in - [#23751](https://github.com/google-gemini/gemini-cli/pull/23751) -- feat(core): implement persistent browser session management by @kunal-10-cloud - in [#21306](https://github.com/google-gemini/gemini-cli/pull/21306) -- refactor(core): delegate sandbox denial parsing to SandboxManager by - @scidomino in [#23928](https://github.com/google-gemini/gemini-cli/pull/23928) -- dep(update) Update Ink version to 6.5.0 by @jacob314 in - [#23843](https://github.com/google-gemini/gemini-cli/pull/23843) -- Docs: Update 'docs-writer' skill for relative links by @jkcinouye in - [#21463](https://github.com/google-gemini/gemini-cli/pull/21463) -- Changelog for v0.36.0-preview.4 by @gemini-cli-robot in - [#23935](https://github.com/google-gemini/gemini-cli/pull/23935) -- fix(acp): Update allow approval policy flow for ACP clients to fix config - persistence and compatible with TUI by @sripasg in - [#23818](https://github.com/google-gemini/gemini-cli/pull/23818) -- Changelog for v0.35.2 by 
@gemini-cli-robot in - [#23960](https://github.com/google-gemini/gemini-cli/pull/23960) -- ACP integration documents by @g-samroberts in - [#22254](https://github.com/google-gemini/gemini-cli/pull/22254) -- fix(core): explicitly set error names to avoid bundling renaming issues by - @yunaseoul in [#23913](https://github.com/google-gemini/gemini-cli/pull/23913) -- feat(core): subagent isolation and cleanup hardening by @abhipatel12 in - [#23903](https://github.com/google-gemini/gemini-cli/pull/23903) -- disable extension-reload test by @scidomino in - [#24018](https://github.com/google-gemini/gemini-cli/pull/24018) -- feat(core): add forbiddenPaths to GlobalSandboxOptions and refactor - createSandboxManager by @ehedlund in - [#23936](https://github.com/google-gemini/gemini-cli/pull/23936) -- refactor(core): improve ignore resolution and fix directory-matching bug by - @ehedlund in [#23816](https://github.com/google-gemini/gemini-cli/pull/23816) -- revert(core): support custom base URL via env vars by @spencer426 in - [#23976](https://github.com/google-gemini/gemini-cli/pull/23976) -- Increase memory limited for eslint. by @jacob314 in - [#24022](https://github.com/google-gemini/gemini-cli/pull/24022) -- fix(acp): prevent crash on empty response in ACP mode by @sripasg in - [#23952](https://github.com/google-gemini/gemini-cli/pull/23952) -- feat(core): Land `AgentHistoryProvider`. by @joshualitt in - [#23978](https://github.com/google-gemini/gemini-cli/pull/23978) -- fix(core): switch to subshells for shell tool wrapping to fix heredocs and - edge cases by @abhipatel12 in - [#24024](https://github.com/google-gemini/gemini-cli/pull/24024) -- Debug command. 
by @jacob314 in - [#23851](https://github.com/google-gemini/gemini-cli/pull/23851) -- Changelog for v0.36.0-preview.5 by @gemini-cli-robot in - [#24046](https://github.com/google-gemini/gemini-cli/pull/24046) -- Fix test flakes by globally mocking ink-spinner by @jacob314 in - [#24044](https://github.com/google-gemini/gemini-cli/pull/24044) -- Enable network access in sandbox configuration by @galz10 in - [#24055](https://github.com/google-gemini/gemini-cli/pull/24055) -- feat(context): add configurable memoryBoundaryMarkers setting by @SandyTao520 - in [#24020](https://github.com/google-gemini/gemini-cli/pull/24020) -- feat(core): implement windows sandbox expansion and denial detection by - @scidomino in [#24027](https://github.com/google-gemini/gemini-cli/pull/24027) -- fix(core): resolve ACP Operation Aborted Errors in grep_search by @ivanporty - in [#23821](https://github.com/google-gemini/gemini-cli/pull/23821) -- fix(hooks): prevent SessionEnd from firing twice in non-interactive mode by - @krishdef7 in [#22139](https://github.com/google-gemini/gemini-cli/pull/22139) -- Re-word intro to Gemini 3 page. 
by @g-samroberts in - [#24069](https://github.com/google-gemini/gemini-cli/pull/24069) -- fix(cli): resolve layout contention and flashing loop in StatusRow by - @keithguerin in - [#24065](https://github.com/google-gemini/gemini-cli/pull/24065) -- fix(sandbox): implement Windows Mandatory Integrity Control for GeminiSandbox - by @galz10 in [#24057](https://github.com/google-gemini/gemini-cli/pull/24057) -- feat(core): implement tool-based topic grouping (Chapters) by @Abhijit-2592 in - [#23150](https://github.com/google-gemini/gemini-cli/pull/23150) -- feat(cli): support 'tab to queue' for messages while generating by @gundermanc - in [#24052](https://github.com/google-gemini/gemini-cli/pull/24052) -- feat(core): agnostic background task UI with CompletionBehavior by - @adamfweidman in - [#22740](https://github.com/google-gemini/gemini-cli/pull/22740) -- UX for topic narration tool by @gundermanc in - [#24079](https://github.com/google-gemini/gemini-cli/pull/24079) -- fix: shellcheck warnings in scripts by @scidomino in - [#24035](https://github.com/google-gemini/gemini-cli/pull/24035) -- test(evals): add comprehensive subagent delegation evaluations by @abhipatel12 - in [#24132](https://github.com/google-gemini/gemini-cli/pull/24132) -- fix(a2a-server): prioritize ADC before evaluating headless constraints for - auth initialization by @spencer426 in - [#23614](https://github.com/google-gemini/gemini-cli/pull/23614) -- Text can be added after /plan command by @rambleraptor in - [#22833](https://github.com/google-gemini/gemini-cli/pull/22833) -- fix(cli): resolve missing F12 logs via global console store by @scidomino in - [#24235](https://github.com/google-gemini/gemini-cli/pull/24235) -- fix broken tests by @scidomino in - [#24279](https://github.com/google-gemini/gemini-cli/pull/24279) -- fix(evals): add update_topic behavioral eval by @gundermanc in - [#24223](https://github.com/google-gemini/gemini-cli/pull/24223) -- feat(core): Unified Context Management and 
Tool Distillation. by @joshualitt - in [#24157](https://github.com/google-gemini/gemini-cli/pull/24157) -- Default enable narration for the team. by @gundermanc in - [#24224](https://github.com/google-gemini/gemini-cli/pull/24224) -- fix(core): ensure default agents provide tools and use model-specific schemas - by @abhipatel12 in - [#24268](https://github.com/google-gemini/gemini-cli/pull/24268) -- feat(cli): show Flash Lite Preview model regardless of user tier by @sehoon38 - in [#23904](https://github.com/google-gemini/gemini-cli/pull/23904) -- feat(cli): implement compact tool output by @jwhelangoog in - [#20974](https://github.com/google-gemini/gemini-cli/pull/20974) -- Add security settings for tool sandboxing by @galz10 in - [#23923](https://github.com/google-gemini/gemini-cli/pull/23923) -- chore(test-utils): switch integration tests to use PREVIEW_GEMINI_MODEL by - @sehoon38 in [#24276](https://github.com/google-gemini/gemini-cli/pull/24276) -- feat(core): enable topic update narration for legacy models by @Abhijit-2592 - in [#24241](https://github.com/google-gemini/gemini-cli/pull/24241) -- feat(core): add project-level memory scope to save_memory tool by @SandyTao520 - in [#24161](https://github.com/google-gemini/gemini-cli/pull/24161) -- test(integration): fix plan mode write denial test false positive by @sehoon38 - in [#24299](https://github.com/google-gemini/gemini-cli/pull/24299) -- feat(plan): support `Plan` mode in untrusted folders by @Adib234 in - [#17586](https://github.com/google-gemini/gemini-cli/pull/17586) -- fix(core): enable mid-stream retries for all models and re-enable compression - test by @sehoon38 in - [#24302](https://github.com/google-gemini/gemini-cli/pull/24302) -- Changelog for v0.36.0-preview.6 by @gemini-cli-robot in - [#24082](https://github.com/google-gemini/gemini-cli/pull/24082) -- Changelog for v0.35.3 by @gemini-cli-robot in - [#24083](https://github.com/google-gemini/gemini-cli/pull/24083) -- feat(cli): add auth info 
to footer by @sehoon38 in - [#24042](https://github.com/google-gemini/gemini-cli/pull/24042) -- fix(browser): reset action counter for each agent session and let it ignore - internal actions by @cynthialong0-0 in - [#24228](https://github.com/google-gemini/gemini-cli/pull/24228) -- feat(plan): promote planning feature to stable by @ruomengz in - [#24282](https://github.com/google-gemini/gemini-cli/pull/24282) -- fix(browser): terminate subagent immediately on domain restriction violations - by @gsquared94 in - [#24313](https://github.com/google-gemini/gemini-cli/pull/24313) -- feat(cli): add UI to update extensions by @ruomengz in - [#23682](https://github.com/google-gemini/gemini-cli/pull/23682) -- Fix(browser): terminate immediately for "browser is already running" error by - @cynthialong0-0 in - [#24233](https://github.com/google-gemini/gemini-cli/pull/24233) -- docs: Add 'plan' option to approval mode in CLI reference by @YifanRuan in - [#24134](https://github.com/google-gemini/gemini-cli/pull/24134) -- fix(core): batch macOS seatbelt rules into a profile file to prevent ARG_MAX - errors by @ehedlund in - [#24255](https://github.com/google-gemini/gemini-cli/pull/24255) -- fix(core): fix race condition between browser agent and main closing process - by @cynthialong0-0 in - [#24340](https://github.com/google-gemini/gemini-cli/pull/24340) -- perf(build): optimize build scripts for parallel execution and remove - redundant checks by @sehoon38 in - [#24307](https://github.com/google-gemini/gemini-cli/pull/24307) -- ci: install bubblewrap on Linux for release workflows by @ehedlund in - [#24347](https://github.com/google-gemini/gemini-cli/pull/24347) -- chore(release): allow bundling for all builds, including stable by @sehoon38 - in [#24305](https://github.com/google-gemini/gemini-cli/pull/24305) -- Revert "Add security settings for tool sandboxing" by @jerop in - [#24357](https://github.com/google-gemini/gemini-cli/pull/24357) -- docs: update subagents docs to not 
be experimental by @abhipatel12 in - [#24343](https://github.com/google-gemini/gemini-cli/pull/24343) -- fix(core): implement **read and **write commands in sandbox managers by - @galz10 in [#24283](https://github.com/google-gemini/gemini-cli/pull/24283) -- don't try to remove tags in dry run by @scidomino in - [#24356](https://github.com/google-gemini/gemini-cli/pull/24356) -- fix(config): disable JIT context loading by default by @SandyTao520 in - [#24364](https://github.com/google-gemini/gemini-cli/pull/24364) -- test(sandbox): add integration test for dynamic permission expansion by - @galz10 in [#24359](https://github.com/google-gemini/gemini-cli/pull/24359) -- docs(policy): remove unsupported mcpName wildcard edge case by @abhipatel12 in - [#24133](https://github.com/google-gemini/gemini-cli/pull/24133) -- docs: fix broken GEMINI.md link in CONTRIBUTING.md by @Panchal-Tirth in - [#24182](https://github.com/google-gemini/gemini-cli/pull/24182) -- feat(core): infrastructure for event-driven subagent history by @abhipatel12 - in [#23914](https://github.com/google-gemini/gemini-cli/pull/23914) -- fix(core): resolve Plan Mode deadlock during plan file creation due to sandbox - restrictions by @DavidAPierce in - [#24047](https://github.com/google-gemini/gemini-cli/pull/24047) -- fix(core): fix browser agent UX issues and improve E2E test reliability by - @gsquared94 in - [#24312](https://github.com/google-gemini/gemini-cli/pull/24312) -- fix(ui): wrap topic and intent fields in TopicMessage by @jwhelangoog in - [#24386](https://github.com/google-gemini/gemini-cli/pull/24386) -- refactor(core): Centralize context management logic into src/context by - @joshualitt in - [#24380](https://github.com/google-gemini/gemini-cli/pull/24380) -- fix(core): pin AuthType.GATEWAY to use Gemini 3.1 Pro/Flash Lite by default by - @sripasg in [#24375](https://github.com/google-gemini/gemini-cli/pull/24375) -- feat(ui): add Tokyo Night theme by @danrneal in - 
[#24054](https://github.com/google-gemini/gemini-cli/pull/24054) -- fix(cli): refactor test config loading and mock debugLogger in test-setup by - @mattKorwel in - [#24389](https://github.com/google-gemini/gemini-cli/pull/24389) -- Set memoryManager to false in settings.json by @mattKorwel in - [#24393](https://github.com/google-gemini/gemini-cli/pull/24393) -- ink 6.6.3 by @jacob314 in - [#24372](https://github.com/google-gemini/gemini-cli/pull/24372) -- fix(core): resolve subagent chat recording gaps and directory inheritance by +- fix(cli): refresh slash command list after /skills reload by @NTaylorMullen in + [#24454](https://github.com/google-gemini/gemini-cli/pull/24454) +- Update README.md for links. by @g-samroberts in + [#22759](https://github.com/google-gemini/gemini-cli/pull/22759) +- fix(core): ensure complete_task tool calls are recorded in chat history by @abhipatel12 in - [#24368](https://github.com/google-gemini/gemini-cli/pull/24368) -- fix(cli): cap shell output at 10 MB to prevent RangeError crash by @ProthamD - in [#24168](https://github.com/google-gemini/gemini-cli/pull/24168) -- feat(plan): conditionally add enter/exit plan mode tools based on current mode - by @ruomengz in - [#24378](https://github.com/google-gemini/gemini-cli/pull/24378) -- feat(core): prioritize discussion before formal plan approval by @jerop in - [#24423](https://github.com/google-gemini/gemini-cli/pull/24423) -- fix(ui): add accelerated scrolling on alternate buffer mode by @devr0306 in - [#23940](https://github.com/google-gemini/gemini-cli/pull/23940) -- feat(core): populate sandbox forbidden paths with project ignore file contents - by @ehedlund in - [#24038](https://github.com/google-gemini/gemini-cli/pull/24038) -- fix(core): ensure blue border overlay and input blocker to act correctly - depending on browser agent activities by @cynthialong0-0 in - [#24385](https://github.com/google-gemini/gemini-cli/pull/24385) -- fix(ui): removed additional vertical padding for 
tables by @devr0306 in - [#24381](https://github.com/google-gemini/gemini-cli/pull/24381) -- fix(build): upload full bundle directory archive to GitHub releases by - @sehoon38 in [#24403](https://github.com/google-gemini/gemini-cli/pull/24403) -- fix(build): wire bundle:browser-mcp into bundle pipeline by @gsquared94 in - [#24424](https://github.com/google-gemini/gemini-cli/pull/24424) -- feat(browser): add sandbox-aware browser agent initialization by @gsquared94 - in [#24419](https://github.com/google-gemini/gemini-cli/pull/24419) -- feat(core): enhance tracker task schemas for detailed titles and descriptions - by @anj-s in [#23902](https://github.com/google-gemini/gemini-cli/pull/23902) -- refactor(core): Unified context management settings schema by @joshualitt in - [#24391](https://github.com/google-gemini/gemini-cli/pull/24391) -- feat(core): update browser agent prompt to check open pages first when - bringing up by @cynthialong0-0 in - [#24431](https://github.com/google-gemini/gemini-cli/pull/24431) -- fix(acp) refactor(core,cli): centralize model discovery logic in - ModelConfigService by @sripasg in - [#24392](https://github.com/google-gemini/gemini-cli/pull/24392) -- Changelog for v0.36.0-preview.7 by @gemini-cli-robot in - [#24346](https://github.com/google-gemini/gemini-cli/pull/24346) -- fix: update task tracker storage location in system prompt by @anj-s in - [#24034](https://github.com/google-gemini/gemini-cli/pull/24034) -- feat(browser): supersede stale snapshots to reclaim context-window tokens by + [#24437](https://github.com/google-gemini/gemini-cli/pull/24437) +- feat(policy): explicitly allow web_fetch in plan mode with ask_user by + @Adib234 in [#24456](https://github.com/google-gemini/gemini-cli/pull/24456) +- fix(core): refactor linux sandbox to fix ARG_MAX crashes by @ehedlund in + [#24286](https://github.com/google-gemini/gemini-cli/pull/24286) +- feat(config): add experimental.adk.agentSessionNoninteractiveEnabled setting + by 
@adamfweidman in + [#24439](https://github.com/google-gemini/gemini-cli/pull/24439) +- Changelog for v0.36.0-preview.8 by @gemini-cli-robot in + [#24453](https://github.com/google-gemini/gemini-cli/pull/24453) +- feat(cli): change default loadingPhrases to 'off' to hide tips by @keithguerin + in [#24342](https://github.com/google-gemini/gemini-cli/pull/24342) +- fix(cli): ensure agent stops when all declinable tools are cancelled by + @NTaylorMullen in + [#24479](https://github.com/google-gemini/gemini-cli/pull/24479) +- fix(core): enhance sandbox usability and fix build error by @galz10 in + [#24460](https://github.com/google-gemini/gemini-cli/pull/24460) +- Terminal Serializer Optimization by @jacob314 in + [#24485](https://github.com/google-gemini/gemini-cli/pull/24485) +- Auto configure memory. by @jacob314 in + [#24474](https://github.com/google-gemini/gemini-cli/pull/24474) +- Unused error variables in catch block are not allowed by @alisa-alisa in + [#24487](https://github.com/google-gemini/gemini-cli/pull/24487) +- feat(core): add background memory service for skill extraction by @SandyTao520 + in [#24274](https://github.com/google-gemini/gemini-cli/pull/24274) +- feat: implement high-signal PR regression check for evaluations by + @alisa-alisa in + [#23937](https://github.com/google-gemini/gemini-cli/pull/23937) +- Fix shell output display by @jacob314 in + [#24490](https://github.com/google-gemini/gemini-cli/pull/24490) +- fix(ui): resolve unwanted vertical spacing around various tool output + treatments by @jwhelangoog in + [#24449](https://github.com/google-gemini/gemini-cli/pull/24449) +- revert(cli): bring back input box and footer visibility in copy mode by + @sehoon38 in [#24504](https://github.com/google-gemini/gemini-cli/pull/24504) +- fix(cli): prevent crash in AnsiOutputText when handling non-array data by + @sehoon38 in [#24498](https://github.com/google-gemini/gemini-cli/pull/24498) +- feat(cli): support default values for environment 
variables by @ruomengz in + [#24469](https://github.com/google-gemini/gemini-cli/pull/24469) +- Implement background process monitoring and inspection tools by @cocosheng-g + in [#23799](https://github.com/google-gemini/gemini-cli/pull/23799) +- docs(browser-agent): update stale browser agent documentation by @gsquared94 + in [#24463](https://github.com/google-gemini/gemini-cli/pull/24463) +- fix: enable browser_agent in integration tests and add localhost fixture tests + by @gsquared94 in + [#24523](https://github.com/google-gemini/gemini-cli/pull/24523) +- fix(browser): handle computer-use model detection for analyze_screenshot by @gsquared94 in - [#24440](https://github.com/google-gemini/gemini-cli/pull/24440) -- docs(core): add subagent tool isolation draft doc by @akh64bit in - [#23275](https://github.com/google-gemini/gemini-cli/pull/23275) + [#24502](https://github.com/google-gemini/gemini-cli/pull/24502) +- feat(core): Land ContextCompressionService by @joshualitt in + [#24483](https://github.com/google-gemini/gemini-cli/pull/24483) +- feat(core): scope subagent workspace directories via AsyncLocalStorage by + @SandyTao520 in + [#24445](https://github.com/google-gemini/gemini-cli/pull/24445) +- Update ink version to 6.6.7 by @jacob314 in + [#24514](https://github.com/google-gemini/gemini-cli/pull/24514) +- fix(acp): handle all InvalidStreamError types gracefully in prompt by @sripasg + in [#24540](https://github.com/google-gemini/gemini-cli/pull/24540) +- Fix crash when vim editor is not found in PATH on Windows by + @Nagajyothi-tammisetti in + [#22423](https://github.com/google-gemini/gemini-cli/pull/22423) +- fix(core): move project memory dir under tmp directory by @SandyTao520 in + [#24542](https://github.com/google-gemini/gemini-cli/pull/24542) +- Enable 'Other' option for yesno question type by @ruomengz in + [#24545](https://github.com/google-gemini/gemini-cli/pull/24545) +- fix(cli): clear stale retry/loading state after cancellation (#21096) by + 
@Aaxhirrr in [#21960](https://github.com/google-gemini/gemini-cli/pull/21960) +- Changelog for v0.37.0-preview.0 by @gemini-cli-robot in + [#24464](https://github.com/google-gemini/gemini-cli/pull/24464) +- feat(core): implement context-aware persistent policy approvals by @jerop in + [#23257](https://github.com/google-gemini/gemini-cli/pull/23257) +- docs: move agent disabling instructions and update remote agent status by + @jackwotherspoon in + [#24559](https://github.com/google-gemini/gemini-cli/pull/24559) +- feat(cli): migrate nonInteractiveCli to LegacyAgentSession by @adamfweidman in + [#22987](https://github.com/google-gemini/gemini-cli/pull/22987) +- fix(core): unsafe type assertions in Core File System #19712 by + @aniketsaurav18 in + [#19739](https://github.com/google-gemini/gemini-cli/pull/19739) +- fix(ui): hide model quota in /stats and refactor quota display by @danzaharia1 + in [#24206](https://github.com/google-gemini/gemini-cli/pull/24206) +- Changelog for v0.36.0 by @gemini-cli-robot in + [#24558](https://github.com/google-gemini/gemini-cli/pull/24558) +- Changelog for v0.37.0-preview.1 by @gemini-cli-robot in + [#24568](https://github.com/google-gemini/gemini-cli/pull/24568) +- docs: add missing .md extensions to internal doc links by @ishaan-arora-1 in + [#24145](https://github.com/google-gemini/gemini-cli/pull/24145) +- fix(ui): fixed table styling by @devr0306 in + [#24565](https://github.com/google-gemini/gemini-cli/pull/24565) +- fix(core): pass includeDirectories to sandbox configuration by @galz10 in + [#24573](https://github.com/google-gemini/gemini-cli/pull/24573) +- feat(ui): enable "TerminalBuffer" mode to solve flicker by @jacob314 in + [#24512](https://github.com/google-gemini/gemini-cli/pull/24512) +- docs: clarify release coordination by @scidomino in + [#24575](https://github.com/google-gemini/gemini-cli/pull/24575) +- fix(core): remove broken PowerShell translation and fix native \_\_write in + Windows sandbox by @scidomino in 
+ [#24571](https://github.com/google-gemini/gemini-cli/pull/24571) +- Add instructions for how to start react in prod and force react to prod mode + by @jacob314 in + [#24590](https://github.com/google-gemini/gemini-cli/pull/24590) +- feat(cli): minimalist sandbox status labels by @galz10 in + [#24582](https://github.com/google-gemini/gemini-cli/pull/24582) +- Feat/browser agent metrics by @kunal-10-cloud in + [#24210](https://github.com/google-gemini/gemini-cli/pull/24210) +- test: fix Windows CI execution and resolve exposed platform failures by + @ehedlund in [#24476](https://github.com/google-gemini/gemini-cli/pull/24476) +- feat(core,cli): prioritize summary for topics (#24608) by @Abhijit-2592 in + [#24609](https://github.com/google-gemini/gemini-cli/pull/24609) +- show color by @jacob314 in + [#24613](https://github.com/google-gemini/gemini-cli/pull/24613) +- feat(cli): enable compact tool output by default (#24509) by @jwhelangoog in + [#24510](https://github.com/google-gemini/gemini-cli/pull/24510) +- fix(core): inject skill system instructions into subagent prompts if activated + by @abhipatel12 in + [#24620](https://github.com/google-gemini/gemini-cli/pull/24620) +- fix(core): improve windows sandbox reliability and fix integration tests by + @ehedlund in [#24480](https://github.com/google-gemini/gemini-cli/pull/24480) +- fix(core): ensure sandbox approvals are correctly persisted and matched for + proactive expansions by @galz10 in + [#24577](https://github.com/google-gemini/gemini-cli/pull/24577) +- feat(cli) Scrollbar for input prompt by @jacob314 in + [#21992](https://github.com/google-gemini/gemini-cli/pull/21992) +- Do not run pr-eval workflow when no steering changes detected by @alisa-alisa + in [#24621](https://github.com/google-gemini/gemini-cli/pull/24621) +- Fix restoration of topic headers. 
by @gundermanc in + [#24650](https://github.com/google-gemini/gemini-cli/pull/24650) +- feat(core): discourage update topic tool for simple tasks by @Samee24 in + [#24640](https://github.com/google-gemini/gemini-cli/pull/24640) +- fix(core): ensure global temp directory is always in sandbox allowed paths by + @galz10 in [#24638](https://github.com/google-gemini/gemini-cli/pull/24638) +- fix(core): detect uninitialized lines by @jacob314 in + [#24646](https://github.com/google-gemini/gemini-cli/pull/24646) +- docs: update sandboxing documentation and toolSandboxing settings by @galz10 + in [#24655](https://github.com/google-gemini/gemini-cli/pull/24655) +- feat(cli): enhance tool confirmation UI and selection layout by @galz10 in + [#24376](https://github.com/google-gemini/gemini-cli/pull/24376) +- feat(acp): add support for `/about` command by @sripasg in + [#24649](https://github.com/google-gemini/gemini-cli/pull/24649) +- feat(cli): add role specific metrics to /stats by @cynthialong0-0 in + [#24659](https://github.com/google-gemini/gemini-cli/pull/24659) +- split context by @jacob314 in + [#24623](https://github.com/google-gemini/gemini-cli/pull/24623) +- fix(cli): remove -S from shebang to fix Windows and BSD execution by + @scidomino in [#24756](https://github.com/google-gemini/gemini-cli/pull/24756) +- Fix issue where topic headers can be posted back to back by @gundermanc in + [#24759](https://github.com/google-gemini/gemini-cli/pull/24759) +- fix(core): handle partial llm_request in BeforeModel hook override by + @krishdef7 in [#22326](https://github.com/google-gemini/gemini-cli/pull/22326) +- fix(ui): improve narration suppression and reduce flicker by @gundermanc in + [#24635](https://github.com/google-gemini/gemini-cli/pull/24635) +- fix(ui): fixed auth race condition causing logo to flicker by @devr0306 in + [#24652](https://github.com/google-gemini/gemini-cli/pull/24652) +- fix(browser): remove premature browser cleanup after subagent invocation by + 
@gsquared94 in + [#24753](https://github.com/google-gemini/gemini-cli/pull/24753) +- Revert "feat(core,cli): prioritize summary for topics (#24608)" by + @Abhijit-2592 in + [#24777](https://github.com/google-gemini/gemini-cli/pull/24777) +- relax tool sandboxing overrides for plan mode to match defaults. by + @DavidAPierce in + [#24762](https://github.com/google-gemini/gemini-cli/pull/24762) +- fix(cli): respect global environment variable allowlist by @scidomino in + [#24767](https://github.com/google-gemini/gemini-cli/pull/24767) +- fix(cli): ensure skills list outputs to stdout in non-interactive environments + by @spencer426 in + [#24566](https://github.com/google-gemini/gemini-cli/pull/24566) +- Add an eval for and fix unsafe cloning behavior. by @gundermanc in + [#24457](https://github.com/google-gemini/gemini-cli/pull/24457) +- fix(policy): allow complete_task in plan mode by @abhipatel12 in + [#24771](https://github.com/google-gemini/gemini-cli/pull/24771) +- feat(telemetry): add browser agent clearcut metrics by @gsquared94 in + [#24688](https://github.com/google-gemini/gemini-cli/pull/24688) +- feat(cli): support selective topic expansion and click-to-expand by + @Abhijit-2592 in + [#24793](https://github.com/google-gemini/gemini-cli/pull/24793) +- temporarily disable sandbox integration test on windows by @ehedlund in + [#24786](https://github.com/google-gemini/gemini-cli/pull/24786) +- Remove flakey test by @scidomino in + [#24837](https://github.com/google-gemini/gemini-cli/pull/24837) +- Alisa/approve button by @alisa-alisa in + [#24645](https://github.com/google-gemini/gemini-cli/pull/24645) +- feat(hooks): display hook system messages in UI by @mbleigh in + [#24616](https://github.com/google-gemini/gemini-cli/pull/24616) +- fix(core): propagate BeforeModel hook model override end-to-end by @krishdef7 + in [#24784](https://github.com/google-gemini/gemini-cli/pull/24784) +- chore: fix formatting for behavioral eval skill reference file by @abhipatel12 
+ in [#24846](https://github.com/google-gemini/gemini-cli/pull/24846) +- fix: use directory junctions on Windows for skill linking by @enjoykumawat in + [#24823](https://github.com/google-gemini/gemini-cli/pull/24823) +- fix(cli): prevent multiple banner increments on remount by @sehoon38 in + [#24843](https://github.com/google-gemini/gemini-cli/pull/24843) +- feat(acp): add /help command by @sripasg in + [#24839](https://github.com/google-gemini/gemini-cli/pull/24839) +- fix(core): remove tmux alternate buffer warning by @jackwotherspoon in + [#24852](https://github.com/google-gemini/gemini-cli/pull/24852) +- Improve sandbox error matching and caching by @DavidAPierce in + [#24550](https://github.com/google-gemini/gemini-cli/pull/24550) +- feat(core): add agent protocol UI types and experimental flag by @mbleigh in + [#24275](https://github.com/google-gemini/gemini-cli/pull/24275) +- feat(core): use experiment flags for default fetch timeouts by @yunaseoul in + [#24261](https://github.com/google-gemini/gemini-cli/pull/24261) +- Revert "fix(ui): improve narration suppression and reduce flicker (#2… by + @gundermanc in + [#24857](https://github.com/google-gemini/gemini-cli/pull/24857) +- refactor(cli): remove duplication in interactive shell awaiting input hint by + @JayadityaGit in + [#24801](https://github.com/google-gemini/gemini-cli/pull/24801) +- refactor(core): make LegacyAgentSession dependencies optional by @mbleigh in + [#24287](https://github.com/google-gemini/gemini-cli/pull/24287) +- Changelog for v0.37.0-preview.2 by @gemini-cli-robot in + [#24848](https://github.com/google-gemini/gemini-cli/pull/24848) +- fix(cli): always show shell command description or actual command by @jacob314 + in [#24774](https://github.com/google-gemini/gemini-cli/pull/24774) +- Added flag for ept size and increased default size by @devr0306 in + [#24859](https://github.com/google-gemini/gemini-cli/pull/24859) +- fix(core): dispose Scheduler to prevent McpProgress listener 
leak by + @Anjaligarhwal in + [#24870](https://github.com/google-gemini/gemini-cli/pull/24870) +- fix(cli): switch default back to terminalBuffer=false and fix regressions + introduced for that mode by @jacob314 in + [#24873](https://github.com/google-gemini/gemini-cli/pull/24873) +- feat(cli): switch to ctrl+g from ctrl-x by @jacob314 in + [#24861](https://github.com/google-gemini/gemini-cli/pull/24861) +- fix: isolate concurrent browser agent instances by @gsquared94 in + [#24794](https://github.com/google-gemini/gemini-cli/pull/24794) +- docs: update MCP server OAuth redirect port documentation by @adamfweidman in + [#24844](https://github.com/google-gemini/gemini-cli/pull/24844) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.36.0-preview.8...v0.37.0-preview.2 +https://github.com/google-gemini/gemini-cli/compare/v0.37.0-preview.2...v0.38.0-preview.0 From 8b01d785125d4e386a4ce4914f16aeb99e935217 Mon Sep 17 00:00:00 2001 From: JAYADITYA <96861162+JayadityaGit@users.noreply.github.com> Date: Thu, 9 Apr 2026 02:26:02 +0530 Subject: [PATCH 25/39] chore: ignore conductor directory (#22128) Co-authored-by: Coco Sheng --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index ebb94151e8..b6e3804ab5 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,6 @@ gemini-debug.log evals/logs/ temp_agents/ + +# conductor extension and planning directories +conductor/ From 18cb7fd46c07ad0c3bf6a611b5bb168e16ca94df Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Wed, 8 Apr 2026 14:41:55 -0700 Subject: [PATCH 26/39] Changelog for v0.37.0 (#24940) Co-authored-by: gemini-cli-robot <224641728+gemini-cli-robot@users.noreply.github.com> Co-authored-by: Sam Roberts <158088236+g-samroberts@users.noreply.github.com> --- docs/changelogs/index.md | 21 ++ docs/changelogs/latest.md | 757 ++++++++++++++++++++------------------ 2 files changed, 418 insertions(+), 360 deletions(-) diff --git a/docs/changelogs/index.md 
b/docs/changelogs/index.md index ac3a433d0e..d9713c973a 100644 --- a/docs/changelogs/index.md +++ b/docs/changelogs/index.md @@ -18,6 +18,27 @@ on GitHub. | [Preview](preview.md) | Experimental features ready for early feedback. | | [Stable](latest.md) | Stable, recommended for general use. | +## Announcements: v0.37.0 - 2026-04-08 + +- **Dynamic Sandbox Expansion:** Implemented dynamic sandbox expansion and + worktree support for Linux and Windows, improving developer workflows in + isolated environments + ([#23692](https://github.com/google-gemini/gemini-cli/pull/23692) by @galz10, + [#23691](https://github.com/google-gemini/gemini-cli/pull/23691) by + @scidomino). +- **Chapters Narrative Flow:** Introduced tool-based topic grouping ("Chapters") + to provide better session structure and narrative continuity + ([#23150](https://github.com/google-gemini/gemini-cli/pull/23150) by + @Abhijit-2592, + [#24079](https://github.com/google-gemini/gemini-cli/pull/24079) by + @gundermanc). +- **Advanced Browser Capabilities:** Enhanced the browser agent with persistent + sessions and dynamic tool discovery + ([#21306](https://github.com/google-gemini/gemini-cli/pull/21306) by + @kunal-10-cloud, + [#23805](https://github.com/google-gemini/gemini-cli/pull/23805) by + @cynthialong0-0). + ## Announcements: v0.36.0 - 2026-04-01 - **Multi-Registry Architecture and Sandboxing:** Introduced a multi-registry diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index d776a43135..f57ea4b56d 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.36.0 +# Latest stable release: v0.37.0 -Released: April 1, 2026 +Released: April 08, 2026 For most users, our latest stable release is the recommended release. 
Install the latest stable version with: @@ -11,372 +11,409 @@ npm install -g @google/gemini-cli ## Highlights -- **Multi-Registry Architecture and Tool Isolation:** Introduced a - multi-registry architecture for subagents and implemented strict sandboxing - for macOS (Seatbelt) and Windows to enhance security and isolation. -- **Improved Subagent Coordination:** Enhanced subagents with local execution - capabilities, JIT context injection (upward traversal capped at git root), and - resilient tool rejection with contextual feedback. -- **Enhanced UI and UX:** Implemented a refreshed UX for the Composer layout, - improved terminal fallback warnings, and resolved various UI flickering and - state persistence issues. -- **Git Worktree Support:** Added support for Git worktrees to enable isolated - parallel sessions within the same repository. -- **Plan Mode Improvements:** Plan mode now supports non-interactive execution - and includes hardened sandbox path resolution to prevent hallucinations. +- **Dynamic Sandbox Expansion:** Implemented dynamic sandbox expansion and + worktree support for both Linux and Windows, enhancing development flexibility + in restricted environments. +- **Tool-Based Topic Grouping (Chapters):** Introduced "Chapters" to logically + group agent interactions based on tool usage and intent, providing a clearer + narrative flow in long sessions. +- **Enhanced Browser Agent:** Added persistent session management, dynamic + read-only tool discovery, and sandbox-aware initialization for the browser + agent. +- **Security & Permission Hardening:** Implemented secret visibility lockdown + for environment files and integrated integrity controls for Windows + sandboxing. 
## What's Changed -- Changelog for v0.33.2 by @gemini-cli-robot in - [#22730](https://github.com/google-gemini/gemini-cli/pull/22730) -- feat(core): multi-registry architecture and tool filtering for subagents by - @akh64bit in [#22712](https://github.com/google-gemini/gemini-cli/pull/22712) -- Changelog for v0.34.0-preview.4 by @gemini-cli-robot in - [#22752](https://github.com/google-gemini/gemini-cli/pull/22752) -- fix(devtools): use theme-aware text colors for console warnings and errors by - @SandyTao520 in - [#22181](https://github.com/google-gemini/gemini-cli/pull/22181) -- Add support for dynamic model Resolution to ModelConfigService by @kevinjwang1 - in [#22578](https://github.com/google-gemini/gemini-cli/pull/22578) -- chore(release): bump version to 0.36.0-nightly.20260317.2f90b4653 by - @gemini-cli-robot in - [#22858](https://github.com/google-gemini/gemini-cli/pull/22858) -- fix(cli): use active sessionId in useLogger and improve resume robustness by - @mattKorwel in - [#22606](https://github.com/google-gemini/gemini-cli/pull/22606) -- fix(cli): expand tilde in policy paths from settings.json by @abhipatel12 in - [#22772](https://github.com/google-gemini/gemini-cli/pull/22772) -- fix(core): add actionable warnings for terminal fallbacks (#14426) by - @spencer426 in - [#22211](https://github.com/google-gemini/gemini-cli/pull/22211) -- feat(tracker): integrate task tracker protocol into core system prompt by - @anj-s in [#22442](https://github.com/google-gemini/gemini-cli/pull/22442) -- chore: add posttest build hooks and fix missing dependencies by @NTaylorMullen - in [#22865](https://github.com/google-gemini/gemini-cli/pull/22865) -- feat(a2a): add agent acknowledgment command and enhance registry discovery by - @alisa-alisa in - [#22389](https://github.com/google-gemini/gemini-cli/pull/22389) -- fix(cli): automatically add all VSCode workspace folders to Gemini context by - @sakshisemalti in - 
[#21380](https://github.com/google-gemini/gemini-cli/pull/21380) -- feat: add 'blocked' status to tasks and todos by @anj-s in - [#22735](https://github.com/google-gemini/gemini-cli/pull/22735) -- refactor(cli): remove extra newlines in ShellToolMessage.tsx by @NTaylorMullen - in [#22868](https://github.com/google-gemini/gemini-cli/pull/22868) -- fix(cli): lazily load settings in onModelChange to prevent stale closure data - loss by @KumarADITHYA123 in - [#20403](https://github.com/google-gemini/gemini-cli/pull/20403) -- feat(core): subagent local execution and tool isolation by @akh64bit in - [#22718](https://github.com/google-gemini/gemini-cli/pull/22718) -- fix(cli): resolve subagent grouping and UI state persistence by @abhipatel12 - in [#22252](https://github.com/google-gemini/gemini-cli/pull/22252) -- refactor(ui): extract SessionBrowser search and navigation components by - @abhipatel12 in - [#22377](https://github.com/google-gemini/gemini-cli/pull/22377) -- fix: updates Docker image reference for GitHub MCP server by @jhhornn in - [#22938](https://github.com/google-gemini/gemini-cli/pull/22938) -- refactor(cli): group subagent trajectory deletion and use native filesystem - testing by @abhipatel12 in - [#22890](https://github.com/google-gemini/gemini-cli/pull/22890) -- refactor(cli): simplify keypress and mouse providers and update tests by - @scidomino in [#22853](https://github.com/google-gemini/gemini-cli/pull/22853) -- Changelog for v0.34.0 by @gemini-cli-robot in - [#22860](https://github.com/google-gemini/gemini-cli/pull/22860) -- test(cli): simplify createMockSettings calls by @scidomino in - [#22952](https://github.com/google-gemini/gemini-cli/pull/22952) -- feat(ui): format multi-line banner warnings with a bold title by @keithguerin - in [#22955](https://github.com/google-gemini/gemini-cli/pull/22955) -- Docs: Remove references to stale Gemini CLI file structure info by - @g-samroberts in - 
[#22976](https://github.com/google-gemini/gemini-cli/pull/22976) -- feat(ui): remove write todo list tool from UI tips by @aniruddhaadak80 in - [#22281](https://github.com/google-gemini/gemini-cli/pull/22281) -- Fix issue where subagent thoughts are appended. by @gundermanc in - [#22975](https://github.com/google-gemini/gemini-cli/pull/22975) -- Feat/browser privacy consent by @kunal-10-cloud in - [#21119](https://github.com/google-gemini/gemini-cli/pull/21119) -- fix(core): explicitly map execution context in LocalAgentExecutor by @akh64bit - in [#22949](https://github.com/google-gemini/gemini-cli/pull/22949) -- feat(plan): support plan mode in non-interactive mode by @ruomengz in - [#22670](https://github.com/google-gemini/gemini-cli/pull/22670) -- feat(core): implement strict macOS sandboxing using Seatbelt allowlist by - @ehedlund in [#22832](https://github.com/google-gemini/gemini-cli/pull/22832) -- docs: add additional notes by @abhipatel12 in - [#23008](https://github.com/google-gemini/gemini-cli/pull/23008) -- fix(cli): resolve duplicate footer on tool cancel via ESC (#21743) by - @ruomengz in [#21781](https://github.com/google-gemini/gemini-cli/pull/21781) -- Changelog for v0.35.0-preview.1 by @gemini-cli-robot in - [#23012](https://github.com/google-gemini/gemini-cli/pull/23012) -- fix(ui): fix flickering on small terminal heights by @devr0306 in - [#21416](https://github.com/google-gemini/gemini-cli/pull/21416) -- fix(acp): provide more meta in tool_call_update by @Mervap in - [#22663](https://github.com/google-gemini/gemini-cli/pull/22663) -- docs: add FAQ entry for checking Gemini CLI version by @surajsahani in - [#21271](https://github.com/google-gemini/gemini-cli/pull/21271) -- feat(core): resilient subagent tool rejection with contextual feedback by - @abhipatel12 in - [#22951](https://github.com/google-gemini/gemini-cli/pull/22951) -- fix(cli): correctly handle auto-update for standalone binaries by @bdmorgan in - 
[#23038](https://github.com/google-gemini/gemini-cli/pull/23038) -- feat(core): add content-utils by @adamfweidman in - [#22984](https://github.com/google-gemini/gemini-cli/pull/22984) -- fix: circumvent genai sdk requirement for api key when using gateway auth via - ACP by @sripasg in - [#23042](https://github.com/google-gemini/gemini-cli/pull/23042) -- fix(core): don't persist browser consent sentinel in non-interactive mode by - @jasonmatthewsuhari in - [#23073](https://github.com/google-gemini/gemini-cli/pull/23073) -- fix(core): narrow browser agent description to prevent stealing URL tasks from - web_fetch by @gsquared94 in - [#23086](https://github.com/google-gemini/gemini-cli/pull/23086) -- feat(cli): Partial threading of AgentLoopContext. by @joshualitt in - [#22978](https://github.com/google-gemini/gemini-cli/pull/22978) -- fix(browser-agent): enable "Allow all server tools" session policy by +- feat(evals): centralize test agents into test-utils for reuse by @Samee24 in + [#23616](https://github.com/google-gemini/gemini-cli/pull/23616) +- revert: chore(config): disable agents by default by @abhipatel12 in + [#23672](https://github.com/google-gemini/gemini-cli/pull/23672) +- fix(plan): update telemetry attribute keys and add timestamp by @Adib234 in + [#23685](https://github.com/google-gemini/gemini-cli/pull/23685) +- fix(core): prevent premature MCP discovery completion by @jackwotherspoon in + [#23637](https://github.com/google-gemini/gemini-cli/pull/23637) +- feat(browser): add maxActionsPerTask for browser agent setting by @cynthialong0-0 in - [#22343](https://github.com/google-gemini/gemini-cli/pull/22343) -- refactor(cli): integrate real config loading into async test utils by - @scidomino in [#23040](https://github.com/google-gemini/gemini-cli/pull/23040) -- feat(core): inject memory and JIT context into subagents by @abhipatel12 in - [#23032](https://github.com/google-gemini/gemini-cli/pull/23032) -- Fix logging and virtual list. 
by @jacob314 in - [#23080](https://github.com/google-gemini/gemini-cli/pull/23080) -- feat(core): cap JIT context upward traversal at git root by @SandyTao520 in - [#23074](https://github.com/google-gemini/gemini-cli/pull/23074) -- Docs: Minor style updates from initial docs audit. by @g-samroberts in - [#22872](https://github.com/google-gemini/gemini-cli/pull/22872) -- feat(core): add experimental memory manager agent to replace save_memory tool - by @SandyTao520 in - [#22726](https://github.com/google-gemini/gemini-cli/pull/22726) -- Changelog for v0.35.0-preview.2 by @gemini-cli-robot in - [#23142](https://github.com/google-gemini/gemini-cli/pull/23142) -- Update website issue template for label and title by @g-samroberts in - [#23036](https://github.com/google-gemini/gemini-cli/pull/23036) -- fix: upgrade ACP SDK from 0.12 to 0.16.1 by @sripasg in - [#23132](https://github.com/google-gemini/gemini-cli/pull/23132) -- Update callouts to work on github. by @g-samroberts in - [#22245](https://github.com/google-gemini/gemini-cli/pull/22245) -- feat: ACP: Add token usage metadata to the `send` method's return value by - @sripasg in [#23148](https://github.com/google-gemini/gemini-cli/pull/23148) -- fix(plan): clarify that plan mode policies are combined with normal mode by - @ruomengz in [#23158](https://github.com/google-gemini/gemini-cli/pull/23158) -- Add ModelChain support to ModelConfigService and make ModelDialog dynamic by - @kevinjwang1 in - [#22914](https://github.com/google-gemini/gemini-cli/pull/22914) -- Ensure that copied extensions are writable in the user's local directory by - @kevinjwang1 in - [#23016](https://github.com/google-gemini/gemini-cli/pull/23016) -- feat(core): implement native Windows sandboxing by @mattKorwel in - [#21807](https://github.com/google-gemini/gemini-cli/pull/21807) -- feat(core): add support for admin-forced MCP server installations by - @gsquared94 in - [#23163](https://github.com/google-gemini/gemini-cli/pull/23163) -- 
chore(lint): ignore .gemini directory and recursive node_modules by - @mattKorwel in - [#23211](https://github.com/google-gemini/gemini-cli/pull/23211) -- feat(cli): conditionally exclude ask_user tool in ACP mode by @nmcnamara-eng - in [#23045](https://github.com/google-gemini/gemini-cli/pull/23045) -- feat(core): introduce AgentSession and rename stream events to agent events by - @mbleigh in [#23159](https://github.com/google-gemini/gemini-cli/pull/23159) -- feat(worktree): add Git worktree support for isolated parallel sessions by - @jerop in [#22973](https://github.com/google-gemini/gemini-cli/pull/22973) -- Add support for linking in the extension registry by @kevinjwang1 in - [#23153](https://github.com/google-gemini/gemini-cli/pull/23153) -- feat(extensions): add --skip-settings flag to install command by @Ratish1 in - [#17212](https://github.com/google-gemini/gemini-cli/pull/17212) -- feat(telemetry): track if session is running in a Git worktree by @jerop in - [#23265](https://github.com/google-gemini/gemini-cli/pull/23265) -- refactor(core): use absolute paths in GEMINI.md context markers by - @SandyTao520 in - [#23135](https://github.com/google-gemini/gemini-cli/pull/23135) -- fix(core): add sanitization to sub agent thoughts and centralize utilities by - @devr0306 in [#22828](https://github.com/google-gemini/gemini-cli/pull/22828) -- feat(core): refine User-Agent for VS Code traffic (unified format) by - @sehoon38 in [#23256](https://github.com/google-gemini/gemini-cli/pull/23256) -- Fix schema for ModelChains by @kevinjwang1 in - [#23284](https://github.com/google-gemini/gemini-cli/pull/23284) -- test(cli): refactor tests for async render utilities by @scidomino in - [#23252](https://github.com/google-gemini/gemini-cli/pull/23252) -- feat(core): add security prompt for browser agent by @cynthialong0-0 in - [#23241](https://github.com/google-gemini/gemini-cli/pull/23241) -- refactor(ide): replace dynamic undici import with static fetch import by - 
@cocosheng-g in - [#23268](https://github.com/google-gemini/gemini-cli/pull/23268) -- test(cli): address unresolved feedback from PR #23252 by @scidomino in - [#23303](https://github.com/google-gemini/gemini-cli/pull/23303) -- feat(browser): add sensitive action controls and read-only noise reduction by - @cynthialong0-0 in - [#22867](https://github.com/google-gemini/gemini-cli/pull/22867) -- Disabling failing test while investigating by @alisa-alisa in - [#23311](https://github.com/google-gemini/gemini-cli/pull/23311) -- fix broken extension link in hooks guide by @Indrapal-70 in - [#21728](https://github.com/google-gemini/gemini-cli/pull/21728) -- fix(core): fix agent description indentation by @abhipatel12 in - [#23315](https://github.com/google-gemini/gemini-cli/pull/23315) -- Wrap the text under TOML rule for easier readability in policy-engine.md… by - @CogitationOps in - [#23076](https://github.com/google-gemini/gemini-cli/pull/23076) -- fix(extensions): revert broken extension removal behavior by @ehedlund in - [#23317](https://github.com/google-gemini/gemini-cli/pull/23317) -- feat(core): set up onboarding telemetry by @yunaseoul in - [#23118](https://github.com/google-gemini/gemini-cli/pull/23118) -- Retry evals on API error. 
by @gundermanc in - [#23322](https://github.com/google-gemini/gemini-cli/pull/23322) -- fix(evals): remove tool restrictions and add compile-time guards by - @SandyTao520 in - [#23312](https://github.com/google-gemini/gemini-cli/pull/23312) -- fix(hooks): support 'ask' decision for BeforeTool hooks by @gundermanc in - [#21146](https://github.com/google-gemini/gemini-cli/pull/21146) -- feat(browser): add warning message for session mode 'existing' by - @cynthialong0-0 in - [#23288](https://github.com/google-gemini/gemini-cli/pull/23288) -- chore(lint): enforce zero warnings and cleanup syntax restrictions by - @alisa-alisa in - [#22902](https://github.com/google-gemini/gemini-cli/pull/22902) -- fix(cli): add Esc instruction to HooksDialog footer by @abhipatel12 in - [#23258](https://github.com/google-gemini/gemini-cli/pull/23258) -- Disallow and suppress misused spread operator. by @gundermanc in - [#23294](https://github.com/google-gemini/gemini-cli/pull/23294) -- fix(core): refine CliHelpAgent description for better delegation by - @abhipatel12 in - [#23310](https://github.com/google-gemini/gemini-cli/pull/23310) -- fix(core): enable global session and persistent approval for web_fetch by - @NTaylorMullen in - [#23295](https://github.com/google-gemini/gemini-cli/pull/23295) -- fix(plan): add state transition override to prevent plan mode freeze by - @Adib234 in [#23020](https://github.com/google-gemini/gemini-cli/pull/23020) -- fix(cli): record skill activation tool calls in chat history by @NTaylorMullen - in [#23203](https://github.com/google-gemini/gemini-cli/pull/23203) -- fix(core): ensure subagent tool updates apply configuration overrides - immediately by @abhipatel12 in - [#23161](https://github.com/google-gemini/gemini-cli/pull/23161) -- fix(cli): resolve flicker at boundaries of list in BaseSelectionList by - @jackwotherspoon in - [#23298](https://github.com/google-gemini/gemini-cli/pull/23298) -- test(cli): force generic terminal in tests to fix 
snapshot failures by - @abhipatel12 in - [#23499](https://github.com/google-gemini/gemini-cli/pull/23499) -- Evals: PR Guidance adding workflow by @alisa-alisa in - [#23164](https://github.com/google-gemini/gemini-cli/pull/23164) -- feat(core): refactor SandboxManager to a stateless architecture and introduce - explicit Deny interface by @ehedlund in - [#23141](https://github.com/google-gemini/gemini-cli/pull/23141) -- feat(core): add event-translator and update agent types by @adamfweidman in - [#22985](https://github.com/google-gemini/gemini-cli/pull/22985) -- perf(cli): parallelize and background startup cleanup tasks by @sehoon38 in - [#23545](https://github.com/google-gemini/gemini-cli/pull/23545) -- fix: "allow always" for commands with paths by @scidomino in - [#23558](https://github.com/google-gemini/gemini-cli/pull/23558) -- fix(cli): prevent terminal escape sequences from leaking on exit by - @mattKorwel in - [#22682](https://github.com/google-gemini/gemini-cli/pull/22682) -- feat(cli): implement full "GEMINI CLI" logo for logged-out state by - @keithguerin in - [#22412](https://github.com/google-gemini/gemini-cli/pull/22412) -- fix(plan): reserve minimum height for selection list in AskUserDialog by - @ruomengz in [#23280](https://github.com/google-gemini/gemini-cli/pull/23280) -- fix(core): harden AgentSession replay semantics by @adamfweidman in - [#23548](https://github.com/google-gemini/gemini-cli/pull/23548) -- test(core): migrate hook tests to scheduler by @abhipatel12 in - [#23496](https://github.com/google-gemini/gemini-cli/pull/23496) -- chore(config): disable agents by default by @abhipatel12 in - [#23546](https://github.com/google-gemini/gemini-cli/pull/23546) -- fix(ui): make tool confirmations take up entire terminal height by @devr0306 - in [#22366](https://github.com/google-gemini/gemini-cli/pull/22366) -- fix(core): prevent redundant remote agent loading on model switch by + [#23216](https://github.com/google-gemini/gemini-cli/pull/23216) 
+- fix(core): improve agent loader error formatting for empty paths by @adamfweidman in - [#23576](https://github.com/google-gemini/gemini-cli/pull/23576) -- refactor(core): update production type imports from coreToolScheduler by - @abhipatel12 in - [#23498](https://github.com/google-gemini/gemini-cli/pull/23498) -- feat(cli): always prefix extension skills with colon separator by - @NTaylorMullen in - [#23566](https://github.com/google-gemini/gemini-cli/pull/23566) -- fix(core): properly support allowRedirect in policy engine by @scidomino in - [#23579](https://github.com/google-gemini/gemini-cli/pull/23579) -- fix(cli): prevent subcommand shadowing and skip auth for commands by + [#23690](https://github.com/google-gemini/gemini-cli/pull/23690) +- fix(cli): only show updating spinner when auto-update is in progress by + @scidomino in [#23709](https://github.com/google-gemini/gemini-cli/pull/23709) +- Refine onboarding metrics to log the duration explicitly and use the tier + name. by @yunaseoul in + [#23678](https://github.com/google-gemini/gemini-cli/pull/23678) +- chore(tools): add toJSON to tools and invocations to reduce logging verbosity + by @alisa-alisa in + [#22899](https://github.com/google-gemini/gemini-cli/pull/22899) +- fix(cli): stabilize copy mode to prevent flickering and cursor resets by @mattKorwel in - [#23177](https://github.com/google-gemini/gemini-cli/pull/23177) -- fix(test): move flaky tests to non-blocking suite by @mattKorwel in - [#23259](https://github.com/google-gemini/gemini-cli/pull/23259) -- Changelog for v0.35.0-preview.3 by @gemini-cli-robot in - [#23574](https://github.com/google-gemini/gemini-cli/pull/23574) -- feat(skills): add behavioral-evals skill with fixing and promoting guides by + [#22584](https://github.com/google-gemini/gemini-cli/pull/22584) +- fix(test): move flaky ctrl-c-exit test to non-blocking suite by @mattKorwel in + [#23732](https://github.com/google-gemini/gemini-cli/pull/23732) +- feat(skills): add ci skill 
for automated failure replication by @mattKorwel in + [#23720](https://github.com/google-gemini/gemini-cli/pull/23720) +- feat(sandbox): implement forbiddenPaths for OS-specific sandbox managers by + @ehedlund in [#23282](https://github.com/google-gemini/gemini-cli/pull/23282) +- fix(core): conditionally expose additional_permissions in shell tool by + @galz10 in [#23729](https://github.com/google-gemini/gemini-cli/pull/23729) +- refactor(core): standardize OS-specific sandbox tests and extract linux helper + methods by @ehedlund in + [#23715](https://github.com/google-gemini/gemini-cli/pull/23715) +- format recently added script by @scidomino in + [#23739](https://github.com/google-gemini/gemini-cli/pull/23739) +- fix(ui): prevent over-eager slash subcommand completion by @keithguerin in + [#20136](https://github.com/google-gemini/gemini-cli/pull/20136) +- Fix dynamic model routing for gemini 3.1 pro to customtools model by + @kevinjwang1 in + [#23641](https://github.com/google-gemini/gemini-cli/pull/23641) +- feat(core): support inline agentCardJson for remote agents by @adamfweidman in + [#23743](https://github.com/google-gemini/gemini-cli/pull/23743) +- fix(cli): skip console log/info in headless mode by @cynthialong0-0 in + [#22739](https://github.com/google-gemini/gemini-cli/pull/22739) +- test(core): install bubblewrap on Linux CI for sandbox integration tests by + @ehedlund in [#23583](https://github.com/google-gemini/gemini-cli/pull/23583) +- docs(reference): split tools table into category sections by @sheikhlimon in + [#21516](https://github.com/google-gemini/gemini-cli/pull/21516) +- fix(browser): detect embedded URLs in query params to prevent allowedDomains + bypass by @tony-shi in + [#23225](https://github.com/google-gemini/gemini-cli/pull/23225) +- fix(browser): add proxy bypass constraint to domain restriction system prompt + by @tony-shi in + [#23229](https://github.com/google-gemini/gemini-cli/pull/23229) +- fix(policy): relax write_file 
argsPattern in plan mode to allow paths without + session ID by @Adib234 in + [#23695](https://github.com/google-gemini/gemini-cli/pull/23695) +- docs: fix grammar in CONTRIBUTING and numbering in sandbox docs by + @splint-disk-8i in + [#23448](https://github.com/google-gemini/gemini-cli/pull/23448) +- fix(acp): allow attachments by adding a permission prompt by @sripasg in + [#23680](https://github.com/google-gemini/gemini-cli/pull/23680) +- fix(core): thread AbortSignal to chat compression requests (#20405) by + @SH20RAJ in [#20778](https://github.com/google-gemini/gemini-cli/pull/20778) +- feat(core): implement Windows sandbox dynamic expansion Phase 1 and 2.1 by + @scidomino in [#23691](https://github.com/google-gemini/gemini-cli/pull/23691) +- Add note about root privileges in sandbox docs by @diodesign in + [#23314](https://github.com/google-gemini/gemini-cli/pull/23314) +- docs(core): document agent_card_json string literal options for remote agents + by @adamfweidman in + [#23797](https://github.com/google-gemini/gemini-cli/pull/23797) +- fix(cli): resolve TTY hang on headless environments by unconditionally + resuming process.stdin before React Ink launch by @cocosheng-g in + [#23673](https://github.com/google-gemini/gemini-cli/pull/23673) +- fix(ui): cleanup estimated string length hacks in composer by @keithguerin in + [#23694](https://github.com/google-gemini/gemini-cli/pull/23694) +- feat(browser): dynamically discover read-only tools by @cynthialong0-0 in + [#23805](https://github.com/google-gemini/gemini-cli/pull/23805) +- docs: clarify policy requirement for `general.plan.directory` in settings + schema by @jerop in + [#23784](https://github.com/google-gemini/gemini-cli/pull/23784) +- Revert "perf(cli): optimize --version startup time (#23671)" by @scidomino in + [#23812](https://github.com/google-gemini/gemini-cli/pull/23812) +- don't silence errors from wombat by @scidomino in + [#23822](https://github.com/google-gemini/gemini-cli/pull/23822) +- 
fix(ui): prevent escape key from cancelling requests in shell mode by + @PrasannaPal21 in + [#21245](https://github.com/google-gemini/gemini-cli/pull/21245) +- Changelog for v0.36.0-preview.0 by @gemini-cli-robot in + [#23702](https://github.com/google-gemini/gemini-cli/pull/23702) +- feat(core,ui): Add experiment-gated support for gemini flash 3.1 lite by + @chrstnb in [#23794](https://github.com/google-gemini/gemini-cli/pull/23794) +- Changelog for v0.36.0-preview.3 by @gemini-cli-robot in + [#23827](https://github.com/google-gemini/gemini-cli/pull/23827) +- new linting check: github-actions-pinning by @alisa-alisa in + [#23808](https://github.com/google-gemini/gemini-cli/pull/23808) +- fix(cli): show helpful guidance when no skills are available by @Niralisj in + [#23785](https://github.com/google-gemini/gemini-cli/pull/23785) +- fix: Chat logs and errors handle tail tool calls correctly by @googlestrobe in + [#22460](https://github.com/google-gemini/gemini-cli/pull/22460) +- Don't try removing a tag from a non-existent release. 
by @scidomino in + [#23830](https://github.com/google-gemini/gemini-cli/pull/23830) +- fix(cli): allow ask question dialog to take full window height by @jacob314 in + [#23693](https://github.com/google-gemini/gemini-cli/pull/23693) +- fix(core): strip leading underscores from error types in telemetry by + @yunaseoul in [#23824](https://github.com/google-gemini/gemini-cli/pull/23824) +- Changelog for v0.35.0 by @gemini-cli-robot in + [#23819](https://github.com/google-gemini/gemini-cli/pull/23819) +- feat(evals): add reliability harvester and 500/503 retry support by + @alisa-alisa in + [#23626](https://github.com/google-gemini/gemini-cli/pull/23626) +- feat(sandbox): dynamic Linux sandbox expansion and worktree support by @galz10 + in [#23692](https://github.com/google-gemini/gemini-cli/pull/23692) +- Merge examples of use into quickstart documentation by @diodesign in + [#23319](https://github.com/google-gemini/gemini-cli/pull/23319) +- fix(cli): prioritize primary name matches in slash command search by @sehoon38 + in [#23850](https://github.com/google-gemini/gemini-cli/pull/23850) +- Changelog for v0.35.1 by @gemini-cli-robot in + [#23840](https://github.com/google-gemini/gemini-cli/pull/23840) +- fix(browser): keep input blocker active across navigations by @kunal-10-cloud + in [#22562](https://github.com/google-gemini/gemini-cli/pull/22562) +- feat(core): new skill to look for duplicated code while reviewing PRs by + @devr0306 in [#23704](https://github.com/google-gemini/gemini-cli/pull/23704) +- fix(core): replace hardcoded non-interactive ASK_USER denial with explicit + policy rules by @ruomengz in + [#23668](https://github.com/google-gemini/gemini-cli/pull/23668) +- fix(plan): after exiting plan mode switches model to a flash model by @Adib234 + in [#23885](https://github.com/google-gemini/gemini-cli/pull/23885) +- feat(gcp): add development worker infrastructure by @mattKorwel in + [#23814](https://github.com/google-gemini/gemini-cli/pull/23814) +- 
fix(a2a-server): A2A server should execute ask policies in interactive mode by + @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) +- feat(core): define TrajectoryProvider interface by @sehoon38 in + [#23050](https://github.com/google-gemini/gemini-cli/pull/23050) +- Docs: Update quotas and pricing by @jkcinouye in + [#23835](https://github.com/google-gemini/gemini-cli/pull/23835) +- fix(core): allow disabling environment variable redaction by @galz10 in + [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) +- feat(cli): enable notifications cross-platform via terminal bell fallback by + @genneth in [#21618](https://github.com/google-gemini/gemini-cli/pull/21618) +- feat(sandbox): implement secret visibility lockdown for env files by + @DavidAPierce in + [#23712](https://github.com/google-gemini/gemini-cli/pull/23712) +- fix(core): remove shell outputChunks buffer caching to prevent memory bloat + and sanitize prompt input by @spencer426 in + [#23751](https://github.com/google-gemini/gemini-cli/pull/23751) +- feat(core): implement persistent browser session management by @kunal-10-cloud + in [#21306](https://github.com/google-gemini/gemini-cli/pull/21306) +- refactor(core): delegate sandbox denial parsing to SandboxManager by + @scidomino in [#23928](https://github.com/google-gemini/gemini-cli/pull/23928) +- dep(update) Update Ink version to 6.5.0 by @jacob314 in + [#23843](https://github.com/google-gemini/gemini-cli/pull/23843) +- Docs: Update 'docs-writer' skill for relative links by @jkcinouye in + [#21463](https://github.com/google-gemini/gemini-cli/pull/21463) +- Changelog for v0.36.0-preview.4 by @gemini-cli-robot in + [#23935](https://github.com/google-gemini/gemini-cli/pull/23935) +- fix(acp): Update allow approval policy flow for ACP clients to fix config + persistence and compatible with TUI by @sripasg in + [#23818](https://github.com/google-gemini/gemini-cli/pull/23818) +- Changelog for v0.35.2 by 
@gemini-cli-robot in + [#23960](https://github.com/google-gemini/gemini-cli/pull/23960) +- ACP integration documents by @g-samroberts in + [#22254](https://github.com/google-gemini/gemini-cli/pull/22254) +- fix(core): explicitly set error names to avoid bundling renaming issues by + @yunaseoul in [#23913](https://github.com/google-gemini/gemini-cli/pull/23913) +- feat(core): subagent isolation and cleanup hardening by @abhipatel12 in + [#23903](https://github.com/google-gemini/gemini-cli/pull/23903) +- disable extension-reload test by @scidomino in + [#24018](https://github.com/google-gemini/gemini-cli/pull/24018) +- feat(core): add forbiddenPaths to GlobalSandboxOptions and refactor + createSandboxManager by @ehedlund in + [#23936](https://github.com/google-gemini/gemini-cli/pull/23936) +- refactor(core): improve ignore resolution and fix directory-matching bug by + @ehedlund in [#23816](https://github.com/google-gemini/gemini-cli/pull/23816) +- revert(core): support custom base URL via env vars by @spencer426 in + [#23976](https://github.com/google-gemini/gemini-cli/pull/23976) +- Increase memory limited for eslint. by @jacob314 in + [#24022](https://github.com/google-gemini/gemini-cli/pull/24022) +- fix(acp): prevent crash on empty response in ACP mode by @sripasg in + [#23952](https://github.com/google-gemini/gemini-cli/pull/23952) +- feat(core): Land `AgentHistoryProvider`. by @joshualitt in + [#23978](https://github.com/google-gemini/gemini-cli/pull/23978) +- fix(core): switch to subshells for shell tool wrapping to fix heredocs and + edge cases by @abhipatel12 in + [#24024](https://github.com/google-gemini/gemini-cli/pull/24024) +- Debug command. 
by @jacob314 in + [#23851](https://github.com/google-gemini/gemini-cli/pull/23851) +- Changelog for v0.36.0-preview.5 by @gemini-cli-robot in + [#24046](https://github.com/google-gemini/gemini-cli/pull/24046) +- Fix test flakes by globally mocking ink-spinner by @jacob314 in + [#24044](https://github.com/google-gemini/gemini-cli/pull/24044) +- Enable network access in sandbox configuration by @galz10 in + [#24055](https://github.com/google-gemini/gemini-cli/pull/24055) +- feat(context): add configurable memoryBoundaryMarkers setting by @SandyTao520 + in [#24020](https://github.com/google-gemini/gemini-cli/pull/24020) +- feat(core): implement windows sandbox expansion and denial detection by + @scidomino in [#24027](https://github.com/google-gemini/gemini-cli/pull/24027) +- fix(core): resolve ACP Operation Aborted Errors in grep_search by @ivanporty + in [#23821](https://github.com/google-gemini/gemini-cli/pull/23821) +- fix(hooks): prevent SessionEnd from firing twice in non-interactive mode by + @krishdef7 in [#22139](https://github.com/google-gemini/gemini-cli/pull/22139) +- Re-word intro to Gemini 3 page. 
by @g-samroberts in + [#24069](https://github.com/google-gemini/gemini-cli/pull/24069) +- fix(cli): resolve layout contention and flashing loop in StatusRow by + @keithguerin in + [#24065](https://github.com/google-gemini/gemini-cli/pull/24065) +- fix(sandbox): implement Windows Mandatory Integrity Control for GeminiSandbox + by @galz10 in [#24057](https://github.com/google-gemini/gemini-cli/pull/24057) +- feat(core): implement tool-based topic grouping (Chapters) by @Abhijit-2592 in + [#23150](https://github.com/google-gemini/gemini-cli/pull/23150) +- feat(cli): support 'tab to queue' for messages while generating by @gundermanc + in [#24052](https://github.com/google-gemini/gemini-cli/pull/24052) +- feat(core): agnostic background task UI with CompletionBehavior by + @adamfweidman in + [#22740](https://github.com/google-gemini/gemini-cli/pull/22740) +- UX for topic narration tool by @gundermanc in + [#24079](https://github.com/google-gemini/gemini-cli/pull/24079) +- fix: shellcheck warnings in scripts by @scidomino in + [#24035](https://github.com/google-gemini/gemini-cli/pull/24035) +- test(evals): add comprehensive subagent delegation evaluations by @abhipatel12 + in [#24132](https://github.com/google-gemini/gemini-cli/pull/24132) +- fix(a2a-server): prioritize ADC before evaluating headless constraints for + auth initialization by @spencer426 in + [#23614](https://github.com/google-gemini/gemini-cli/pull/23614) +- Text can be added after /plan command by @rambleraptor in + [#22833](https://github.com/google-gemini/gemini-cli/pull/22833) +- fix(cli): resolve missing F12 logs via global console store by @scidomino in + [#24235](https://github.com/google-gemini/gemini-cli/pull/24235) +- fix broken tests by @scidomino in + [#24279](https://github.com/google-gemini/gemini-cli/pull/24279) +- fix(evals): add update_topic behavioral eval by @gundermanc in + [#24223](https://github.com/google-gemini/gemini-cli/pull/24223) +- feat(core): Unified Context Management and 
Tool Distillation. by @joshualitt + in [#24157](https://github.com/google-gemini/gemini-cli/pull/24157) +- Default enable narration for the team. by @gundermanc in + [#24224](https://github.com/google-gemini/gemini-cli/pull/24224) +- fix(core): ensure default agents provide tools and use model-specific schemas + by @abhipatel12 in + [#24268](https://github.com/google-gemini/gemini-cli/pull/24268) +- feat(cli): show Flash Lite Preview model regardless of user tier by @sehoon38 + in [#23904](https://github.com/google-gemini/gemini-cli/pull/23904) +- feat(cli): implement compact tool output by @jwhelangoog in + [#20974](https://github.com/google-gemini/gemini-cli/pull/20974) +- Add security settings for tool sandboxing by @galz10 in + [#23923](https://github.com/google-gemini/gemini-cli/pull/23923) +- chore(test-utils): switch integration tests to use PREVIEW_GEMINI_MODEL by + @sehoon38 in [#24276](https://github.com/google-gemini/gemini-cli/pull/24276) +- feat(core): enable topic update narration for legacy models by @Abhijit-2592 + in [#24241](https://github.com/google-gemini/gemini-cli/pull/24241) +- feat(core): add project-level memory scope to save_memory tool by @SandyTao520 + in [#24161](https://github.com/google-gemini/gemini-cli/pull/24161) +- test(integration): fix plan mode write denial test false positive by @sehoon38 + in [#24299](https://github.com/google-gemini/gemini-cli/pull/24299) +- feat(plan): support `Plan` mode in untrusted folders by @Adib234 in + [#17586](https://github.com/google-gemini/gemini-cli/pull/17586) +- fix(core): enable mid-stream retries for all models and re-enable compression + test by @sehoon38 in + [#24302](https://github.com/google-gemini/gemini-cli/pull/24302) +- Changelog for v0.36.0-preview.6 by @gemini-cli-robot in + [#24082](https://github.com/google-gemini/gemini-cli/pull/24082) +- Changelog for v0.35.3 by @gemini-cli-robot in + [#24083](https://github.com/google-gemini/gemini-cli/pull/24083) +- feat(cli): add auth info 
to footer by @sehoon38 in + [#24042](https://github.com/google-gemini/gemini-cli/pull/24042) +- fix(browser): reset action counter for each agent session and let it ignore + internal actions by @cynthialong0-0 in + [#24228](https://github.com/google-gemini/gemini-cli/pull/24228) +- feat(plan): promote planning feature to stable by @ruomengz in + [#24282](https://github.com/google-gemini/gemini-cli/pull/24282) +- fix(browser): terminate subagent immediately on domain restriction violations + by @gsquared94 in + [#24313](https://github.com/google-gemini/gemini-cli/pull/24313) +- feat(cli): add UI to update extensions by @ruomengz in + [#23682](https://github.com/google-gemini/gemini-cli/pull/23682) +- Fix(browser): terminate immediately for "browser is already running" error by + @cynthialong0-0 in + [#24233](https://github.com/google-gemini/gemini-cli/pull/24233) +- docs: Add 'plan' option to approval mode in CLI reference by @YifanRuan in + [#24134](https://github.com/google-gemini/gemini-cli/pull/24134) +- fix(core): batch macOS seatbelt rules into a profile file to prevent ARG_MAX + errors by @ehedlund in + [#24255](https://github.com/google-gemini/gemini-cli/pull/24255) +- fix(core): fix race condition between browser agent and main closing process + by @cynthialong0-0 in + [#24340](https://github.com/google-gemini/gemini-cli/pull/24340) +- perf(build): optimize build scripts for parallel execution and remove + redundant checks by @sehoon38 in + [#24307](https://github.com/google-gemini/gemini-cli/pull/24307) +- ci: install bubblewrap on Linux for release workflows by @ehedlund in + [#24347](https://github.com/google-gemini/gemini-cli/pull/24347) +- chore(release): allow bundling for all builds, including stable by @sehoon38 + in [#24305](https://github.com/google-gemini/gemini-cli/pull/24305) +- Revert "Add security settings for tool sandboxing" by @jerop in + [#24357](https://github.com/google-gemini/gemini-cli/pull/24357) +- docs: update subagents docs to not 
be experimental by @abhipatel12 in + [#24343](https://github.com/google-gemini/gemini-cli/pull/24343) +- fix(core): implement `__read` and `__write` commands in sandbox managers by + @galz10 in [#24283](https://github.com/google-gemini/gemini-cli/pull/24283) +- don't try to remove tags in dry run by @scidomino in + [#24356](https://github.com/google-gemini/gemini-cli/pull/24356) +- fix(config): disable JIT context loading by default by @SandyTao520 in + [#24364](https://github.com/google-gemini/gemini-cli/pull/24364) +- test(sandbox): add integration test for dynamic permission expansion by + @galz10 in [#24359](https://github.com/google-gemini/gemini-cli/pull/24359) +- docs(policy): remove unsupported mcpName wildcard edge case by @abhipatel12 in + [#24133](https://github.com/google-gemini/gemini-cli/pull/24133) +- docs: fix broken GEMINI.md link in CONTRIBUTING.md by @Panchal-Tirth in + [#24182](https://github.com/google-gemini/gemini-cli/pull/24182) +- feat(core): infrastructure for event-driven subagent history by @abhipatel12 + in [#23914](https://github.com/google-gemini/gemini-cli/pull/23914) +- fix(core): resolve Plan Mode deadlock during plan file creation due to sandbox + restrictions by @DavidAPierce in + [#24047](https://github.com/google-gemini/gemini-cli/pull/24047) +- fix(core): fix browser agent UX issues and improve E2E test reliability by + @gsquared94 in + [#24312](https://github.com/google-gemini/gemini-cli/pull/24312) +- fix(ui): wrap topic and intent fields in TopicMessage by @jwhelangoog in + [#24386](https://github.com/google-gemini/gemini-cli/pull/24386) +- refactor(core): Centralize context management logic into src/context by + @joshualitt in + [#24380](https://github.com/google-gemini/gemini-cli/pull/24380) +- fix(core): pin AuthType.GATEWAY to use Gemini 3.1 Pro/Flash Lite by default by + @sripasg in [#24375](https://github.com/google-gemini/gemini-cli/pull/24375) +- feat(ui): add Tokyo Night theme by @danrneal in +
[#24054](https://github.com/google-gemini/gemini-cli/pull/24054) +- fix(cli): refactor test config loading and mock debugLogger in test-setup by + @mattKorwel in + [#24389](https://github.com/google-gemini/gemini-cli/pull/24389) +- Set memoryManager to false in settings.json by @mattKorwel in + [#24393](https://github.com/google-gemini/gemini-cli/pull/24393) +- ink 6.6.3 by @jacob314 in + [#24372](https://github.com/google-gemini/gemini-cli/pull/24372) +- fix(core): resolve subagent chat recording gaps and directory inheritance by @abhipatel12 in - [#23349](https://github.com/google-gemini/gemini-cli/pull/23349) -- refactor(core): delete obsolete coreToolScheduler by @abhipatel12 in - [#23502](https://github.com/google-gemini/gemini-cli/pull/23502) -- Changelog for v0.35.0-preview.4 by @gemini-cli-robot in - [#23581](https://github.com/google-gemini/gemini-cli/pull/23581) -- feat(core): add LegacyAgentSession by @adamfweidman in - [#22986](https://github.com/google-gemini/gemini-cli/pull/22986) -- feat(test-utils): add TestMcpServerBuilder and support in TestRig by - @abhipatel12 in - [#23491](https://github.com/google-gemini/gemini-cli/pull/23491) -- fix(core)!: Force policy config to specify toolName by @kschaab in - [#23330](https://github.com/google-gemini/gemini-cli/pull/23330) -- eval(save_memory): add multi-turn interactive evals for memoryManager by - @SandyTao520 in - [#23572](https://github.com/google-gemini/gemini-cli/pull/23572) -- fix(telemetry): patch memory leak and enforce logPrompts privacy by - @spencer426 in - [#23281](https://github.com/google-gemini/gemini-cli/pull/23281) -- perf(cli): background IDE client to speed up initialization by @sehoon38 in - [#23603](https://github.com/google-gemini/gemini-cli/pull/23603) -- fix(cli): prevent Ctrl+D exit when input buffer is not empty by @wtanaka in - [#23306](https://github.com/google-gemini/gemini-cli/pull/23306) -- fix: ACP: separate conversational text from execute tool command title by - @sripasg 
in [#23179](https://github.com/google-gemini/gemini-cli/pull/23179) -- feat(evals): add behavioral evaluations for subagent routing by @Samee24 in - [#23272](https://github.com/google-gemini/gemini-cli/pull/23272) -- refactor(cli,core): foundational layout, identity management, and type safety - by @jwhelangoog in - [#23286](https://github.com/google-gemini/gemini-cli/pull/23286) -- fix(core): accurately reflect subagent tool failure in UI by @abhipatel12 in - [#23187](https://github.com/google-gemini/gemini-cli/pull/23187) -- Changelog for v0.35.0-preview.5 by @gemini-cli-robot in - [#23606](https://github.com/google-gemini/gemini-cli/pull/23606) -- feat(ui): implement refreshed UX for Composer layout by @jwhelangoog in - [#21212](https://github.com/google-gemini/gemini-cli/pull/21212) -- fix: API key input dialog user interaction when selected Gemini API Key by - @kartikangiras in - [#21057](https://github.com/google-gemini/gemini-cli/pull/21057) -- docs: update `/mcp refresh` to `/mcp reload` by @adamfweidman in - [#23631](https://github.com/google-gemini/gemini-cli/pull/23631) -- Implementation of sandbox "Write-Protected" Governance Files by @DavidAPierce - in [#23139](https://github.com/google-gemini/gemini-cli/pull/23139) -- feat(sandbox): dynamic macOS sandbox expansion and worktree support by @galz10 - in [#23301](https://github.com/google-gemini/gemini-cli/pull/23301) -- fix(acp): Pass the cwd to `AcpFileSystemService` to avoid looping failures in - asking for perms to write plan md file by @sripasg in - [#23612](https://github.com/google-gemini/gemini-cli/pull/23612) -- fix(plan): sandbox path resolution in Plan Mode to prevent hallucinations by - @Adib234 in [#22737](https://github.com/google-gemini/gemini-cli/pull/22737) -- feat(ui): allow immediate user input during startup by @sehoon38 in - [#23661](https://github.com/google-gemini/gemini-cli/pull/23661) -- refactor(sandbox): reorganize Windows sandbox files by @galz10 in - 
[#23645](https://github.com/google-gemini/gemini-cli/pull/23645) -- fix(core): improve remote agent streaming UI and UX by @adamfweidman in - [#23633](https://github.com/google-gemini/gemini-cli/pull/23633) -- perf(cli): optimize --version startup time by @sehoon38 in - [#23671](https://github.com/google-gemini/gemini-cli/pull/23671) -- refactor(core): stop gemini CLI from producing unsafe casts by @gundermanc in - [#23611](https://github.com/google-gemini/gemini-cli/pull/23611) -- use enableAutoUpdate in test rig by @scidomino in - [#23681](https://github.com/google-gemini/gemini-cli/pull/23681) -- feat(core): change user-facing auth type from oauth2 to oauth by @adamfweidman - in [#23639](https://github.com/google-gemini/gemini-cli/pull/23639) -- chore(deps): fix npm audit vulnerabilities by @scidomino in - [#23679](https://github.com/google-gemini/gemini-cli/pull/23679) -- test(evals): fix overlapping act() deadlock in app-test-helper by @Adib234 in - [#23666](https://github.com/google-gemini/gemini-cli/pull/23666) -- fix(patch): cherry-pick 055ff92 to release/v0.36.0-preview.0-pr-23672 to patch - version v0.36.0-preview.0 and create version 0.36.0-preview.1 by + [#24368](https://github.com/google-gemini/gemini-cli/pull/24368) +- fix(cli): cap shell output at 10 MB to prevent RangeError crash by @ProthamD + in [#24168](https://github.com/google-gemini/gemini-cli/pull/24168) +- feat(plan): conditionally add enter/exit plan mode tools based on current mode + by @ruomengz in + [#24378](https://github.com/google-gemini/gemini-cli/pull/24378) +- feat(core): prioritize discussion before formal plan approval by @jerop in + [#24423](https://github.com/google-gemini/gemini-cli/pull/24423) +- fix(ui): add accelerated scrolling on alternate buffer mode by @devr0306 in + [#23940](https://github.com/google-gemini/gemini-cli/pull/23940) +- feat(core): populate sandbox forbidden paths with project ignore file contents + by @ehedlund in + 
[#24038](https://github.com/google-gemini/gemini-cli/pull/24038) +- fix(core): ensure blue border overlay and input blocker to act correctly + depending on browser agent activities by @cynthialong0-0 in + [#24385](https://github.com/google-gemini/gemini-cli/pull/24385) +- fix(ui): removed additional vertical padding for tables by @devr0306 in + [#24381](https://github.com/google-gemini/gemini-cli/pull/24381) +- fix(build): upload full bundle directory archive to GitHub releases by + @sehoon38 in [#24403](https://github.com/google-gemini/gemini-cli/pull/24403) +- fix(build): wire bundle:browser-mcp into bundle pipeline by @gsquared94 in + [#24424](https://github.com/google-gemini/gemini-cli/pull/24424) +- feat(browser): add sandbox-aware browser agent initialization by @gsquared94 + in [#24419](https://github.com/google-gemini/gemini-cli/pull/24419) +- feat(core): enhance tracker task schemas for detailed titles and descriptions + by @anj-s in [#23902](https://github.com/google-gemini/gemini-cli/pull/23902) +- refactor(core): Unified context management settings schema by @joshualitt in + [#24391](https://github.com/google-gemini/gemini-cli/pull/24391) +- feat(core): update browser agent prompt to check open pages first when + bringing up by @cynthialong0-0 in + [#24431](https://github.com/google-gemini/gemini-cli/pull/24431) +- fix(acp) refactor(core,cli): centralize model discovery logic in + ModelConfigService by @sripasg in + [#24392](https://github.com/google-gemini/gemini-cli/pull/24392) +- Changelog for v0.36.0-preview.7 by @gemini-cli-robot in + [#24346](https://github.com/google-gemini/gemini-cli/pull/24346) +- fix: update task tracker storage location in system prompt by @anj-s in + [#24034](https://github.com/google-gemini/gemini-cli/pull/24034) +- feat(browser): supersede stale snapshots to reclaim context-window tokens by + @gsquared94 in + [#24440](https://github.com/google-gemini/gemini-cli/pull/24440) +- docs(core): add subagent tool isolation draft 
doc by @akh64bit in + [#23275](https://github.com/google-gemini/gemini-cli/pull/23275) +- fix(patch): cherry-pick 64c928f to release/v0.37.0-preview.0-pr-23257 to patch + version v0.37.0-preview.0 and create version 0.37.0-preview.1 by @gemini-cli-robot in - [#23723](https://github.com/google-gemini/gemini-cli/pull/23723) -- fix(patch): cherry-pick 765fb67 to release/v0.36.0-preview.5-pr-24055 to patch - version v0.36.0-preview.5 and create version 0.36.0-preview.6 by + [#24561](https://github.com/google-gemini/gemini-cli/pull/24561) +- fix(patch): cherry-pick cb7f7d6 to release/v0.37.0-preview.1-pr-24342 to patch + version v0.37.0-preview.1 and create version 0.37.0-preview.2 by @gemini-cli-robot in - [#24061](https://github.com/google-gemini/gemini-cli/pull/24061) + [#24842](https://github.com/google-gemini/gemini-cli/pull/24842) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.35.3...v0.36.0 +https://github.com/google-gemini/gemini-cli/compare/v0.36.0...v0.37.0 From 15f7b24312fee332d737d3cbb3ee617f97d0e008 Mon Sep 17 00:00:00 2001 From: ruomeng Date: Wed, 8 Apr 2026 17:44:53 -0400 Subject: [PATCH 27/39] feat(plan): require user confirmation for activate_skill in Plan Mode (#24946) --- packages/core/src/policy/policies/plan.toml | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml index eaf1f9471b..6e8cfcb454 100644 --- a/packages/core/src/policy/policies/plan.toml +++ b/packages/core/src/policy/policies/plan.toml @@ -80,13 +80,6 @@ priority = 40 modes = ["plan"] denyMessage = "You are in Plan Mode with access to read-only tools. Execution of scripts (including those from skills) is blocked." -# Explicitly Allow Read-Only Tools in Plan mode. 
-[[rule]] -toolName = ["activate_skill"] -decision = "allow" -priority = 50 -modes = ["plan"] - [[rule]] toolName = "*" mcpName = "*" @@ -106,14 +99,14 @@ modes = ["plan"] interactive = false [[rule]] -toolName = ["ask_user", "save_memory", "web_fetch"] +toolName = ["ask_user", "save_memory", "web_fetch", "activate_skill"] decision = "ask_user" priority = 50 modes = ["plan"] interactive = true [[rule]] -toolName = ["ask_user", "save_memory", "web_fetch"] +toolName = ["ask_user", "save_memory", "web_fetch", "activate_skill"] decision = "deny" priority = 50 modes = ["plan"] From c7b920717fad72229c788aa8efc646afac7a2965 Mon Sep 17 00:00:00 2001 From: Sri Pasumarthi <111310667+sripasg@users.noreply.github.com> Date: Wed, 8 Apr 2026 14:50:29 -0700 Subject: [PATCH 28/39] feat(test-utils): add CPU performance integration test harness (#24951) --- .github/workflows/perf-nightly.yml | 33 ++ .gitignore | 1 + GEMINI.md | 5 + docs/integration-tests.md | 42 ++ integration-tests/globalSetup.ts | 16 +- package-lock.json | 1 + package.json | 3 + packages/test-utils/src/env-setup.ts | 35 ++ packages/test-utils/src/index.ts | 2 + packages/test-utils/src/perf-test-harness.ts | 546 +++++++++++++++++++ perf-tests/README.md | 121 ++++ perf-tests/baselines.json | 24 + perf-tests/globalSetup.ts | 67 +++ perf-tests/perf-usage.test.ts | 153 ++++++ perf-tests/perf.cold-startup.responses | 2 + perf-tests/perf.idle-cpu.responses | 2 + perf-tests/perf.skill-loading.responses | 2 + perf-tests/tsconfig.json | 12 + perf-tests/vitest.config.ts | 27 + 19 files changed, 1081 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/perf-nightly.yml create mode 100644 packages/test-utils/src/env-setup.ts create mode 100644 packages/test-utils/src/perf-test-harness.ts create mode 100644 perf-tests/README.md create mode 100644 perf-tests/baselines.json create mode 100644 perf-tests/globalSetup.ts create mode 100644 perf-tests/perf-usage.test.ts create mode 100644 
perf-tests/perf.cold-startup.responses create mode 100644 perf-tests/perf.idle-cpu.responses create mode 100644 perf-tests/perf.skill-loading.responses create mode 100644 perf-tests/tsconfig.json create mode 100644 perf-tests/vitest.config.ts diff --git a/.github/workflows/perf-nightly.yml b/.github/workflows/perf-nightly.yml new file mode 100644 index 0000000000..3749df231a --- /dev/null +++ b/.github/workflows/perf-nightly.yml @@ -0,0 +1,33 @@ +name: 'Performance Tests: Nightly' + +on: + schedule: + - cron: '0 3 * * *' # Runs at 3 AM every day + workflow_dispatch: # Allow manual trigger + +permissions: + contents: 'read' + +jobs: + perf-test: + name: 'Run Performance Usage Tests' + runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" + steps: + - name: 'Checkout' + uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Run Performance Tests' + run: 'npm run test:perf' diff --git a/.gitignore b/.gitignore index b6e3804ab5..85902b4a7c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ packages/cli/src/generated/ packages/core/src/generated/ packages/devtools/src/_client-assets.ts .integration-tests/ +.perf-tests/ packages/vscode-ide-companion/*.vsix packages/cli/download-ripgrep*/ diff --git a/GEMINI.md b/GEMINI.md index 60824972d3..4acdfc08be 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -44,8 +44,13 @@ powerful tool for developers. - **Test Commands:** - **Unit (All):** `npm run test` - **Integration (E2E):** `npm run test:e2e` + - > **NOTE**: Please run the memory and perf tests locally **only if** you are + > implementing changes related to those test areas. 
Otherwise skip these + > tests locally and rely on CI to run them on nightly builds. - **Memory (Nightly):** `npm run test:memory` (Runs memory regression tests against baselines. Excluded from `preflight`, run nightly.) + - **Performance (Nightly):** `npm run test:perf` (Runs CPU performance + regression tests against baselines. Excluded from `preflight`, run nightly.) - **Workspace-Specific:** `npm test -w <pkg> -- <path>` (Note: `<path>` must be relative to the workspace root, e.g., `-w @google/gemini-cli-core -- src/routing/modelRouterService.test.ts`) diff --git a/docs/integration-tests.md b/docs/integration-tests.md index bfed813ebc..ddd4eb9c73 100644 --- a/docs/integration-tests.md +++ b/docs/integration-tests.md @@ -157,6 +157,48 @@ The harness (`MemoryTestHarness` in `packages/test-utils`): - Compares against baselines with a 10% tolerance. - Can analyze sustained leaks across 3 snapshots using `analyzeSnapshots()`. +## Performance regression tests + +Performance regression tests are designed to detect wall-clock time, CPU usage, +and event loop delay regressions across key CLI scenarios. They are located in +the `perf-tests` directory. + +These tests are distinct from standard integration tests because they measure +performance metrics and compare them against committed baselines. + +### Running performance tests + +Performance tests are not run as part of the default `npm run test` or +`npm run test:e2e` commands. They are run nightly in CI but can be run manually: + +```bash +npm run test:perf +``` + +### Updating baselines + +If you intentionally change behavior that affects performance, you may need to +update the baselines. Set the `UPDATE_PERF_BASELINES` environment variable to +`true`: + +```bash +UPDATE_PERF_BASELINES=true npm run test:perf +``` + +This will run the tests multiple times (with warmup), apply IQR outlier +filtering, and overwrite `perf-tests/baselines.json`. You should review the +changes and commit the updated baseline file.
+ +### How it works + +The harness (`PerfTestHarness` in `packages/test-utils`): + +- Measures wall-clock time using `performance.now()`. +- Measures CPU usage using `process.cpuUsage()`. +- Monitors event loop delay using `perf_hooks.monitorEventLoopDelay()`. +- Applies IQR (Interquartile Range) filtering to remove outlier samples. +- Compares against baselines with a 15% tolerance. + ## Diagnostics The integration test runner provides several options for diagnostics to help diff --git a/integration-tests/globalSetup.ts b/integration-tests/globalSetup.ts index 9dad51f9b3..4a15d03255 100644 --- a/integration-tests/globalSetup.ts +++ b/integration-tests/globalSetup.ts @@ -14,6 +14,7 @@ import { join, dirname, extname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js'; import { disableMouseTracking } from '@google/gemini-cli-core'; +import { isolateTestEnv } from '../packages/test-utils/src/env-setup.js'; import { createServer, type Server } from 'node:http'; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -88,15 +89,8 @@ export async function setup() { runDir = join(integrationTestsDir, `${Date.now()}`); await mkdir(runDir, { recursive: true }); - // Set the home directory to the test run directory to avoid conflicts - // with the user's local config. - process.env['HOME'] = runDir; - if (process.platform === 'win32') { - process.env['USERPROFILE'] = runDir; - } - // We also need to set the config dir explicitly, since the code might - // construct the path before the HOME env var is set. 
- process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini'); + // Isolate environment variables + isolateTestEnv(runDir); // Download ripgrep to avoid race conditions in parallel tests const available = await canUseRipgrep(); @@ -127,10 +121,6 @@ export async function setup() { } process.env['INTEGRATION_TEST_FILE_DIR'] = runDir; - process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true'; - // Force file storage to avoid keychain prompts/hangs in CI, especially on macOS - process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true'; - process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log'); if (process.env['KEEP_OUTPUT']) { console.log(`Keeping output for test run in: ${runDir}`); diff --git a/package-lock.json b/package-lock.json index 2d3e670b74..3a22da1337 100644 --- a/package-lock.json +++ b/package-lock.json @@ -36,6 +36,7 @@ "@types/ws": "^8.18.1", "@vitest/coverage-v8": "^3.1.1", "@vitest/eslint-plugin": "^1.3.4", + "asciichart": "^1.5.25", "cross-env": "^7.0.3", "depcheck": "^1.4.7", "domexception": "^4.0.0", diff --git a/package.json b/package.json index f531b41dbc..77801eaa7b 100644 --- a/package.json +++ b/package.json @@ -53,6 +53,8 @@ "test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests", "test:memory": "vitest run --root ./memory-tests", "test:memory:update-baselines": "cross-env UPDATE_MEMORY_BASELINES=true vitest run --root ./memory-tests", + "test:perf": "vitest run --root ./perf-tests", + "test:perf:update-baselines": "cross-env UPDATE_PERF_BASELINES=true vitest run --root ./perf-tests", "test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests", "lint": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" eslint . 
--cache --max-warnings 0", @@ -105,6 +107,7 @@ "@types/ws": "^8.18.1", "@vitest/coverage-v8": "^3.1.1", "@vitest/eslint-plugin": "^1.3.4", + "asciichart": "^1.5.25", "cross-env": "^7.0.3", "depcheck": "^1.4.7", "domexception": "^4.0.0", diff --git a/packages/test-utils/src/env-setup.ts b/packages/test-utils/src/env-setup.ts new file mode 100644 index 0000000000..1c5ffd0d21 --- /dev/null +++ b/packages/test-utils/src/env-setup.ts @@ -0,0 +1,35 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { join } from 'node:path'; + +/** + * Isolate the test environment by setting environment variables + * to point to a temporary run directory. + * + * @param runDir - The temporary directory for this test run. + */ +export function isolateTestEnv(runDir: string): void { + // Set the home directory to the test run directory to avoid conflicts + // with the user's local config. + process.env['HOME'] = runDir; + if (process.platform === 'win32') { + process.env['USERPROFILE'] = runDir; + } + + // We also need to set the config dir explicitly, since the code might + // construct the path before the HOME env var is set. 
+ process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini'); + + // Force file storage to avoid keychain prompts/hangs in CI, especially on macOS + process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true'; + + // Mark as integration test + process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true'; + + // Isolate telemetry log + process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log'); +} diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts index 49eaec66d3..e851e7ab8d 100644 --- a/packages/test-utils/src/index.ts +++ b/packages/test-utils/src/index.ts @@ -8,6 +8,8 @@ export * from './file-system-test-helpers.js'; export * from './fixtures/agents.js'; export * from './memory-baselines.js'; export * from './memory-test-harness.js'; +export * from './perf-test-harness.js'; export * from './mock-utils.js'; export * from './test-mcp-server.js'; export * from './test-rig.js'; +export * from './env-setup.js'; diff --git a/packages/test-utils/src/perf-test-harness.ts b/packages/test-utils/src/perf-test-harness.ts new file mode 100644 index 0000000000..c4625077be --- /dev/null +++ b/packages/test-utils/src/perf-test-harness.ts @@ -0,0 +1,546 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { performance } from 'node:perf_hooks'; +import { setTimeout as sleep } from 'node:timers/promises'; +import { readFileSync, writeFileSync, existsSync } from 'node:fs'; + +/** Configuration for asciichart plot function. */ +interface PlotConfig { + height?: number; + format?: (x: number) => string; +} + +/** Type for the asciichart plot function. */ +type PlotFn = (series: number[], config?: PlotConfig) => string; + +/** + * Baseline entry for a single performance test scenario. + */ +export interface PerfBaseline { + wallClockMs: number; + cpuTotalUs: number; + eventLoopDelayP99Ms: number; + timestamp: string; +} + +/** + * Top-level structure of the perf baselines JSON file. 
+ */ +export interface PerfBaselineFile { + version: number; + updatedAt: string; + scenarios: Record; +} + +/** + * A single performance snapshot at a point in time. + */ +export interface PerfSnapshot { + timestamp: number; + label: string; + wallClockMs: number; + cpuUserUs: number; + cpuSystemUs: number; + cpuTotalUs: number; + eventLoopDelayP50Ms: number; + eventLoopDelayP95Ms: number; + eventLoopDelayP99Ms: number; + eventLoopDelayMaxMs: number; +} + +/** + * Result from running a performance test scenario. + */ +export interface PerfTestResult { + scenarioName: string; + samples: PerfSnapshot[]; + filteredSamples: PerfSnapshot[]; + median: PerfSnapshot; + baseline: PerfBaseline | undefined; + withinTolerance: boolean; + deltaPercent: number; + cpuDeltaPercent: number; +} + +/** + * Options for the PerfTestHarness. + */ +export interface PerfTestHarnessOptions { + /** Path to the baselines JSON file */ + baselinesPath: string; + /** Default tolerance percentage (0-100). Default: 15 */ + defaultTolerancePercent?: number; + /** Default CPU tolerance percentage (0-100). Optional */ + defaultCpuTolerancePercent?: number; + /** Number of samples per scenario. Default: 5 */ + sampleCount?: number; + /** Number of warmup runs to discard. Default: 1 */ + warmupCount?: number; + /** Pause in ms between samples. Default: 100 */ + samplePauseMs?: number; +} + +/** + * Active timer state tracked internally. + */ +interface ActiveTimer { + label: string; + startTime: number; + startCpuUsage: NodeJS.CpuUsage; +} + +/** + * PerfTestHarness provides infrastructure for running CPU performance tests. 
+ * + * It handles: + * - High-resolution wall-clock timing via performance.now() + * - CPU usage measurement via process.cpuUsage() + * - Event loop delay monitoring via perf_hooks.monitorEventLoopDelay() + * - IQR outlier filtering for noise reduction + * - Warmup runs to avoid JIT compilation noise + * - Comparing against baselines with configurable tolerance + * - Generating ASCII chart reports + */ +export class PerfTestHarness { + private baselines: PerfBaselineFile; + private readonly baselinesPath: string; + private readonly defaultTolerancePercent: number; + private readonly defaultCpuTolerancePercent?: number; + private readonly sampleCount: number; + private readonly warmupCount: number; + private readonly samplePauseMs: number; + private allResults: PerfTestResult[] = []; + private activeTimers: Map = new Map(); + + constructor(options: PerfTestHarnessOptions) { + this.baselinesPath = options.baselinesPath; + this.defaultTolerancePercent = options.defaultTolerancePercent ?? 15; + this.defaultCpuTolerancePercent = options.defaultCpuTolerancePercent; + this.sampleCount = options.sampleCount ?? 5; + this.warmupCount = options.warmupCount ?? 1; + this.samplePauseMs = options.samplePauseMs ?? 100; + this.baselines = loadPerfBaselines(this.baselinesPath); + } + + /** + * Start a high-resolution timer with CPU tracking. + */ + startTimer(label: string): void { + this.activeTimers.set(label, { + label, + startTime: performance.now(), + startCpuUsage: process.cpuUsage(), + }); + } + + /** + * Stop a timer and return the snapshot. 
+ */ + stopTimer(label: string): PerfSnapshot { + const timer = this.activeTimers.get(label); + if (!timer) { + throw new Error(`No active timer found for label "${label}"`); + } + + const wallClockMs = performance.now() - timer.startTime; + const cpuDelta = process.cpuUsage(timer.startCpuUsage); + this.activeTimers.delete(label); + + return { + timestamp: Date.now(), + label, + wallClockMs, + cpuUserUs: cpuDelta.user, + cpuSystemUs: cpuDelta.system, + cpuTotalUs: cpuDelta.user + cpuDelta.system, + eventLoopDelayP50Ms: 0, + eventLoopDelayP95Ms: 0, + eventLoopDelayP99Ms: 0, + eventLoopDelayMaxMs: 0, + }; + } + + /** + * Measure a function's wall-clock time and CPU usage. + * Returns the snapshot with timing data. + */ + async measure(label: string, fn: () => Promise): Promise { + this.startTimer(label); + await fn(); + return this.stopTimer(label); + } + + /** + * Measure a function with event loop delay monitoring. + * Uses perf_hooks.monitorEventLoopDelay() for histogram data. + */ + async measureWithEventLoop( + label: string, + fn: () => Promise, + ): Promise { + // monitorEventLoopDelay is available in Node.js 12+ + const { monitorEventLoopDelay } = await import('node:perf_hooks'); + const histogram = monitorEventLoopDelay({ resolution: 10 }); + histogram.enable(); + + this.startTimer(label); + await fn(); + const snapshot = this.stopTimer(label); + + histogram.disable(); + + // Convert from nanoseconds to milliseconds + snapshot.eventLoopDelayP50Ms = histogram.percentile(50) / 1e6; + snapshot.eventLoopDelayP95Ms = histogram.percentile(95) / 1e6; + snapshot.eventLoopDelayP99Ms = histogram.percentile(99) / 1e6; + snapshot.eventLoopDelayMaxMs = histogram.max / 1e6; + + return snapshot; + } + + /** + * Run a scenario multiple times with warmup, outlier filtering, and baseline comparison. + * + * @param name - Scenario name (must match baseline key) + * @param fn - Async function that executes one sample of the scenario. 
+ * Must return a PerfSnapshot with measured values. + * @param tolerancePercent - Override default tolerance for this scenario + */ + async runScenario( + name: string, + fn: () => Promise, + tolerancePercent?: number, + ): Promise { + const tolerance = tolerancePercent ?? this.defaultTolerancePercent; + const totalRuns = this.warmupCount + this.sampleCount; + const allSnapshots: PerfSnapshot[] = []; + + for (let i = 0; i < totalRuns; i++) { + const isWarmup = i < this.warmupCount; + const snapshot = await fn(); + snapshot.label = isWarmup + ? `warmup-${i}` + : `sample-${i - this.warmupCount}`; + + if (!isWarmup) { + allSnapshots.push(snapshot); + } + + // Brief pause between samples + await sleep(this.samplePauseMs); + } + + // Apply IQR outlier filtering on wall-clock time + const filteredSnapshots = this.filterOutliers(allSnapshots, 'wallClockMs'); + + // Get median of filtered samples + const median = this.getMedianSnapshot(filteredSnapshots); + median.label = 'median'; + + // Get baseline + const baseline = this.baselines.scenarios[name]; + + // Determine if within tolerance + let deltaPercent = 0; + let cpuDeltaPercent = 0; + let withinTolerance = true; + + if (baseline) { + deltaPercent = + ((median.wallClockMs - baseline.wallClockMs) / baseline.wallClockMs) * + 100; + cpuDeltaPercent = + ((median.cpuTotalUs - baseline.cpuTotalUs) / baseline.cpuTotalUs) * 100; + withinTolerance = deltaPercent <= tolerance; + } + + const result: PerfTestResult = { + scenarioName: name, + samples: allSnapshots, + filteredSamples: filteredSnapshots, + median, + baseline, + withinTolerance, + deltaPercent, + cpuDeltaPercent, + }; + + this.allResults.push(result); + return result; + } + + /** + * Assert that a scenario result is within the baseline tolerance. + */ + assertWithinBaseline( + result: PerfTestResult, + tolerancePercent?: number, + cpuTolerancePercent?: number, + ): void { + const tolerance = tolerancePercent ?? 
this.defaultTolerancePercent; + const cpuTolerance = cpuTolerancePercent ?? this.defaultCpuTolerancePercent; + + if (!result.baseline) { + console.warn( + `⚠ No baseline found for "${result.scenarioName}". ` + + `Run with UPDATE_PERF_BASELINES=true to create one. ` + + `Measured: ${result.median.wallClockMs.toFixed(1)} ms wall-clock.`, + ); + return; + } + + const deltaPercent = + ((result.median.wallClockMs - result.baseline.wallClockMs) / + result.baseline.wallClockMs) * + 100; + + if (deltaPercent > tolerance) { + throw new Error( + `Performance regression detected for "${result.scenarioName}"!\n` + + ` Measured: ${result.median.wallClockMs.toFixed(1)} ms wall-clock\n` + + ` Baseline: ${result.baseline.wallClockMs.toFixed(1)} ms wall-clock\n` + + ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` + + ` CPU total: ${formatUs(result.median.cpuTotalUs)}\n` + + ` EL p99: ${result.median.eventLoopDelayP99Ms.toFixed(1)} ms\n` + + ` Samples: ${result.samples.length} (${result.filteredSamples.length} after IQR filter)`, + ); + } + + if (cpuTolerance !== undefined && result.cpuDeltaPercent > cpuTolerance) { + throw new Error( + `CPU usage regression detected for "${result.scenarioName}"!\n` + + ` Measured: ${formatUs(result.median.cpuTotalUs)}\n` + + ` Baseline: ${formatUs(result.baseline.cpuTotalUs)}\n` + + ` Delta: ${result.cpuDeltaPercent.toFixed(1)}% (tolerance: ${cpuTolerance}%)\n` + + ` Wall-clock: ${result.median.wallClockMs.toFixed(1)} ms\n` + + ` EL p99: ${result.median.eventLoopDelayP99Ms.toFixed(1)} ms`, + ); + } + } + + /** + * Update the baseline for a scenario with the current measured values. 
+ */ + updateScenarioBaseline(result: PerfTestResult): void { + updatePerfBaseline(this.baselinesPath, result.scenarioName, { + wallClockMs: result.median.wallClockMs, + cpuTotalUs: result.median.cpuTotalUs, + eventLoopDelayP99Ms: result.median.eventLoopDelayP99Ms, + }); + // Reload baselines after update + this.baselines = loadPerfBaselines(this.baselinesPath); + console.log( + `Updated baseline for ${result.scenarioName}: ${result.median.wallClockMs.toFixed(1)} ms`, + ); + } + + /** + * Generate an ASCII report with summary table and charts. + */ + async generateReport(results?: PerfTestResult[]): Promise { + const resultsToReport = results ?? this.allResults; + const lines: string[] = []; + + lines.push(''); + lines.push('═══════════════════════════════════════════════════'); + lines.push(' PERFORMANCE TEST REPORT'); + lines.push('═══════════════════════════════════════════════════'); + lines.push(''); + + for (const result of resultsToReport) { + const measured = `${result.median.wallClockMs.toFixed(1)} ms`; + const baseline = result.baseline + ? `${result.baseline.wallClockMs.toFixed(1)} ms` + : 'N/A'; + const delta = result.baseline + ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%` + : 'N/A'; + const status = !result.baseline + ? 'NEW' + : result.withinTolerance + ? 
'✅' + : '❌'; + + lines.push( + `${result.scenarioName}: ${measured} (Baseline: ${baseline}, Delta: ${delta}) ${status}`, + ); + + // Show CPU breakdown + const cpuMs = `${(result.median.cpuTotalUs / 1000).toFixed(1)} ms`; + lines.push( + ` CPU: ${cpuMs} (user: ${formatUs(result.median.cpuUserUs)}, system: ${formatUs(result.median.cpuSystemUs)})`, + ); + + if (result.median.eventLoopDelayP99Ms > 0) { + lines.push( + ` Event loop: p50=${result.median.eventLoopDelayP50Ms.toFixed(1)}ms p95=${result.median.eventLoopDelayP95Ms.toFixed(1)}ms p99=${result.median.eventLoopDelayP99Ms.toFixed(1)}ms max=${result.median.eventLoopDelayMaxMs.toFixed(1)}ms`, + ); + } + + lines.push( + ` Samples: ${result.samples.length} → ${result.filteredSamples.length} after IQR filter`, + ); + } + lines.push(''); + + // Generate ASCII chart for wall-clock per scenario + try { + // @ts-expect-error - asciichart may not have types + const asciichart = (await import('asciichart')) as { + default?: { plot?: PlotFn }; + plot?: PlotFn; + }; + const plot: PlotFn | undefined = + asciichart.default?.plot ?? 
asciichart.plot; + + for (const result of resultsToReport) { + if (result.filteredSamples.length > 2) { + lines.push(`📈 Wall-clock trend: ${result.scenarioName}`); + lines.push('─'.repeat(60)); + + const wallClockData = result.filteredSamples.map( + (s) => s.wallClockMs, + ); + + if (plot) { + const chart = plot(wallClockData, { + height: 8, + format: (x: number) => `${x.toFixed(0)} ms`.padStart(10), + }); + lines.push(chart); + } + + const labels = result.filteredSamples.map((s) => s.label); + lines.push(' ' + labels.join(' → ')); + lines.push(''); + } + } + } catch { + lines.push( + '(asciichart not available — install with: npm install --save-dev asciichart)', + ); + lines.push(''); + } + + lines.push('═══════════════════════════════════════════════════'); + lines.push(''); + + const report = lines.join('\n'); + console.log(report); + return report; + } + + /** + * Filter outliers using the Interquartile Range (IQR) method. + * Removes samples where the given metric falls outside Q1 - 1.5*IQR or Q3 + 1.5*IQR. + */ + private filterOutliers( + snapshots: PerfSnapshot[], + metric: keyof PerfSnapshot, + ): PerfSnapshot[] { + if (snapshots.length < 4) { + // Not enough data for meaningful IQR filtering + return [...snapshots]; + } + + const sorted = [...snapshots].sort( + (a, b) => (a[metric] as number) - (b[metric] as number), + ); + const q1Idx = Math.floor(sorted.length * 0.25); + const q3Idx = Math.floor(sorted.length * 0.75); + + const q1 = sorted[q1Idx]![metric] as number; + const q3 = sorted[q3Idx]![metric] as number; + const iqr = q3 - q1; + const lowerBound = q1 - 1.5 * iqr; + const upperBound = q3 + 1.5 * iqr; + + return snapshots.filter((s) => { + const val = s[metric] as number; + return val >= lowerBound && val <= upperBound; + }); + } + + /** + * Get the median snapshot by wall-clock time from a sorted list. 
+ */ + private getMedianSnapshot(snapshots: PerfSnapshot[]): PerfSnapshot { + if (snapshots.length === 0) { + throw new Error('Cannot compute median of empty snapshot list'); + } + + const sorted = [...snapshots].sort((a, b) => a.wallClockMs - b.wallClockMs); + const medianIdx = Math.floor(sorted.length / 2); + return { ...sorted[medianIdx]! }; + } +} + +// ─── Baseline management ───────────────────────────────────────────── + +/** + * Load perf baselines from a JSON file. + */ +export function loadPerfBaselines(path: string): PerfBaselineFile { + if (!existsSync(path)) { + return { + version: 1, + updatedAt: new Date().toISOString(), + scenarios: {}, + }; + } + + const content = readFileSync(path, 'utf-8'); + return JSON.parse(content) as PerfBaselineFile; +} + +/** + * Save perf baselines to a JSON file. + */ +export function savePerfBaselines( + path: string, + baselines: PerfBaselineFile, +): void { + baselines.updatedAt = new Date().toISOString(); + writeFileSync(path, JSON.stringify(baselines, null, 2) + '\n'); +} + +/** + * Update (or create) a single scenario baseline in the file. + */ +export function updatePerfBaseline( + path: string, + scenarioName: string, + measured: { + wallClockMs: number; + cpuTotalUs: number; + eventLoopDelayP99Ms: number; + }, +): void { + const baselines = loadPerfBaselines(path); + baselines.scenarios[scenarioName] = { + wallClockMs: measured.wallClockMs, + cpuTotalUs: measured.cpuTotalUs, + eventLoopDelayP99Ms: measured.eventLoopDelayP99Ms, + timestamp: new Date().toISOString(), + }; + savePerfBaselines(path, baselines); +} + +// ─── Helpers ───────────────────────────────────────────────────────── + +/** + * Format microseconds as a human-readable string. 
+ */ +function formatUs(us: number): string { + if (us > 1_000_000) { + return `${(us / 1_000_000).toFixed(2)} s`; + } + if (us > 1_000) { + return `${(us / 1_000).toFixed(1)} ms`; + } + return `${us} μs`; +} diff --git a/perf-tests/README.md b/perf-tests/README.md new file mode 100644 index 0000000000..c8e9e448c1 --- /dev/null +++ b/perf-tests/README.md @@ -0,0 +1,121 @@ +# CPU Performance Integration Test Harness + +## Overview + +This directory contains performance/CPU integration tests for the Gemini CLI. +These tests measure wall-clock time, CPU usage, and event loop responsiveness to +detect regressions across key scenarios. + +CPU performance is inherently noisy, especially in CI. The harness addresses +this with: + +- **IQR outlier filtering** — discards anomalous samples +- **Median sampling** — takes N runs, reports the median after filtering +- **Warmup runs** — discards the first run to mitigate JIT compilation noise +- **15% default tolerance** — won't panic at slight regressions + +## Running + +```bash +# Run tests (compare against committed baselines) +npm run test:perf + +# Update baselines (after intentional changes) +npm run test:perf:update-baselines + +# Verbose output +VERBOSE=true npm run test:perf + +# Keep test artifacts for debugging +KEEP_OUTPUT=true npm run test:perf +``` + +## How It Works + +### Measurement Primitives + +The `PerfTestHarness` class (in `packages/test-utils`) provides: + +- **`performance.now()`** — high-resolution wall-clock timing +- **`process.cpuUsage()`** — user + system CPU microseconds (delta between + start/stop) +- **`perf_hooks.monitorEventLoopDelay()`** — event loop delay histogram + (p50/p95/p99/max) + +### Noise Reduction + +1. **Warmup**: First run is discarded to mitigate JIT compilation artifacts +2. **Multiple samples**: Each scenario runs N times (default 5) +3. **IQR filtering**: Samples outside Q1−1.5×IQR and Q3+1.5×IQR are discarded +4. 
**Median**: The median of remaining samples is used for comparison + +### Baseline Management + +Baselines are stored in `baselines.json` in this directory. Each scenario has: + +```json +{ + "cold-startup-time": { + "wallClockMs": 1234.5, + "cpuTotalUs": 567890, + "eventLoopDelayP99Ms": 12.3, + "timestamp": "2026-04-08T..." + } +} +``` + +Tests fail if the measured value exceeds `baseline × 1.15` (15% tolerance). + +To recalibrate after intentional changes: + +```bash +npm run test:perf:update-baselines +# then commit baselines.json +``` + +### Report Output + +After all tests, the harness prints a plain-text summary: + +``` +═══════════════════════════════════════════════════ + PERFORMANCE TEST REPORT +═══════════════════════════════════════════════════ + +cold-startup-time: 1234.5 ms (Baseline: 1200.0 ms, Delta: +2.9%) ✅ +idle-cpu-usage: 2.1 % (Baseline: 2.0 %, Delta: +5.0%) ✅ +skill-loading-time: 1567.8 ms (Baseline: 1500.0 ms, Delta: +4.5%) ✅ +``` + +## Architecture + +``` +perf-tests/ +├── README.md ← you are here +├── baselines.json ← committed baseline values +├── globalSetup.ts ← test environment setup +├── perf-usage.test.ts ← test scenarios +├── perf.*.responses ← fake API responses per scenario +├── tsconfig.json ← TypeScript config +└── vitest.config.ts ← vitest config (serial, isolated) + +packages/test-utils/src/ +├── perf-test-harness.ts ← PerfTestHarness class +└── index.ts ← re-exports +``` + +## CI Integration + +These tests are **excluded from `preflight`** and designed for nightly CI: + +```yaml +- name: Performance regression tests + run: npm run test:perf +``` + +## Adding a New Scenario + +1. Add a fake response file: `perf.{name}.responses` +2. Add a test case in `perf-usage.test.ts` using `harness.runScenario()` +3. Run `npm run test:perf:update-baselines` to establish initial baseline +4. 
Commit the updated `baselines.json` diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json new file mode 100644 index 0000000000..a6bad73574 --- /dev/null +++ b/perf-tests/baselines.json @@ -0,0 +1,24 @@ +{ + "version": 1, + "updatedAt": "2026-04-08T18:51:29.839Z", + "scenarios": { + "cold-startup-time": { + "wallClockMs": 1333.4230420000004, + "cpuTotalUs": 1711, + "eventLoopDelayP99Ms": 0, + "timestamp": "2026-04-08T18:50:58.124Z" + }, + "idle-cpu-usage": { + "wallClockMs": 5001.926125, + "cpuTotalUs": 128518, + "eventLoopDelayP99Ms": 12.705791, + "timestamp": "2026-04-08T18:51:23.938Z" + }, + "skill-loading-time": { + "wallClockMs": 1372.4463749999995, + "cpuTotalUs": 1550, + "eventLoopDelayP99Ms": 0, + "timestamp": "2026-04-08T18:51:29.839Z" + } + } +} diff --git a/perf-tests/globalSetup.ts b/perf-tests/globalSetup.ts new file mode 100644 index 0000000000..77447bd2ba --- /dev/null +++ b/perf-tests/globalSetup.ts @@ -0,0 +1,67 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { mkdir, readdir, rm } from 'node:fs/promises'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js'; +import { isolateTestEnv } from '../packages/test-utils/src/env-setup.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const rootDir = join(__dirname, '..'); +const perfTestsDir = join(rootDir, '.perf-tests'); +const KEEP_RUNS_COUNT = 5; +let runDir = ''; + +export async function setup() { + runDir = join(perfTestsDir, `${Date.now()}`); + await mkdir(runDir, { recursive: true }); + + // Isolate environment variables + isolateTestEnv(runDir); + + // Download ripgrep to avoid race conditions + const available = await canUseRipgrep(); + if (!available) { + throw new Error('Failed to download ripgrep binary'); + } + + // Clean up old test runs, keeping the latest few for debugging + try { + const testRuns 
= await readdir(perfTestsDir); + if (testRuns.length > KEEP_RUNS_COUNT) { + const oldRuns = testRuns + .sort() + .slice(0, testRuns.length - KEEP_RUNS_COUNT); + await Promise.all( + oldRuns.map((oldRun) => + rm(join(perfTestsDir, oldRun), { + recursive: true, + force: true, + }), + ), + ); + } + } catch (e) { + console.error('Error cleaning up old perf test runs:', e); + } + + process.env['INTEGRATION_TEST_FILE_DIR'] = runDir; + process.env['VERBOSE'] = process.env['VERBOSE'] ?? 'false'; + + console.log(`\nPerf test output directory: ${runDir}`); +} + +export async function teardown() { + // Cleanup unless KEEP_OUTPUT is set + if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) { + try { + await rm(runDir, { recursive: true, force: true }); + } catch (e) { + console.warn('Failed to clean up perf test directory:', e); + } + } +} diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts new file mode 100644 index 0000000000..3f92cd9f91 --- /dev/null +++ b/perf-tests/perf-usage.test.ts @@ -0,0 +1,153 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, beforeAll, afterAll } from 'vitest'; +import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const BASELINES_PATH = join(__dirname, 'baselines.json'); +const UPDATE_BASELINES = process.env['UPDATE_PERF_BASELINES'] === 'true'; +const TOLERANCE_PERCENT = 15; + +// Use fewer samples locally for faster iteration, more in CI +const SAMPLE_COUNT = process.env['CI'] ? 
5 : 3; +const WARMUP_COUNT = 1; + +describe('CPU Performance Tests', () => { + let harness: PerfTestHarness; + + beforeAll(() => { + harness = new PerfTestHarness({ + baselinesPath: BASELINES_PATH, + defaultTolerancePercent: TOLERANCE_PERCENT, + sampleCount: SAMPLE_COUNT, + warmupCount: WARMUP_COUNT, + }); + }); + + afterAll(async () => { + // Generate the summary report after all tests + await harness.generateReport(); + }); + + it('cold-startup-time: startup completes within baseline', async () => { + const result = await harness.runScenario('cold-startup-time', async () => { + const rig = new TestRig(); + try { + rig.setup('perf-cold-startup', { + fakeResponsesPath: join(__dirname, 'perf.cold-startup.responses'), + }); + + return await harness.measure('cold-startup', async () => { + await rig.run({ + args: ['hello'], + timeout: 120000, + env: { GEMINI_API_KEY: 'fake-perf-test-key' }, + }); + }); + } finally { + await rig.cleanup(); + } + }); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('idle-cpu-usage: CPU stays low when idle', async () => { + const IDLE_OBSERVATION_MS = 5000; + + const result = await harness.runScenario('idle-cpu-usage', async () => { + const rig = new TestRig(); + try { + rig.setup('perf-idle-cpu', { + fakeResponsesPath: join(__dirname, 'perf.idle-cpu.responses'), + }); + + // First, run a prompt to get the CLI into idle state + await rig.run({ + args: ['hello'], + timeout: 120000, + env: { GEMINI_API_KEY: 'fake-perf-test-key' }, + }); + + // Now measure CPU during idle period in the test process + return await harness.measureWithEventLoop('idle-cpu', async () => { + // Simulate idle period — just wait + const { setTimeout: sleep } = await import('node:timers/promises'); + await sleep(IDLE_OBSERVATION_MS); + }); + } finally { + await rig.cleanup(); + } + }); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + 
harness.assertWithinBaseline(result); + } + }); + + it('skill-loading-time: startup with many skills within baseline', async () => { + const SKILL_COUNT = 20; + + const result = await harness.runScenario('skill-loading-time', async () => { + const rig = new TestRig(); + try { + rig.setup('perf-skill-loading', { + fakeResponsesPath: join(__dirname, 'perf.skill-loading.responses'), + }); + + // Create many skill directories with SKILL.md files + for (let i = 0; i < SKILL_COUNT; i++) { + const skillDir = `.gemini/skills/perf-skill-${i}`; + rig.mkdir(skillDir); + rig.createFile( + `${skillDir}/SKILL.md`, + [ + '---', + `name: perf-skill-${i}`, + `description: Performance test skill number ${i}`, + `activation: manual`, + '---', + '', + `# Performance Test Skill ${i}`, + '', + `This is a test skill for measuring skill loading performance.`, + `It contains some content to simulate real-world skill files.`, + '', + `## Usage`, + '', + `Use this skill by activating it with @perf-skill-${i}.`, + ].join('\n'), + ); + } + + return await harness.measure('skill-loading', async () => { + await rig.run({ + args: ['hello'], + timeout: 120000, + env: { GEMINI_API_KEY: 'fake-perf-test-key' }, + }); + }); + } finally { + await rig.cleanup(); + } + }); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); +}); diff --git a/perf-tests/perf.cold-startup.responses b/perf-tests/perf.cold-startup.responses new file mode 100644 index 0000000000..7a5703e3d2 --- /dev/null +++ b/perf-tests/perf.cold-startup.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. 
What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} diff --git a/perf-tests/perf.idle-cpu.responses b/perf-tests/perf.idle-cpu.responses new file mode 100644 index 0000000000..a0d05086d2 --- /dev/null +++ b/perf-tests/perf.idle-cpu.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":8,"totalTokenCount":13,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} diff --git a/perf-tests/perf.skill-loading.responses b/perf-tests/perf.skill-loading.responses new file mode 100644 index 0000000000..eb6c96fe9c --- /dev/null +++ b/perf-tests/perf.skill-loading.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! 
I'm ready to assist you with your project."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":10,"totalTokenCount":15,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} diff --git a/perf-tests/tsconfig.json b/perf-tests/tsconfig.json new file mode 100644 index 0000000000..7f2c199703 --- /dev/null +++ b/perf-tests/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "allowJs": true + }, + "include": ["**/*.ts"], + "references": [ + { "path": "../packages/core" }, + { "path": "../packages/test-utils" } + ] +} diff --git a/perf-tests/vitest.config.ts b/perf-tests/vitest.config.ts new file mode 100644 index 0000000000..e9baeec0bf --- /dev/null +++ b/perf-tests/vitest.config.ts @@ -0,0 +1,27 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + testTimeout: 600000, // 10 minutes — performance profiling needs time for multiple samples + globalSetup: './globalSetup.ts', + reporters: ['default'], + include: ['**/*.test.ts'], + retry: 0, // No retries — noise is handled by IQR filtering and tolerance + fileParallelism: false, // Must run serially to avoid CPU contention + pool: 'forks', + poolOptions: { + forks: { + singleFork: true, // Single process for accurate per-test CPU readings + }, + }, + env: { + GEMINI_TEST_TYPE: 'perf', + }, + }, +}); From af3638640c429fec6f77c8aada326bd779e2af33 Mon Sep 17 00:00:00 2001 From: Emily Hedlund Date: Wed, 8 Apr 2026 15:00:50 -0700 Subject: [PATCH 29/39] fix(core): resolve windows symlink bypass and stabilize sandbox integration tests (#24834) --- .../src/sandbox/linux/LinuxSandboxManager.ts | 11 +- .../sandbox/macos/MacOsSandboxManager.test.ts | 11 +- .../src/sandbox/macos/MacOsSandboxManager.ts | 13 +- .../windows/WindowsSandboxManager.test.ts | 8 +- 
.../sandbox/windows/WindowsSandboxManager.ts | 78 +- .../sandboxManager.integration.test.ts | 868 +++++++++--------- .../core/src/services/sandboxManager.test.ts | 8 +- packages/core/src/services/sandboxManager.ts | 92 +- 8 files changed, 586 insertions(+), 503 deletions(-) diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts index f210138127..facd2fe46f 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts @@ -249,8 +249,11 @@ export class LinuxSandboxManager implements SandboxManager { const sanitizedEnv = sanitizeEnvironment(req.env, sanitizationConfig); - const { allowed: allowedPaths, forbidden: forbiddenPaths } = - await resolveSandboxPaths(this.options, req); + const resolvedPaths = await resolveSandboxPaths( + this.options, + req, + mergedAdditional, + ); for (const file of GOVERNANCE_FILES) { const filePath = join(this.options.workspace, file.path); @@ -261,8 +264,8 @@ export class LinuxSandboxManager implements SandboxManager { workspace: this.options.workspace, workspaceWrite, networkAccess, - allowedPaths, - forbiddenPaths, + allowedPaths: resolvedPaths.policyAllowed, + forbiddenPaths: resolvedPaths.forbidden, additionalPermissions: mergedAdditional, includeDirectories: this.options.includeDirectories || [], maskFilePath: this.getMaskFilePath(), diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts index 7b58f70696..c7bdd351a7 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts @@ -233,7 +233,10 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - allowedPaths: ['/tmp/allowed1', '/tmp/allowed2'], + allowedPaths: expect.arrayContaining([ + '/tmp/allowed1', + 
'/tmp/allowed2', + ]), }), ); }); @@ -255,7 +258,7 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - forbiddenPaths: ['/tmp/forbidden1'], + forbiddenPaths: expect.arrayContaining(['/tmp/forbidden1']), }), ); }); @@ -275,7 +278,7 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - forbiddenPaths: ['/tmp/does-not-exist'], + forbiddenPaths: expect.arrayContaining(['/tmp/does-not-exist']), }), ); }); @@ -299,7 +302,7 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ allowedPaths: [], - forbiddenPaths: ['/tmp/conflict'], + forbiddenPaths: expect.arrayContaining(['/tmp/conflict']), }), ); }); diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts index 44774e8e82..27e6867030 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts @@ -106,13 +106,9 @@ export class MacOsSandboxManager implements SandboxManager { const isYolo = this.options.modeConfig?.yolo ?? 
false; const workspaceWrite = !isReadonlyMode || isApproved || isYolo; - const defaultNetwork = this.options.modeConfig?.network || req.policy?.networkAccess || isYolo; - const { allowed: allowedPaths, forbidden: forbiddenPaths } = - await resolveSandboxPaths(this.options, req); - // Fetch persistent approvals for this command const commandName = await getFullCommandName(currentReq); const persistentPermissions = allowOverrides @@ -137,6 +133,11 @@ export class MacOsSandboxManager implements SandboxManager { false, }; + const resolvedPaths = await resolveSandboxPaths( + this.options, + req, + mergedAdditional, + ); const { command: finalCommand, args: finalArgs } = handleReadWriteCommands( req, mergedAdditional, @@ -147,10 +148,10 @@ export class MacOsSandboxManager implements SandboxManager { const sandboxArgs = buildSeatbeltProfile({ workspace: this.options.workspace, allowedPaths: [ - ...allowedPaths, + ...resolvedPaths.policyAllowed, ...(this.options.includeDirectories || []), ], - forbiddenPaths, + forbiddenPaths: resolvedPaths.forbidden, networkAccess: mergedAdditional.network, workspaceWrite, additionalPermissions: mergedAdditional, diff --git a/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts b/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts index c814f740f7..40902b9121 100644 --- a/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts @@ -398,16 +398,16 @@ describe('WindowsSandboxManager', () => { expect(icaclsArgs).toContainEqual([ path.resolve(longPath), '/grant', - '*S-1-16-4096:(OI)(CI)(M)', + '*S-1-16-4096:(M)', '/setintegritylevel', - '(OI)(CI)Low', + 'Low', ]); expect(icaclsArgs).toContainEqual([ path.resolve(devicePath), '/grant', - '*S-1-16-4096:(OI)(CI)(M)', + '*S-1-16-4096:(M)', '/setintegritylevel', - '(OI)(CI)Low', + 'Low', ]); }, ); diff --git a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts 
b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts index a2d6428906..86d1eda641 100644 --- a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts @@ -15,7 +15,6 @@ import { GOVERNANCE_FILES, findSecretFiles, type GlobalSandboxOptions, - sanitizePaths, type SandboxPermissions, type ParsedSandboxDenial, resolveSandboxPaths, @@ -51,6 +50,10 @@ const __dirname = path.dirname(__filename); // S-1-16-4096 is the SID for "Low Mandatory Level" (Low Integrity) const LOW_INTEGRITY_SID = '*S-1-16-4096'; +// icacls flags: (OI) Object Inherit, (CI) Container Inherits. +// Omit /T (recursive) for performance; (OI)(CI) ensures inheritance for new items. +const DIRECTORY_FLAGS = '(OI)(CI)'; + /** * A SandboxManager implementation for Windows that uses Restricted Tokens, * Job Objects, and Low Integrity levels for process isolation. @@ -277,8 +280,11 @@ export class WindowsSandboxManager implements SandboxManager { this.options.modeConfig?.network ?? req.policy?.networkAccess ?? false; const networkAccess = defaultNetwork || mergedAdditional.network; - const { allowed: allowedPaths, forbidden: forbiddenPaths } = - await resolveSandboxPaths(this.options, req); + const resolvedPaths = await resolveSandboxPaths( + this.options, + req, + mergedAdditional, + ); // Track all roots where Low Integrity write access has been granted. // New files created within these roots will inherit the Low label. @@ -294,51 +300,45 @@ export class WindowsSandboxManager implements SandboxManager { : false; if (!isReadonlyMode || isApproved) { - await this.grantLowIntegrityAccess(this.options.workspace); - writableRoots.push(this.options.workspace); + await this.grantLowIntegrityAccess(resolvedPaths.workspace.resolved); + writableRoots.push(resolvedPaths.workspace.resolved); } // 2. 
Globally included directories - const includeDirs = sanitizePaths(this.options.includeDirectories); - for (const includeDir of includeDirs) { + for (const includeDir of resolvedPaths.globalIncludes) { await this.grantLowIntegrityAccess(includeDir); writableRoots.push(includeDir); } // 3. Explicitly allowed paths from the request policy - for (const allowedPath of allowedPaths) { - const resolved = resolveToRealPath(allowedPath); + for (const allowedPath of resolvedPaths.policyAllowed) { try { - await fs.promises.access(resolved, fs.constants.F_OK); + await fs.promises.access(allowedPath, fs.constants.F_OK); } catch { throw new Error( - `Sandbox request rejected: Allowed path does not exist: ${resolved}. ` + + `Sandbox request rejected: Allowed path does not exist: ${allowedPath}. ` + 'On Windows, granular sandbox access can only be granted to existing paths to avoid broad parent directory permissions.', ); } - await this.grantLowIntegrityAccess(resolved); - writableRoots.push(resolved); + await this.grantLowIntegrityAccess(allowedPath); + writableRoots.push(allowedPath); } // 4. Additional write paths (e.g. from internal __write command) - const additionalWritePaths = sanitizePaths( - mergedAdditional.fileSystem?.write, - ); - for (const writePath of additionalWritePaths) { - const resolved = resolveToRealPath(writePath); + for (const writePath of resolvedPaths.policyWrite) { try { - await fs.promises.access(resolved, fs.constants.F_OK); - await this.grantLowIntegrityAccess(resolved); + await fs.promises.access(writePath, fs.constants.F_OK); + await this.grantLowIntegrityAccess(writePath); continue; } catch { // If the file doesn't exist, it's only allowed if it resides within a granted root. const isInherited = writableRoots.some((root) => - isSubpath(root, resolved), + isSubpath(root, writePath), ); if (!isInherited) { throw new Error( - `Sandbox request rejected: Additional write path does not exist and its parent directory is not allowed: ${resolved}. 
` + + `Sandbox request rejected: Additional write path does not exist and its parent directory is not allowed: ${writePath}. ` + 'On Windows, granular sandbox access can only be granted to existing paths to avoid broad parent directory permissions.', ); } @@ -350,9 +350,9 @@ export class WindowsSandboxManager implements SandboxManager { // processes to ensure they cannot be read or written. const secretsToBlock: string[] = []; const searchDirs = new Set([ - this.options.workspace, - ...allowedPaths, - ...includeDirs, + resolvedPaths.workspace.resolved, + ...resolvedPaths.policyAllowed, + ...resolvedPaths.globalIncludes, ]); for (const dir of searchDirs) { try { @@ -382,7 +382,7 @@ export class WindowsSandboxManager implements SandboxManager { // is restricted to avoid host corruption. External commands rely on // Low Integrity read/write restrictions, while internal commands // use the manifest for enforcement. - for (const forbiddenPath of forbiddenPaths) { + for (const forbiddenPath of resolvedPaths.forbidden) { try { await this.denyLowIntegrityAccess(forbiddenPath); } catch (e) { @@ -398,14 +398,14 @@ export class WindowsSandboxManager implements SandboxManager { // the sandboxed process from creating them with Low integrity. // By being created as Medium integrity, they are write-protected from Low processes. for (const file of GOVERNANCE_FILES) { - const filePath = path.join(this.options.workspace, file.path); + const filePath = path.join(resolvedPaths.workspace.resolved, file.path); this.touch(filePath, file.isDirectory); } // 4. Forbidden paths manifest // We use a manifest file to avoid command-line length limits. 
const allForbidden = Array.from( - new Set([...secretsToBlock, ...forbiddenPaths]), + new Set([...secretsToBlock, ...resolvedPaths.forbidden]), ); const tempDir = fs.mkdtempSync( path.join(os.tmpdir(), 'gemini-cli-forbidden-'), @@ -475,14 +475,19 @@ export class WindowsSandboxManager implements SandboxManager { } try { + const stats = await fs.promises.stat(resolvedPath); + const isDirectory = stats.isDirectory(); + + const flags = isDirectory ? DIRECTORY_FLAGS : ''; + // 1. Grant explicit Modify access to the Low Integrity SID // 2. Set the Mandatory Label to Low to allow "Write Up" from Low processes await spawnAsync('icacls', [ resolvedPath, '/grant', - `${LOW_INTEGRITY_SID}:(OI)(CI)(M)`, + `${LOW_INTEGRITY_SID}:${flags}(M)`, '/setintegritylevel', - '(OI)(CI)Low', + `${flags}Low`, ]); this.allowedCache.add(resolvedPath); } catch (e) { @@ -512,29 +517,26 @@ export class WindowsSandboxManager implements SandboxManager { return; } - // icacls flags: (OI) Object Inherit, (CI) Container Inherit, (F) Full Access Deny. - // Omit /T (recursive) for performance; (OI)(CI) ensures inheritance for new items. - // Windows dynamically evaluates existing items, though deep explicit Allow ACEs - // could potentially bypass this inherited Deny rule. - const DENY_ALL_INHERIT = '(OI)(CI)(F)'; - // icacls fails on non-existent paths, so we cannot explicitly deny // paths that do not yet exist (unlike macOS/Linux). // Skip to prevent sandbox initialization failure. + let isDirectory = false; try { - await fs.promises.stat(resolvedPath); + const stats = await fs.promises.stat(resolvedPath); + isDirectory = stats.isDirectory(); } catch (e: unknown) { if (isNodeError(e) && e.code === 'ENOENT') { return; } throw e; } + const flags = isDirectory ? 
DIRECTORY_FLAGS : ''; try { await spawnAsync('icacls', [ resolvedPath, '/deny', - `${LOW_INTEGRITY_SID}:${DENY_ALL_INHERIT}`, + `${LOW_INTEGRITY_SID}:${flags}(F)`, ]); this.deniedCache.add(resolvedPath); } catch (e) { diff --git a/packages/core/src/services/sandboxManager.integration.test.ts b/packages/core/src/services/sandboxManager.integration.test.ts index 4923de97bf..1461b6d606 100644 --- a/packages/core/src/services/sandboxManager.integration.test.ts +++ b/packages/core/src/services/sandboxManager.integration.test.ts @@ -1,4 +1,4 @@ -/** +/** * @license * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 @@ -8,11 +8,10 @@ import { createSandboxManager } from './sandboxManagerFactory.js'; import { ShellExecutionService } from './shellExecutionService.js'; import { getSecureSanitizationConfig } from './environmentSanitization.js'; import { + type SandboxManager, type SandboxedCommand, - NoopSandboxManager, - LocalSandboxManager, } from './sandboxManager.js'; -import { execFile, execSync } from 'node:child_process'; +import { execFile } from 'node:child_process'; import { promisify } from 'node:util'; import os from 'node:os'; import fs from 'node:fs'; @@ -20,49 +19,59 @@ import path from 'node:path'; import http from 'node:http'; /** - * Abstracts platform-specific shell commands for integration testing. + * Cross-platform command wrappers using Node.js inline scripts. + * Ensures consistent execution behavior and reliable exit codes across + * different host operating systems and restricted sandbox environments. */ const Platform = { isWindows: os.platform() === 'win32', + isMac: os.platform() === 'darwin', /** Returns a command to create an empty file. */ touch(filePath: string) { - return this.isWindows - ? 
{ - command: 'powershell.exe', - args: [ - '-NoProfile', - '-Command', - `New-Item -Path "${filePath}" -ItemType File -Force`, - ], - } - : { command: 'touch', args: [filePath] }; + return { + command: process.execPath, + args: [ + '-e', + `require("node:fs").writeFileSync(${JSON.stringify(filePath)}, "")`, + ], + }; }, /** Returns a command to read a file's content. */ cat(filePath: string) { - return this.isWindows - ? { command: 'cmd.exe', args: ['/c', `type "${filePath}"`] } - : { command: 'cat', args: [filePath] }; + return { + command: process.execPath, + args: [ + '-e', + `console.log(require("node:fs").readFileSync(${JSON.stringify(filePath)}, "utf8"))`, + ], + }; }, /** Returns a command to echo a string. */ echo(text: string) { - return this.isWindows - ? { command: 'cmd.exe', args: ['/c', `echo ${text}`] } - : { command: 'echo', args: [text] }; + return { + command: process.execPath, + args: ['-e', `console.log(${JSON.stringify(text)})`], + }; }, /** Returns a command to perform a network request. */ curl(url: string) { - return { command: 'curl', args: ['-s', '--connect-timeout', '1', url] }; + return { + command: process.execPath, + args: [ + '-e', + `require("node:http").get(${JSON.stringify(url)}, (res) => { res.on("data", (d) => process.stdout.write(d)); res.on("end", () => process.exit(0)); }).on("error", () => process.exit(1));`, + ], + }; }, /** Returns a command that checks if the current terminal is interactive. */ isPty() { - return this.isWindows - ? 'powershell.exe -NoProfile -Command "echo True"' - : 'bash -c "if [ -t 1 ]; then echo True; else echo False; fi"'; + // ShellExecutionService.execute expects a raw shell string + return `"${process.execPath}" -e "console.log(process.stdout.isTTY ? 'True' : 'False')"`; }, /** Returns a path that is strictly outside the workspace and likely blocked. 
*/ @@ -96,462 +105,465 @@ async function runCommand(command: SandboxedCommand) { } /** - * Determines if the system has the necessary binaries to run the sandbox. - * Throws an error if a supported platform is missing its required tools. + * Asserts the result of a sandboxed command execution, and provides detailed + * diagnostics on failure. */ -function ensureSandboxAvailable(): boolean { - const platform = os.platform(); +function assertResult( + result: { status: number; stdout: string; stderr: string }, + command: SandboxedCommand, + expected: 'success' | 'failure', +) { + const isSuccess = result.status === 0; + const shouldBeSuccess = expected === 'success'; - if (platform === 'win32') { - // Windows sandboxing relies on icacls, which is a core system utility and - // always available. - // TODO: reenable once flakiness is addressed - return false; - } - - if (platform === 'darwin') { - if (fs.existsSync('/usr/bin/sandbox-exec')) { - try { - execSync('sandbox-exec -p "(version 1)(allow default)" echo test', { - stdio: 'ignore', - }); - return true; - } catch { - // eslint-disable-next-line no-console - console.warn( - 'sandbox-exec is present but cannot be used (likely running inside a sandbox already). Skipping sandbox tests.', - ); - return false; - } + if (isSuccess === shouldBeSuccess) { + if (shouldBeSuccess) { + expect(result.status).toBe(0); + } else { + expect(result.status).not.toBe(0); } - throw new Error( - 'Sandboxing tests on macOS require /usr/bin/sandbox-exec to be present.', - ); + return; } - if (platform === 'linux') { - try { - execSync('which bwrap', { stdio: 'ignore' }); - return true; - } catch { - throw new Error( - 'Sandboxing tests on Linux require bubblewrap (bwrap) to be installed.', - ); - } - } + const commandLine = `${command.program} ${command.args.join(' ')}`; + const message = `Command ${ + shouldBeSuccess ? 'failed' : 'succeeded' + } unexpectedly. 
+Command: ${commandLine} +CWD: ${command.cwd || 'N/A'} +Status: ${result.status} (expected ${expected})${ + result.stdout ? `\nStdout: ${result.stdout.trim()}` : '' + }${result.stderr ? `\nStderr: ${result.stderr.trim()}` : ''}`; - return false; + throw new Error(message); } describe('SandboxManager Integration', () => { - const workspace = process.cwd(); - const manager = createSandboxManager({ enabled: true }, { workspace }); + const tempDirectories: string[] = []; - // Skip if we are on an unsupported platform or if it's a NoopSandboxManager - const shouldSkip = - manager instanceof NoopSandboxManager || - manager instanceof LocalSandboxManager || - !ensureSandboxAvailable(); + /** + * Creates a temporary directory. + * - macOS: Created in process.cwd() to avoid the seatbelt profile's global os.tmpdir() whitelist. + * - Win/Linux: Created in os.tmpdir() because enforcing sandbox restrictions inside a large directory can be very slow. + */ + function createTempDir(prefix = 'gemini-sandbox-test-'): string { + const baseDir = Platform.isMac + ? 
path.join(process.cwd(), `.${prefix}`) + : path.join(os.tmpdir(), prefix); - describe.skipIf(shouldSkip)('Cross-platform Sandbox Behavior', () => { - describe('Basic Execution', () => { - it('executes commands within the workspace', async () => { - const { command, args } = Platform.echo('sandbox test'); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - }); + const dir = fs.mkdtempSync(baseDir); + tempDirectories.push(dir); + return dir; + } - const result = await runCommand(sandboxed); - expect(result.status).toBe(0); - expect(result.stdout.trim()).toBe('sandbox test'); + let workspace: string; + let manager: SandboxManager; + + beforeAll(() => { + workspace = createTempDir('workspace-'); + manager = createSandboxManager({ enabled: true }, { workspace }); + }); + + afterAll(() => { + for (const dir of tempDirectories) { + try { + fs.rmSync(dir, { recursive: true, force: true }); + } catch { + // Best-effort cleanup + } + } + }); + + describe('Basic Execution', () => { + it('executes commands within the workspace', async () => { + const { command, args } = Platform.echo('sandbox test'); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, }); - // The Windows sandbox wrapper (GeminiSandbox.exe) uses standard pipes - // for I/O interception, which breaks ConPTY pseudo-terminal inheritance. 
- it.skipIf(Platform.isWindows)( - 'supports interactive pseudo-terminals (node-pty)', - async () => { - const handle = await ShellExecutionService.execute( - Platform.isPty(), - workspace, - () => {}, - new AbortController().signal, - true, - { - sanitizationConfig: getSecureSanitizationConfig(), - sandboxManager: manager, - }, - ); - - const result = await handle.result; - expect(result.exitCode).toBe(0); - expect(result.output).toContain('True'); - }, - ); + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(result.stdout.trim()).toBe('sandbox test'); }); - describe('File System Access', () => { - it('blocks access outside the workspace', async () => { - const blockedPath = Platform.getExternalBlockedPath(); - const { command, args } = Platform.touch(blockedPath); + // The Windows sandbox wrapper (GeminiSandbox.exe) uses standard pipes + // for I/O interception, which breaks ConPTY pseudo-terminal inheritance. + it.skipIf(Platform.isWindows)( + 'supports interactive pseudo-terminals (node-pty)', + async () => { + const handle = await ShellExecutionService.execute( + Platform.isPty(), + workspace, + () => {}, + new AbortController().signal, + true, + { + sanitizationConfig: getSecureSanitizationConfig(), + sandboxManager: manager, + }, + ); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - }); + const result = await handle.result; + expect(result.exitCode).toBe(0); + expect(result.output).toContain('True'); + }, + ); + }); - const result = await runCommand(sandboxed); - expect(result.status).not.toBe(0); + describe('File System Access', () => { + it('blocks access outside the workspace', async () => { + const blockedPath = Platform.getExternalBlockedPath(); + const { command, args } = Platform.touch(blockedPath); + + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, }); - it('allows dynamic expansion 
of permissions after a failure', async () => { - const tempDir = fs.mkdtempSync( - path.join(workspace, '..', 'expansion-'), - ); - const testFile = path.join(tempDir, 'test.txt'); + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }); - try { - const { command, args } = Platform.touch(testFile); + it('allows dynamic expansion of permissions after a failure', async () => { + const tempDir = createTempDir('expansion-'); + const testFile = path.join(tempDir, 'test.txt'); + const { command, args } = Platform.touch(testFile); - // First attempt: fails due to sandbox restrictions - const sandboxed1 = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - }); - const result1 = await runCommand(sandboxed1); - expect(result1.status).not.toBe(0); - expect(fs.existsSync(testFile)).toBe(false); + // First attempt: fails due to sandbox restrictions + const sandboxed1 = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + const result1 = await runCommand(sandboxed1); + assertResult(result1, sandboxed1, 'failure'); + expect(fs.existsSync(testFile)).toBe(false); - // Second attempt: succeeds with additional permissions - const sandboxed2 = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - policy: { allowedPaths: [tempDir] }, - }); - const result2 = await runCommand(sandboxed2); - expect(result2.status).toBe(0); - expect(fs.existsSync(testFile)).toBe(true); - } finally { - if (fs.existsSync(testFile)) fs.unlinkSync(testFile); - fs.rmSync(tempDir, { recursive: true, force: true }); - } + // Second attempt: succeeds with additional permissions + const sandboxed2 = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [tempDir] }, + }); + const result2 = await runCommand(sandboxed2); + assertResult(result2, sandboxed2, 'success'); + 
expect(fs.existsSync(testFile)).toBe(true); + }); + + it('grants access to explicitly allowed paths', async () => { + const allowedDir = createTempDir('allowed-'); + const testFile = path.join(allowedDir, 'test.txt'); + + const { command, args } = Platform.touch(testFile); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [allowedDir] }, }); - it('grants access to explicitly allowed paths', async () => { - const allowedDir = fs.mkdtempSync( - path.join(workspace, '..', 'allowed-'), - ); - const testFile = path.join(allowedDir, 'test.txt'); + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(fs.existsSync(testFile)).toBe(true); + }); - try { - const { command, args } = Platform.touch(testFile); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - policy: { allowedPaths: [allowedDir] }, - }); + it('blocks write access to forbidden paths within the workspace', async () => { + const tempWorkspace = createTempDir('workspace-'); + const forbiddenDir = path.join(tempWorkspace, 'forbidden'); + const testFile = path.join(forbiddenDir, 'test.txt'); + fs.mkdirSync(forbiddenDir); - const result = await runCommand(sandboxed); - expect(result.status).toBe(0); - expect(fs.existsSync(testFile)).toBe(true); - } finally { - if (fs.existsSync(testFile)) fs.unlinkSync(testFile); - fs.rmSync(allowedDir, { recursive: true, force: true }); - } + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [forbiddenDir], + }, + ); + const { command, args } = Platform.touch(testFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, }); - it('blocks access to forbidden paths within the workspace', async () => { - const tempWorkspace = fs.mkdtempSync( - path.join(os.tmpdir(), 
'workspace-'), - ); + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }); + + // Windows icacls does not reliably block read-up access for Low Integrity + // processes, so we skip read-specific assertions on Windows. The internal + // tool architecture prevents read bypasses via the C# wrapper and __read. + it.skipIf(Platform.isWindows)( + 'blocks read access to forbidden paths within the workspace', + async () => { + const tempWorkspace = createTempDir('workspace-'); const forbiddenDir = path.join(tempWorkspace, 'forbidden'); const testFile = path.join(forbiddenDir, 'test.txt'); fs.mkdirSync(forbiddenDir); + fs.writeFileSync(testFile, 'secret data'); - try { - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [forbiddenDir], - }, - ); - const { command, args } = Platform.touch(testFile); - - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, - }); - - const result = await runCommand(sandboxed); - expect(result.status).not.toBe(0); - } finally { - fs.rmSync(tempWorkspace, { recursive: true, force: true }); - } - }); - - it('blocks access to files inside forbidden directories recursively', async () => { - const tempWorkspace = fs.mkdtempSync( - path.join(os.tmpdir(), 'workspace-'), + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [forbiddenDir], + }, ); - const forbiddenDir = path.join(tempWorkspace, 'forbidden'); - const nestedDir = path.join(forbiddenDir, 'nested'); - const nestedFile = path.join(nestedDir, 'test.txt'); - fs.mkdirSync(nestedDir, { recursive: true }); - fs.writeFileSync(nestedFile, 'secret'); + const { command, args } = Platform.cat(testFile); - try { - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [forbiddenDir], - }, - ); 
- const { command, args } = Platform.cat(nestedFile); - - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, - }); - - const result = await runCommand(sandboxed); - expect(result.status).not.toBe(0); - } finally { - fs.rmSync(tempWorkspace, { recursive: true, force: true }); - } - }); - - it('prioritizes forbiddenPaths over allowedPaths', async () => { - const tempWorkspace = fs.mkdtempSync( - path.join(os.tmpdir(), 'workspace-'), - ); - const conflictDir = path.join(tempWorkspace, 'conflict'); - const testFile = path.join(conflictDir, 'test.txt'); - fs.mkdirSync(conflictDir); - - try { - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [conflictDir], - }, - ); - const { command, args } = Platform.touch(testFile); - - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, - policy: { - allowedPaths: [conflictDir], - }, - }); - - const result = await runCommand(sandboxed); - expect(result.status).not.toBe(0); - } finally { - fs.rmSync(tempWorkspace, { recursive: true, force: true }); - } - }); - - it('gracefully ignores non-existent paths in allowedPaths and forbiddenPaths', async () => { - const tempWorkspace = fs.mkdtempSync( - path.join(os.tmpdir(), 'workspace-'), - ); - const nonExistentPath = path.join(tempWorkspace, 'does-not-exist'); - - try { - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [nonExistentPath], - }, - ); - const { command, args } = Platform.echo('survived'); - const sandboxed = await osManager.prepareCommand({ - command, - args, - cwd: tempWorkspace, - env: process.env, - policy: { - allowedPaths: [nonExistentPath], - }, - }); - const result = await runCommand(sandboxed); - expect(result.status).toBe(0); - expect(result.stdout.trim()).toBe('survived'); - } finally { - 
fs.rmSync(tempWorkspace, { recursive: true, force: true }); - } - }); - - it('prevents creation of non-existent forbidden paths', async () => { - // Windows icacls cannot explicitly protect paths that have not yet been created. - if (Platform.isWindows) return; - - const tempWorkspace = fs.mkdtempSync( - path.join(os.tmpdir(), 'workspace-'), - ); - const nonExistentFile = path.join(tempWorkspace, 'never-created.txt'); - - try { - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [nonExistentFile], - }, - ); - - // We use touch to attempt creation of the file - const { command: cmdTouch, args: argsTouch } = - Platform.touch(nonExistentFile); - - const sandboxedCmd = await osManager.prepareCommand({ - command: cmdTouch, - args: argsTouch, - cwd: tempWorkspace, - env: process.env, - }); - - // Execute the command, we expect it to fail (permission denied or read-only file system) - const result = await runCommand(sandboxedCmd); - - expect(result.status).not.toBe(0); - expect(fs.existsSync(nonExistentFile)).toBe(false); - } finally { - fs.rmSync(tempWorkspace, { recursive: true, force: true }); - } - }); - - it('blocks access to both a symlink and its target when the symlink is forbidden', async () => { - if (Platform.isWindows) return; - - const tempWorkspace = fs.mkdtempSync( - path.join(os.tmpdir(), 'workspace-'), - ); - const targetFile = path.join(tempWorkspace, 'target.txt'); - const symlinkFile = path.join(tempWorkspace, 'link.txt'); - - fs.writeFileSync(targetFile, 'secret data'); - fs.symlinkSync(targetFile, symlinkFile); - - try { - const osManager = createSandboxManager( - { enabled: true }, - { - workspace: tempWorkspace, - forbiddenPaths: async () => [symlinkFile], - }, - ); - - // Attempt to read the target file directly - const { command: cmdTarget, args: argsTarget } = - Platform.cat(targetFile); - const commandTarget = await osManager.prepareCommand({ - command: cmdTarget, - args: 
argsTarget, - cwd: tempWorkspace, - env: process.env, - }); - const resultTarget = await runCommand(commandTarget); - expect(resultTarget.status).not.toBe(0); - - // Attempt to read via the symlink - const { command: cmdLink, args: argsLink } = - Platform.cat(symlinkFile); - const commandLink = await osManager.prepareCommand({ - command: cmdLink, - args: argsLink, - cwd: tempWorkspace, - env: process.env, - }); - const resultLink = await runCommand(commandLink); - expect(resultLink.status).not.toBe(0); - } finally { - fs.rmSync(tempWorkspace, { recursive: true, force: true }); - } - }); - }); - - describe('Network Access', () => { - let server: http.Server; - let url: string; - - beforeAll(async () => { - server = http.createServer((_, res) => { - res.setHeader('Connection', 'close'); - res.writeHead(200); - res.end('ok'); + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, }); - await new Promise((resolve, reject) => { - server.on('error', reject); - server.listen(0, '127.0.0.1', () => { - const addr = server.address() as import('net').AddressInfo; - url = `http://127.0.0.1:${addr.port}`; - resolve(); - }); - }); - }); - afterAll(async () => { - if (server) await new Promise((res) => server.close(() => res())); - }); + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }, + ); - // Windows Job Object rate limits exempt loopback (127.0.0.1) traffic, - // so this test cannot verify loopback blocking on Windows. 
- it.skipIf(Platform.isWindows)( - 'blocks network access by default', - async () => { - const { command, args } = Platform.curl(url); - const sandboxed = await manager.prepareCommand({ - command, - args, - cwd: workspace, - env: process.env, - }); + it('blocks access to files inside forbidden directories recursively', async () => { + const tempWorkspace = createTempDir('workspace-'); + const forbiddenDir = path.join(tempWorkspace, 'forbidden'); + const nestedDir = path.join(forbiddenDir, 'nested'); + const nestedFile = path.join(nestedDir, 'test.txt'); - const result = await runCommand(sandboxed); - expect(result.status).not.toBe(0); + // Create the base forbidden directory first so the manager can restrict access to it. + fs.mkdirSync(forbiddenDir); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [forbiddenDir], }, ); - it('grants network access when explicitly allowed', async () => { + // Execute a dummy command so the manager initializes its restrictions. + const dummyCommand = await osManager.prepareCommand({ + ...Platform.echo('init'), + cwd: tempWorkspace, + env: process.env, + }); + await runCommand(dummyCommand); + + // Now create the nested items. They will inherit the sandbox restrictions from their parent. 
+ fs.mkdirSync(nestedDir, { recursive: true }); + fs.writeFileSync(nestedFile, 'secret'); + + const { command, args } = Platform.touch(nestedFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }); + + it('prioritizes forbiddenPaths over allowedPaths', async () => { + const tempWorkspace = createTempDir('workspace-'); + const conflictDir = path.join(tempWorkspace, 'conflict'); + const testFile = path.join(conflictDir, 'test.txt'); + fs.mkdirSync(conflictDir); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [conflictDir], + }, + ); + const { command, args } = Platform.touch(testFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { + allowedPaths: [conflictDir], + }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'failure'); + }); + + it('gracefully ignores non-existent paths in allowedPaths and forbiddenPaths', async () => { + const tempWorkspace = createTempDir('workspace-'); + const nonExistentPath = path.join(tempWorkspace, 'does-not-exist'); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [nonExistentPath], + }, + ); + const { command, args } = Platform.echo('survived'); + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { + allowedPaths: [nonExistentPath], + }, + }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + expect(result.stdout.trim()).toBe('survived'); + }); + + it('prevents creation of non-existent forbidden paths', async () => { + const tempWorkspace = createTempDir('workspace-'); + const 
nonExistentFile = path.join(tempWorkspace, 'never-created.txt'); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [nonExistentFile], + }, + ); + + // We use touch to attempt creation of the file + const { command: cmdTouch, args: argsTouch } = + Platform.touch(nonExistentFile); + + const sandboxedCmd = await osManager.prepareCommand({ + command: cmdTouch, + args: argsTouch, + cwd: tempWorkspace, + env: process.env, + }); + + // Execute the command, we expect it to fail (permission denied or read-only file system) + const result = await runCommand(sandboxedCmd); + + assertResult(result, sandboxedCmd, 'failure'); + expect(fs.existsSync(nonExistentFile)).toBe(false); + }); + + it('blocks access to both a symlink and its target when the symlink is forbidden', async () => { + const tempWorkspace = createTempDir('workspace-'); + const targetFile = path.join(tempWorkspace, 'target.txt'); + const symlinkFile = path.join(tempWorkspace, 'link.txt'); + + fs.writeFileSync(targetFile, 'secret data'); + fs.symlinkSync(targetFile, symlinkFile); + + const osManager = createSandboxManager( + { enabled: true }, + { + workspace: tempWorkspace, + forbiddenPaths: async () => [symlinkFile], + }, + ); + + // Attempt to write to the target file directly + const { command: cmdTarget, args: argsTarget } = + Platform.touch(targetFile); + const commandTarget = await osManager.prepareCommand({ + command: cmdTarget, + args: argsTarget, + cwd: tempWorkspace, + env: process.env, + }); + + const resultTarget = await runCommand(commandTarget); + assertResult(resultTarget, commandTarget, 'failure'); + + // Attempt to write via the symlink + const { command: cmdLink, args: argsLink } = Platform.touch(symlinkFile); + const commandLink = await osManager.prepareCommand({ + command: cmdLink, + args: argsLink, + cwd: tempWorkspace, + env: process.env, + }); + + const resultLink = await runCommand(commandLink); + 
assertResult(resultLink, commandLink, 'failure'); + }); + }); + + describe('Network Access', () => { + let server: http.Server; + let url: string; + + beforeAll(async () => { + server = http.createServer((_, res) => { + res.setHeader('Connection', 'close'); + res.writeHead(200); + res.end('ok'); + }); + await new Promise((resolve, reject) => { + server.on('error', reject); + server.listen(0, '127.0.0.1', () => { + const addr = server.address() as import('net').AddressInfo; + url = `http://127.0.0.1:${addr.port}`; + resolve(); + }); + }); + }); + + afterAll(async () => { + if (server) await new Promise((res) => server.close(() => res())); + }); + + // Windows Job Object rate limits exempt loopback (127.0.0.1) traffic, + // so this test cannot verify loopback blocking on Windows. + it.skipIf(Platform.isWindows)( + 'blocks network access by default', + async () => { const { command, args } = Platform.curl(url); const sandboxed = await manager.prepareCommand({ command, args, cwd: workspace, env: process.env, - policy: { networkAccess: true }, }); const result = await runCommand(sandboxed); - expect(result.status).toBe(0); - if (!Platform.isWindows) { - expect(result.stdout.trim()).toBe('ok'); - } + assertResult(result, sandboxed, 'failure'); + }, + ); + + it('grants network access when explicitly allowed', async () => { + const { command, args } = Platform.curl(url); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { networkAccess: true }, }); + + const result = await runCommand(sandboxed); + assertResult(result, sandboxed, 'success'); + if (!Platform.isWindows) { + expect(result.stdout.trim()).toBe('ok'); + } }); }); }); diff --git a/packages/core/src/services/sandboxManager.test.ts b/packages/core/src/services/sandboxManager.test.ts index d6b026395a..134ef167bd 100644 --- a/packages/core/src/services/sandboxManager.test.ts +++ b/packages/core/src/services/sandboxManager.test.ts @@ -204,7 +204,7 @@ 
describe('SandboxManager', () => { const result = await resolveSandboxPaths(options, req as SandboxRequest); - expect(result.allowed).toEqual([allowed]); + expect(result.policyAllowed).toEqual([allowed]); expect(result.forbidden).toEqual([forbidden]); }); @@ -226,7 +226,7 @@ describe('SandboxManager', () => { const result = await resolveSandboxPaths(options, req as SandboxRequest); - expect(result.allowed).toEqual([other]); + expect(result.policyAllowed).toEqual([other]); }); it('should prioritize forbidden paths over allowed paths', async () => { @@ -249,7 +249,7 @@ describe('SandboxManager', () => { const result = await resolveSandboxPaths(options, req as SandboxRequest); - expect(result.allowed).toEqual([normal]); + expect(result.policyAllowed).toEqual([normal]); expect(result.forbidden).toEqual([secret]); }); @@ -274,7 +274,7 @@ describe('SandboxManager', () => { const result = await resolveSandboxPaths(options, req as SandboxRequest); - expect(result.allowed).toEqual([]); + expect(result.policyAllowed).toEqual([]); expect(result.forbidden).toEqual([secretUpper]); }); }); diff --git a/packages/core/src/services/sandboxManager.ts b/packages/core/src/services/sandboxManager.ts index 673c13b9af..f7f2944fe7 100644 --- a/packages/core/src/services/sandboxManager.ts +++ b/packages/core/src/services/sandboxManager.ts @@ -23,6 +23,33 @@ import { } from './environmentSanitization.js'; import type { ShellExecutionResult } from './shellExecutionService.js'; import type { SandboxPolicyManager } from '../policy/sandboxPolicyManager.js'; +import { resolveToRealPath } from '../utils/paths.js'; + +/** + * A structured result of fully resolved sandbox paths. + * All paths in this object are absolute, deduplicated, and expanded to include + * both the original path and its real target (if it is a symlink). + */ +export interface ResolvedSandboxPaths { + /** The primary workspace directory. */ + workspace: { + /** The original path provided in the sandbox options. 
*/ + original: string; + /** The real path. */ + resolved: string; + }; + /** Explicitly denied paths. */ + forbidden: string[]; + /** Directories included globally across all commands in this sandbox session. */ + globalIncludes: string[]; + /** Paths explicitly allowed by the policy of the currently executing command. */ + policyAllowed: string[]; + /** Paths granted temporary read access by the current command's dynamic permissions. */ + policyRead: string[]; + /** Paths granted temporary write access by the current command's dynamic permissions. */ + policyWrite: string[]; +} + export interface SandboxPermissions { /** Filesystem permissions. */ fileSystem?: { @@ -326,33 +353,68 @@ export class LocalSandboxManager implements SandboxManager { } /** - * Resolves sanitized allowed and forbidden paths for a request. - * Filters the workspace from allowed paths and ensures forbidden paths take precedence. + * Resolves and sanitizes all path categories for a sandbox request. */ export async function resolveSandboxPaths( options: GlobalSandboxOptions, req: SandboxRequest, -): Promise<{ - allowed: string[]; - forbidden: string[]; -}> { - const forbidden = sanitizePaths(await options.forbiddenPaths?.()); - const allowed = sanitizePaths(req.policy?.allowedPaths); + overridePermissions?: SandboxPermissions, +): Promise { + /** + * Helper that expands each path to include its realpath (if it's a symlink) + * and pipes the result through sanitizePaths for deduplication and absolute path enforcement. + */ + const expand = (paths?: string[] | null): string[] => { + if (!paths || paths.length === 0) return []; + const expanded = paths.flatMap((p) => { + try { + const resolved = resolveToRealPath(p); + return resolved === p ? 
[p] : [p, resolved]; + } catch { + return [p]; + } + }); + return sanitizePaths(expanded); + }; - const workspaceIdentity = getPathIdentity(options.workspace); + const forbidden = expand(await options.forbiddenPaths?.()); + + const globalIncludes = expand(options.includeDirectories); + const policyAllowed = expand(req.policy?.allowedPaths); + + const policyRead = expand(overridePermissions?.fileSystem?.read); + const policyWrite = expand(overridePermissions?.fileSystem?.write); + + const resolvedWorkspace = resolveToRealPath(options.workspace); + + const workspaceIdentities = new Set( + [options.workspace, resolvedWorkspace].map(getPathIdentity), + ); const forbiddenIdentities = new Set(forbidden.map(getPathIdentity)); - const filteredAllowed = allowed.filter((p) => { - const identity = getPathIdentity(p); - return identity !== workspaceIdentity && !forbiddenIdentities.has(identity); - }); + /** + * Filters out any paths that are explicitly forbidden or match the workspace root (original or resolved). + */ + const filter = (paths: string[]) => + paths.filter((p) => { + const identity = getPathIdentity(p); + return ( + !workspaceIdentities.has(identity) && !forbiddenIdentities.has(identity) + ); + }); return { - allowed: filteredAllowed, + workspace: { + original: options.workspace, + resolved: resolvedWorkspace, + }, forbidden, + globalIncludes: filter(globalIncludes), + policyAllowed: filter(policyAllowed), + policyRead: filter(policyRead), + policyWrite: filter(policyWrite), }; } - /** * Sanitizes an array of paths by deduplicating them and ensuring they are absolute. * Always returns an array (empty if input is null/undefined). 
From 1023c5b7a665fb9d1c4b425781907df135619b60 Mon Sep 17 00:00:00 2001 From: Adamya Singh Date: Thu, 9 Apr 2026 03:35:57 +0530 Subject: [PATCH 30/39] test(sdk): add unit tests for GeminiCliSession (#21897) --- packages/sdk/src/session.test.ts | 331 +++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 packages/sdk/src/session.test.ts diff --git a/packages/sdk/src/session.test.ts b/packages/sdk/src/session.test.ts new file mode 100644 index 0000000000..52230055e2 --- /dev/null +++ b/packages/sdk/src/session.test.ts @@ -0,0 +1,331 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { GeminiCliSession } from './session.js'; +import type { GeminiCliAgent } from './agent.js'; +import type { GeminiCliAgentOptions } from './types.js'; + +// Mutable mock client so individual tests can override sendMessageStream +const mockClient = { + resumeChat: vi.fn().mockResolvedValue(undefined), + getHistory: vi.fn().mockReturnValue([]), + sendMessageStream: vi.fn().mockReturnValue((async function* () {})()), + updateSystemInstruction: vi.fn(), +}; + +// Mutable mock config so individual tests can spy on setUserMemory etc. 
+const mockConfig = { + initialize: vi.fn().mockResolvedValue(undefined), + refreshAuth: vi.fn().mockResolvedValue(undefined), + getSkillManager: vi.fn().mockReturnValue({ + getSkills: vi.fn().mockReturnValue([]), + addSkills: vi.fn(), + }), + getToolRegistry: vi.fn().mockReturnValue({ + getTool: vi.fn().mockReturnValue(null), + registerTool: vi.fn(), + unregisterTool: vi.fn(), + }), + getMessageBus: vi.fn().mockReturnValue({}), + getGeminiClient: vi.fn().mockReturnValue(mockClient), + getSessionId: vi.fn().mockReturnValue('mock-session-id'), + getWorkingDir: vi.fn().mockReturnValue('/tmp'), + setUserMemory: vi.fn(), +}; + +// Mock scheduleAgentTools at module level so tests can override it +const mockScheduleAgentTools = vi.fn().mockResolvedValue([]); + +// Mock @google/gemini-cli-core to avoid heavy filesystem/auth/telemetry setup +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const actual = + await importOriginal(); + return { + ...actual, + Config: vi.fn().mockImplementation(() => mockConfig), + getAuthTypeFromEnv: vi.fn().mockReturnValue(null), + scheduleAgentTools: (...args: unknown[]) => mockScheduleAgentTools(...args), + loadSkillsFromDir: vi.fn().mockResolvedValue([]), + ActivateSkillTool: class { + static Name = 'activate_skill'; + }, + PolicyDecision: actual.PolicyDecision, + }; +}); + +const mockAgent = {} as unknown as GeminiCliAgent; + +const baseOptions: GeminiCliAgentOptions = { + instructions: 'You are a helpful assistant.', +}; + +beforeEach(() => { + vi.clearAllMocks(); + // Reset sendMessageStream to empty stream by default + mockClient.sendMessageStream.mockReturnValue((async function* () {})()); + mockScheduleAgentTools.mockResolvedValue([]); +}); + +describe('GeminiCliSession constructor', () => { + it('accepts string instructions', () => { + expect( + () => new GeminiCliSession(baseOptions, 'session-1', mockAgent), + ).not.toThrow(); + }); + + it('accepts function instructions', () => { + const options: 
GeminiCliAgentOptions = { + instructions: async () => 'dynamic instructions', + }; + expect( + () => new GeminiCliSession(options, 'session-2', mockAgent), + ).not.toThrow(); + }); + + it('throws when instructions is an object (not string or function)', () => { + const options = { + instructions: { invalid: true }, + } as unknown as GeminiCliAgentOptions; + expect(() => new GeminiCliSession(options, 'session-3', mockAgent)).toThrow( + 'Instructions must be a string or a function.', + ); + }); + + it('throws when instructions is a number', () => { + const options = { + instructions: 42, + } as unknown as GeminiCliAgentOptions; + expect(() => new GeminiCliSession(options, 'session-4', mockAgent)).toThrow( + 'Instructions must be a string or a function.', + ); + }); + + it('throws when instructions is an array', () => { + const options = { + instructions: ['step1', 'step2'], + } as unknown as GeminiCliAgentOptions; + expect(() => new GeminiCliSession(options, 'session-5', mockAgent)).toThrow( + 'Instructions must be a string or a function.', + ); + }); +}); + +describe('GeminiCliSession id getter', () => { + it('returns the sessionId passed to the constructor', () => { + const session = new GeminiCliSession( + baseOptions, + 'my-session-id', + mockAgent, + ); + expect(session.id).toBe('my-session-id'); + }); + + it('returns different ids for different sessions', () => { + const s1 = new GeminiCliSession(baseOptions, 'session-a', mockAgent); + const s2 = new GeminiCliSession(baseOptions, 'session-b', mockAgent); + expect(s1.id).not.toBe(s2.id); + }); +}); + +describe('GeminiCliSession initialize()', () => { + it('initializes successfully with string instructions', async () => { + const session = new GeminiCliSession( + baseOptions, + 'session-init-1', + mockAgent, + ); + await expect(session.initialize()).resolves.toBeUndefined(); + }); + + it('is idempotent — calling initialize() twice does not throw', async () => { + const session = new GeminiCliSession( + 
baseOptions, + 'session-init-2', + mockAgent, + ); + await session.initialize(); + await expect(session.initialize()).resolves.toBeUndefined(); + }); + + it('initializes with empty tools array', async () => { + const options: GeminiCliAgentOptions = { ...baseOptions, tools: [] }; + const session = new GeminiCliSession(options, 'session-init-3', mockAgent); + await expect(session.initialize()).resolves.toBeUndefined(); + }); + + it('initializes with empty skills array', async () => { + const options: GeminiCliAgentOptions = { ...baseOptions, skills: [] }; + const session = new GeminiCliSession(options, 'session-init-4', mockAgent); + await expect(session.initialize()).resolves.toBeUndefined(); + }); + + it('initializes with custom model', async () => { + const options: GeminiCliAgentOptions = { + ...baseOptions, + model: 'gemini-2.0-flash', + }; + const session = new GeminiCliSession(options, 'session-init-5', mockAgent); + await expect(session.initialize()).resolves.toBeUndefined(); + }); + + it('initializes with custom cwd', async () => { + const options: GeminiCliAgentOptions = { + ...baseOptions, + cwd: '/custom/working/dir', + }; + const session = new GeminiCliSession(options, 'session-init-6', mockAgent); + await expect(session.initialize()).resolves.toBeUndefined(); + }); +}); + +describe('GeminiCliSession sendStream()', () => { + it('auto-initializes if not yet initialized', async () => { + const session = new GeminiCliSession( + baseOptions, + 'session-stream-1', + mockAgent, + ); + const events = []; + for await (const event of session.sendStream('Hello')) { + events.push(event); + } + expect(events).toHaveLength(0); + }); + + it('completes cleanly when model returns no tool calls', async () => { + const session = new GeminiCliSession( + baseOptions, + 'session-stream-2', + mockAgent, + ); + await session.initialize(); + const events = []; + for await (const event of session.sendStream('Hello')) { + events.push(event); + } + expect(events).toHaveLength(0); 
+ }); + + it('accepts an AbortSignal without throwing', async () => { + const session = new GeminiCliSession( + baseOptions, + 'session-stream-3', + mockAgent, + ); + const controller = new AbortController(); + const events = []; + for await (const event of session.sendStream('Hello', controller.signal)) { + events.push(event); + } + expect(events).toHaveLength(0); + }); + + it('executes tool call loop and sends function response back to model', async () => { + const { GeminiEventType } = await import('@google/gemini-cli-core'); + + // First call: yield a ToolCallRequest, then end + // Second call: empty stream (model is done after tool result) + let callCount = 0; + mockClient.sendMessageStream.mockImplementation(() => { + callCount++; + if (callCount === 1) { + return (async function* () { + yield { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'call-1', + name: 'testTool', + args: { input: 'value' }, + }, + }; + })(); + } + return (async function* () {})(); + }); + + mockScheduleAgentTools.mockResolvedValue([ + { + response: { + responseParts: [ + { + functionResponse: { + name: 'testTool', + response: { result: 'done' }, + }, + }, + ], + }, + }, + ]); + + const session = new GeminiCliSession( + baseOptions, + 'session-stream-4', + mockAgent, + ); + const events = []; + for await (const event of session.sendStream('Use the tool')) { + events.push(event); + } + + // The ToolCallRequest event should have been yielded to the caller + expect(events).toHaveLength(1); + expect(events[0].type).toBe(GeminiEventType.ToolCallRequest); + + // scheduleAgentTools should have been called with the tool call + expect(mockScheduleAgentTools).toHaveBeenCalledOnce(); + + // sendMessageStream called twice: once for prompt, once with tool result + expect(mockClient.sendMessageStream).toHaveBeenCalledTimes(2); + }); + + it('calls setUserMemory and updateSystemInstruction when instructions is a function', async () => { + const dynamicInstructions = vi + .fn() + 
.mockResolvedValue('updated instructions'); + const options: GeminiCliAgentOptions = { + instructions: dynamicInstructions, + }; + + const session = new GeminiCliSession( + options, + 'session-stream-5', + mockAgent, + ); + for await (const _event of session.sendStream('Hello')) { + // consume stream + } + + // The instructions function should have been called with a SessionContext + expect(dynamicInstructions).toHaveBeenCalledOnce(); + const context = dynamicInstructions.mock.calls[0][0]; + expect(context).toHaveProperty('sessionId'); + expect(context).toHaveProperty('transcript'); + expect(context).toHaveProperty('cwd'); + expect(context).toHaveProperty('timestamp'); + + // Config should have been updated with the new instructions + expect(mockConfig.setUserMemory).toHaveBeenCalledWith( + 'updated instructions', + ); + + // Client system instruction should have been refreshed + expect(mockClient.updateSystemInstruction).toHaveBeenCalledOnce(); + }); + + it('does not call setUserMemory when instructions is a string', async () => { + const session = new GeminiCliSession( + baseOptions, + 'session-stream-6', + mockAgent, + ); + for await (const _event of session.sendStream('Hello')) { + // consume stream + } + expect(mockConfig.setUserMemory).not.toHaveBeenCalled(); + expect(mockClient.updateSystemInstruction).not.toHaveBeenCalled(); + }); +}); From 14b2f356777b678999536e2c811b6870894ec834 Mon Sep 17 00:00:00 2001 From: Jarrod Whelan <150866123+jwhelangoog@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:19:25 -0700 Subject: [PATCH 31/39] fix(cli): restore file path display in edit and write tool confirmations (#24974) --- ...-the-frame-of-the-entire-terminal.snap.svg | 3 +- .../ToolConfirmationFullFrame.test.tsx.snap | 2 +- .../components/ToolConfirmationQueue.test.tsx | 38 +++++++++++++++++++ .../ui/components/ToolConfirmationQueue.tsx | 4 +- ...-and-content-for-large-edit-diffs.snap.svg | 3 +- .../ToolConfirmationQueue.test.tsx.snap | 6 +-- 
.../messages/DenseToolMessage.test.tsx | 22 +++++++++++ 7 files changed, 70 insertions(+), 8 deletions(-) diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg index 7565185d93..42e28aac6a 100644 --- a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg @@ -14,7 +14,8 @@ ▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ ╭─────────────────────────────────────────────────────────────────────────────────────────────────╮ - ? Edit + ? Edit + packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto… ╭─────────────────────────────────────────────────────────────────────────────────────────────╮ diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap index d9cc9f7ce3..caebc9ae49 100644 --- a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap @@ -5,7 +5,7 @@ exports[`Full Terminal Tool Confirmation Snapshot > renders tool confirmation bo ▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ ╭─────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ ? Edit │ +│ ? 
Edit packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto… │ │ ╭─────────────────────────────────────────────────────────────────────────────────────────────╮ │ │ │ ... first 42 lines hidden (Ctrl+O to show) ... │ │ │ │ 43 const line43 = true; │ │ diff --git a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx index 58a78d3c24..e48c244bdf 100644 --- a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx +++ b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx @@ -66,6 +66,44 @@ describe('ToolConfirmationQueue', () => { vi.clearAllMocks(); }); + it('explicitly renders the tool description (containing filename) for edit confirmations', async () => { + const confirmingTool = { + tool: { + callId: 'call-1', + name: 'Edit', + description: 'Editing src/main.ts', + status: CoreToolCallStatus.AwaitingApproval, + confirmationDetails: { + type: 'edit' as const, + title: 'Confirm edit', + fileName: 'main.ts', + filePath: '/src/main.ts', + fileDiff: '--- a/main.ts\n+++ b/main.ts\n@@ -1 +1 @@\n-old\n+new', + originalContent: 'old', + newContent: 'new', + }, + }, + index: 1, + total: 1, + }; + + const { lastFrame, unmount } = await renderWithProviders( + , + { + config: mockConfig, + uiState: { + terminalWidth: 80, + }, + }, + ); + + const output = lastFrame(); + expect(output).toContain('Editing src/main.ts'); + unmount(); + }); + it('renders the confirming tool with progress indicator', async () => { const confirmingTool = { tool: { diff --git a/packages/cli/src/ui/components/ToolConfirmationQueue.tsx b/packages/cli/src/ui/components/ToolConfirmationQueue.tsx index 1a836662b7..fd9c51ae1a 100644 --- a/packages/cli/src/ui/components/ToolConfirmationQueue.tsx +++ b/packages/cli/src/ui/components/ToolConfirmationQueue.tsx @@ -98,9 +98,9 @@ export const ToolConfirmationQueue: React.FC = ({ ? 
{toolLabel} - {!isEdit && !!tool.description && ' '} + {!!tool.description && ' '} - {!isEdit && !!tool.description && ( + {!!tool.description && ( {tool.description} diff --git a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue-ToolConfirmationQueue-height-allocation-and-layout-should-render-the-full-queue-wrapper-with-borders-and-content-for-large-edit-diffs.snap.svg b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue-ToolConfirmationQueue-height-allocation-and-layout-should-render-the-full-queue-wrapper-with-borders-and-content-for-large-edit-diffs.snap.svg index bbfedfab59..a257a1253c 100644 --- a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue-ToolConfirmationQueue-height-allocation-and-layout-should-render-the-full-queue-wrapper-with-borders-and-content-for-large-edit-diffs.snap.svg +++ b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue-ToolConfirmationQueue-height-allocation-and-layout-should-render-the-full-queue-wrapper-with-borders-and-content-for-large-edit-diffs.snap.svg @@ -6,7 +6,8 @@ ╭──────────────────────────────────────────────────────────────────────────────╮ - ? replace + ? replace + Replaces content in a file ╭──────────────────────────────────────────────────────────────────────────╮ diff --git a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap index 9214e58713..238efefba4 100644 --- a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap @@ -2,7 +2,7 @@ exports[`ToolConfirmationQueue > calculates availableContentHeight based on availableTerminalHeight from UI state 1`] = ` "╭──────────────────────────────────────────────────────────────────────────────╮ -│ ? replace │ +│ ? 
replace edit file │ │ ╭──────────────────────────────────────────────────────────────────────────╮ │ │ ╰─... 48 hidden (Ctrl+O) ...───────────────────────────────────────────────╯ │ │ Apply this change? │ @@ -17,7 +17,7 @@ exports[`ToolConfirmationQueue > calculates availableContentHeight based on avai exports[`ToolConfirmationQueue > does not render expansion hint when constrainHeight is false 1`] = ` "╭──────────────────────────────────────────────────────────────────────────────╮ -│ ? replace │ +│ ? replace edit file │ │ ╭──────────────────────────────────────────────────────────────────────────╮ │ │ │ │ │ │ │ No changes detected. │ │ @@ -63,7 +63,7 @@ exports[`ToolConfirmationQueue > height allocation and layout > should handle se exports[`ToolConfirmationQueue > height allocation and layout > should render the full queue wrapper with borders and content for large edit diffs 1`] = ` "╭──────────────────────────────────────────────────────────────────────────────╮ -│ ? replace │ +│ ? replace Replaces content in a file │ │ ╭──────────────────────────────────────────────────────────────────────────╮ │ │ │ ... 13 hidden (Ctrl+O) ... 
│ │ │ │ 7 + const newLine7 = true; │ │ diff --git a/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx b/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx index e187c3343b..30879b13b3 100644 --- a/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx @@ -34,6 +34,28 @@ describe('DenseToolMessage', () => { terminalWidth: 80, }; + it('explicitly renders the filename in the header for FileDiff results', async () => { + const fileDiff: FileDiff = { + fileName: 'test-file.ts', + filePath: '/test-file.ts', + fileDiff: + '--- a/test-file.ts\n+++ b/test-file.ts\n@@ -1 +1 @@\n-old\n+new', + originalContent: 'old', + newContent: 'new', + }; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('test-file.ts'); + }); + it('renders correctly for a successful string result', async () => { const { lastFrame, waitUntilReady } = await renderWithProviders( , From 80764c8bb50017ed84072f10a90d8ff2c5368846 Mon Sep 17 00:00:00 2001 From: dogukanozen Date: Thu, 9 Apr 2026 01:25:29 +0300 Subject: [PATCH 32/39] fix(cli-ui): enable Ctrl+Backspace for word deletion in Windows Terminal (#21447) --- .../src/ui/components/SettingsDialog.test.tsx | 2 +- .../shared/BaseSettingsDialog.test.tsx | 2 +- .../src/ui/contexts/KeypressContext.test.tsx | 86 ++++++++++++++++++- .../cli/src/ui/contexts/KeypressContext.tsx | 16 +++- 4 files changed, 101 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/ui/components/SettingsDialog.test.tsx b/packages/cli/src/ui/components/SettingsDialog.test.tsx index 9887415a57..7ba451d538 100644 --- a/packages/cli/src/ui/components/SettingsDialog.test.tsx +++ b/packages/cli/src/ui/components/SettingsDialog.test.tsx @@ -44,7 +44,7 @@ enum TerminalKeys { LEFT_ARROW = '\u001B[D', RIGHT_ARROW = '\u001B[C', ESCAPE = '\u001B', - BACKSPACE = '\u0008', 
+ BACKSPACE = '\x7f', CTRL_P = '\u0010', CTRL_N = '\u000E', } diff --git a/packages/cli/src/ui/components/shared/BaseSettingsDialog.test.tsx b/packages/cli/src/ui/components/shared/BaseSettingsDialog.test.tsx index f66af9fd17..c49c967714 100644 --- a/packages/cli/src/ui/components/shared/BaseSettingsDialog.test.tsx +++ b/packages/cli/src/ui/components/shared/BaseSettingsDialog.test.tsx @@ -24,7 +24,7 @@ enum TerminalKeys { LEFT_ARROW = '\u001B[D', RIGHT_ARROW = '\u001B[C', ESCAPE = '\u001B', - BACKSPACE = '\u0008', + BACKSPACE = '\x7f', CTRL_L = '\u000C', } diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx index e7d0406dd7..26f1c1cf35 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx @@ -9,7 +9,17 @@ import { act } from 'react'; import { renderHookWithProviders } from '../../test-utils/render.js'; import { createMockSettings } from '../../test-utils/settings.js'; import { waitFor } from '../../test-utils/async.js'; -import { vi, afterAll, beforeAll, type Mock } from 'vitest'; +import type { Mock } from 'vitest'; +import { + vi, + afterAll, + beforeAll, + describe, + it, + expect, + beforeEach, + afterEach, +} from 'vitest'; import { useKeypressContext, ESC_TIMEOUT, @@ -431,6 +441,80 @@ describe('KeypressContext', () => { ); }); + describe('Windows Terminal Backspace handling', () => { + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('should NOT treat \\b as ctrl when WT_SESSION is NOT present and OS is not Windows_NT', async () => { + vi.stubEnv('WT_SESSION', ''); + vi.stubEnv('OS', 'Linux'); + const { keyHandler } = await setupKeypressTest(); + + act(() => { + stdin.write('\b'); + }); + + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'backspace', + ctrl: false, + }), + ); + }); + + it('should treat \\b as ctrl when WT_SESSION IS present (even if not Windows_NT)', async () => { + 
vi.stubEnv('WT_SESSION', 'some-id'); + vi.stubEnv('OS', 'Linux'); + const { keyHandler } = await setupKeypressTest(); + + act(() => { + stdin.write('\b'); + }); + + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'backspace', + ctrl: true, + }), + ); + }); + + it('should treat \\b as ctrl when OS is Windows_NT', async () => { + vi.stubEnv('WT_SESSION', ''); + vi.stubEnv('OS', 'Windows_NT'); + const { keyHandler } = await setupKeypressTest(); + + act(() => { + stdin.write('\b'); + }); + + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'backspace', + ctrl: true, + }), + ); + }); + + it('should treat \\x7f as regular backspace regardless of WT_SESSION or OS', async () => { + vi.stubEnv('WT_SESSION', 'some-id'); + vi.stubEnv('OS', 'Windows_NT'); + const { keyHandler } = await setupKeypressTest(); + + act(() => { + stdin.write('\x7f'); + }); + + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'backspace', + ctrl: false, + }), + ); + }); + }); + describe('paste mode', () => { it.each([ { diff --git a/packages/cli/src/ui/contexts/KeypressContext.tsx b/packages/cli/src/ui/contexts/KeypressContext.tsx index 3a3961221f..d834608fbe 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.tsx @@ -651,8 +651,20 @@ function* emitKeys( // tab name = 'tab'; alt = escaped; - } else if (ch === '\b' || ch === '\x7f') { - // backspace or ctrl+h + } else if (ch === '\b') { + // ctrl+h / ctrl+backspace (windows terminals send \x08 for ctrl+backspace) + name = 'backspace'; + // In Windows environments, \b is sent for Ctrl+Backspace (standard backspace is translated to \x7f). + // We scope this to Windows/WT_SESSION to avoid breaking other unixes where \b is a plain backspace. 
+ if ( + typeof process !== 'undefined' && + (process.env?.['OS'] === 'Windows_NT' || !!process.env?.['WT_SESSION']) + ) { + ctrl = true; + } + alt = escaped; + } else if (ch === '\x7f') { + // backspace name = 'backspace'; alt = escaped; } else if (ch === ESC) { From d06dba353851c545d62354f2702f2b0d97757957 Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Wed, 8 Apr 2026 23:27:24 +0000 Subject: [PATCH 33/39] fix(core): dynamic session ID injection to resolve resume bugs (#24972) --- packages/cli/src/acp/acpClient.ts | 2 +- packages/cli/src/gemini.tsx | 72 ++++++++++--------- packages/cli/src/gemini_cleanup.test.tsx | 3 + packages/cli/src/interactiveCli.tsx | 2 +- packages/cli/src/test-utils/render.tsx | 2 +- packages/cli/src/ui/AppContainer.tsx | 2 +- .../cli/src/ui/commands/bugCommand.test.ts | 11 +-- packages/cli/src/ui/commands/bugCommand.ts | 3 +- .../ui/components/ExitPlanModeDialog.test.tsx | 2 + .../cli/src/ui/components/Footer.test.tsx | 1 + .../ui/components/HistoryItemDisplay.test.tsx | 8 +-- .../src/ui/components/ModelDialog.test.tsx | 1 + .../components/ToolConfirmationQueue.test.tsx | 1 + .../src/ui/contexts/SessionContext.test.tsx | 8 +-- .../cli/src/ui/contexts/SessionContext.tsx | 10 +-- packages/cli/src/ui/hooks/useGeminiStream.ts | 20 ++---- packages/cli/src/ui/hooks/useLogger.test.tsx | 35 ++------- packages/cli/src/ui/hooks/useLogger.ts | 21 ++---- packages/cli/src/utils/sessionUtils.test.ts | 44 +++++------- packages/cli/src/utils/sessionUtils.ts | 21 ++---- packages/cli/src/utils/sessions.ts | 4 +- packages/core/src/agents/subagent-tool.ts | 1 + .../src/core/loggingContentGenerator.test.ts | 1 + .../core/src/core/loggingContentGenerator.ts | 3 + packages/core/src/index.ts | 2 +- packages/core/src/scheduler/policy.test.ts | 37 ++++++---- packages/core/src/scheduler/scheduler.test.ts | 2 + packages/core/src/scheduler/scheduler.ts | 1 + .../src/scheduler/scheduler_parallel.test.ts | 1 + packages/core/src/scheduler/tool-executor.ts | 1 + 
packages/core/src/telemetry/trace.test.ts | 21 +++--- packages/core/src/telemetry/trace.ts | 9 ++- packages/core/src/utils/session.ts | 2 - 33 files changed, 165 insertions(+), 189 deletions(-) diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index e0a352e0d1..b84c9d6b87 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -372,7 +372,7 @@ export class GeminiAgent { mcpServers, ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(config.storage); const { sessionData, sessionPath } = await sessionSelector.resolveSession(sessionId); diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index f77fc11d61..f496bee37b 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -13,7 +13,7 @@ import { type OutputPayload, type ConsoleLogPayload, type UserFeedbackPayload, - sessionId, + createSessionId, logUserPrompt, AuthType, UserPromptEvent, @@ -33,6 +33,7 @@ import { type AdminControlsSettings, debugLogger, isHeadlessMode, + Storage, } from '@google/gemini-cli-core'; import { loadCliConfig, parseArguments } from './config/config.js'; @@ -185,6 +186,39 @@ ${reason.stack}` }); } +export async function resolveSessionId(resumeArg: string | undefined): Promise<{ + sessionId: string; + resumedSessionData?: ResumedSessionData; +}> { + if (!resumeArg) { + return { sessionId: createSessionId() }; + } + + const storage = new Storage(process.cwd()); + await storage.initialize(); + + try { + const { sessionData, sessionPath } = await new SessionSelector( + storage, + ).resolveSession(resumeArg); + return { + sessionId: sessionData.sessionId, + resumedSessionData: { conversation: sessionData, filePath: sessionPath }, + }; + } catch (error) { + if (error instanceof SessionError && error.code === 'NO_SESSIONS_FOUND') { + coreEvents.emitFeedback('warning', error.message); + return { sessionId: createSessionId() }; + } + 
coreEvents.emitFeedback( + 'error', + `Error resuming session: ${error instanceof Error ? error.message : 'Unknown error'}`, + ); + await runExitCleanup(); + process.exit(ExitCodes.FATAL_INPUT_ERROR); + } +} + export async function startInteractiveUI( config: Config, settings: LoadedSettings, @@ -280,6 +314,8 @@ export async function main() { const argv = await argvPromise; + const { sessionId, resumedSessionData } = await resolveSessionId(argv.resume); + if ( (argv.allowedTools && argv.allowedTools.length > 0) || (settings.merged.tools?.allowed && settings.merged.tools.allowed.length > 0) @@ -599,40 +635,6 @@ export async function main() { })), ]; - // Handle --resume flag - let resumedSessionData: ResumedSessionData | undefined = undefined; - if (argv.resume) { - const sessionSelector = new SessionSelector(config); - try { - const result = await sessionSelector.resolveSession(argv.resume); - resumedSessionData = { - conversation: result.sessionData, - filePath: result.sessionPath, - }; - // Use the existing session ID to continue recording to the same session - config.setSessionId(resumedSessionData.conversation.sessionId); - } catch (error) { - if ( - error instanceof SessionError && - error.code === 'NO_SESSIONS_FOUND' - ) { - // No sessions to resume — start a fresh session with a warning - startupWarnings.push({ - id: 'resume-no-sessions', - message: error.message, - priority: WarningPriority.High, - }); - } else { - coreEvents.emitFeedback( - 'error', - `Error resuming session: ${error instanceof Error ? error.message : 'Unknown error'}`, - ); - await runExitCleanup(); - process.exit(ExitCodes.FATAL_INPUT_ERROR); - } - } - } - cliStartupHandle?.end(); // Render UI, passing necessary config values. Check that there is no command line question. 
diff --git a/packages/cli/src/gemini_cleanup.test.tsx b/packages/cli/src/gemini_cleanup.test.tsx index 4bbc7e7648..0fc43ba2bf 100644 --- a/packages/cli/src/gemini_cleanup.test.tsx +++ b/packages/cli/src/gemini_cleanup.test.tsx @@ -73,6 +73,7 @@ vi.mock('./config/config.js', () => ({ getSandbox: vi.fn(() => false), getQuestion: vi.fn(() => ''), isInteractive: () => false, + getSessionId: vi.fn().mockReturnValue('test-session-id'), storage: { initialize: vi.fn().mockResolvedValue(undefined) }, } as unknown as Config), parseArguments: vi.fn().mockResolvedValue({}), @@ -213,6 +214,7 @@ describe('gemini.tsx main function cleanup', () => { getSandbox: vi.fn(() => false), getDebugMode: vi.fn(() => false), getPolicyEngine: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), getMessageBus: () => ({ subscribe: vi.fn() }), getEnableHooks: vi.fn(() => false), getHookSystem: () => undefined, @@ -273,6 +275,7 @@ describe('gemini.tsx main function cleanup', () => { vi.mocked(loadCliConfig).mockResolvedValue( buildMockConfig({ getHookSystem: vi.fn(() => mockHookSystem), + getSessionId: vi.fn().mockReturnValue('test-session-id'), }), ); diff --git a/packages/cli/src/interactiveCli.tsx b/packages/cli/src/interactiveCli.tsx index 965bc27693..4b307fb9d3 100644 --- a/packages/cli/src/interactiveCli.tsx +++ b/packages/cli/src/interactiveCli.tsx @@ -107,7 +107,7 @@ export async function startInteractiveUI( - + - + diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index e61cada6b5..efdc7223ea 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -444,7 +444,7 @@ export const AppContainer = (props: AppContainerProps) => { const [isConfigInitialized, setConfigInitialized] = useState(false); - const logger = useLogger(config.storage); + const logger = useLogger(config); const { inputHistory, addInput, initializeFromLogger } = useInputHistoryStore(); diff --git 
a/packages/cli/src/ui/commands/bugCommand.test.ts b/packages/cli/src/ui/commands/bugCommand.test.ts index c2c1a9a1d6..f767805b01 100644 --- a/packages/cli/src/ui/commands/bugCommand.test.ts +++ b/packages/cli/src/ui/commands/bugCommand.test.ts @@ -9,7 +9,7 @@ import open from 'open'; import path from 'node:path'; import { bugCommand } from './bugCommand.js'; import { createMockCommandContext } from '../../test-utils/mockCommandContext.js'; -import { getVersion } from '@google/gemini-cli-core'; +import { getVersion, type Config } from '@google/gemini-cli-core'; import { GIT_COMMIT_INFO } from '../../generated/git-commit.js'; import { formatBytes } from '../utils/formatters.js'; @@ -89,7 +89,8 @@ describe('bugCommand', () => { getBugCommand: () => undefined, getIdeMode: () => true, getContentGeneratorConfig: () => ({ authType: 'oauth-personal' }), - }, + getSessionId: vi.fn().mockReturnValue('test-session-id'), + } as unknown as Config, geminiClient: { getChat: () => ({ getHistory: () => [], @@ -137,7 +138,8 @@ describe('bugCommand', () => { storage: { getProjectTempDir: () => '/tmp/gemini', }, - }, + getSessionId: vi.fn().mockReturnValue('test-session-id'), + } as unknown as Config, geminiClient: { getChat: () => ({ getHistory: () => history, @@ -182,7 +184,8 @@ describe('bugCommand', () => { getBugCommand: () => ({ urlTemplate: customTemplate }), getIdeMode: () => true, getContentGeneratorConfig: () => ({ authType: 'vertex-ai' }), - }, + getSessionId: vi.fn().mockReturnValue('test-session-id'), + } as unknown as Config, geminiClient: { getChat: () => ({ getHistory: () => [], diff --git a/packages/cli/src/ui/commands/bugCommand.ts b/packages/cli/src/ui/commands/bugCommand.ts index 134bccc9f0..e146491dec 100644 --- a/packages/cli/src/ui/commands/bugCommand.ts +++ b/packages/cli/src/ui/commands/bugCommand.ts @@ -16,7 +16,6 @@ import { GIT_COMMIT_INFO } from '../../generated/git-commit.js'; import { formatBytes } from '../utils/formatters.js'; import { IdeClient, - 
sessionId, getVersion, INITIAL_HISTORY_LENGTH, debugLogger, @@ -59,7 +58,7 @@ export const bugCommand: SlashCommand = { let info = ` * **CLI Version:** ${cliVersion} * **Git Commit:** ${GIT_COMMIT_INFO} -* **Session ID:** ${sessionId} +* **Session ID:** ${config?.getSessionId() || 'Unknown'} * **Operating System:** ${osVersion} * **Sandbox Environment:** ${sandboxEnv} * **Model Version:** ${modelVersion} diff --git a/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx b/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx index 6925c749d7..cfbcb22499 100644 --- a/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx +++ b/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx @@ -158,6 +158,7 @@ Implement a comprehensive authentication system with multiple providers. getIdeMode: () => false, isTrustedFolder: () => true, getPreferredEditor: () => undefined, + getSessionId: () => 'test-session-id', storage: { getPlansDir: () => mockPlansDir, }, @@ -464,6 +465,7 @@ Implement a comprehensive authentication system with multiple providers. 
getTargetDir: () => mockTargetDir, getIdeMode: () => false, isTrustedFolder: () => true, + getSessionId: () => 'test-session-id', storage: { getPlansDir: () => mockPlansDir, }, diff --git a/packages/cli/src/ui/components/Footer.test.tsx b/packages/cli/src/ui/components/Footer.test.tsx index 8c62434e61..bb2e0c5e4d 100644 --- a/packages/cli/src/ui/components/Footer.test.tsx +++ b/packages/cli/src/ui/components/Footer.test.tsx @@ -82,6 +82,7 @@ const mockConfigPlain = { getExtensionRegistryURI: () => undefined, getContentGeneratorConfig: () => ({ authType: undefined }), getSandboxEnabled: () => false, + getSessionId: () => 'test-session-id', }; const mockConfig = mockConfigPlain as unknown as Config; diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx index ddbc30c022..2f6e9e1b8a 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx @@ -124,7 +124,7 @@ describe('', () => { duration: '1s', }; const { lastFrame, unmount } = await renderWithProviders( - + , ); @@ -157,7 +157,7 @@ describe('', () => { type: 'model_stats', }; const { lastFrame, unmount } = await renderWithProviders( - + , ); @@ -173,7 +173,7 @@ describe('', () => { type: 'tool_stats', }; const { lastFrame, unmount } = await renderWithProviders( - + , ); @@ -190,7 +190,7 @@ describe('', () => { duration: '1s', }; const { lastFrame, unmount } = await renderWithProviders( - + , ); diff --git a/packages/cli/src/ui/components/ModelDialog.test.tsx b/packages/cli/src/ui/components/ModelDialog.test.tsx index e5796727f3..487aa34b4a 100644 --- a/packages/cli/src/ui/components/ModelDialog.test.tsx +++ b/packages/cli/src/ui/components/ModelDialog.test.tsx @@ -86,6 +86,7 @@ describe('', () => { getProModelNoAccess: mockGetProModelNoAccess, getProModelNoAccessSync: mockGetProModelNoAccessSync, getLastRetrievedQuota: () => ({ buckets: [] }), + getSessionId: () 
=> 'test-session-id', }; beforeEach(() => { diff --git a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx index e48c244bdf..703a028557 100644 --- a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx +++ b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx @@ -55,6 +55,7 @@ describe('ToolConfirmationQueue', () => { getFileSystemService: () => ({ readFile: vi.fn().mockResolvedValue('Plan content'), }), + getSessionId: () => 'test-session-id', storage: { getPlansDir: () => '/mock/temp/plans', }, diff --git a/packages/cli/src/ui/contexts/SessionContext.test.tsx b/packages/cli/src/ui/contexts/SessionContext.test.tsx index f07d28de85..46874d0917 100644 --- a/packages/cli/src/ui/contexts/SessionContext.test.tsx +++ b/packages/cli/src/ui/contexts/SessionContext.test.tsx @@ -60,7 +60,7 @@ describe('SessionStatsContext', () => { > = { current: undefined }; const { unmount } = await render( - + , ); @@ -79,7 +79,7 @@ describe('SessionStatsContext', () => { > = { current: undefined }; const { unmount } = await render( - + , ); @@ -162,7 +162,7 @@ describe('SessionStatsContext', () => { }; const { unmount } = await render( - + , ); @@ -245,7 +245,7 @@ describe('SessionStatsContext', () => { > = { current: undefined }; const { unmount } = await render( - + , ); diff --git a/packages/cli/src/ui/contexts/SessionContext.tsx b/packages/cli/src/ui/contexts/SessionContext.tsx index 7f313bb443..1e0113b784 100644 --- a/packages/cli/src/ui/contexts/SessionContext.tsx +++ b/packages/cli/src/ui/contexts/SessionContext.tsx @@ -13,14 +13,13 @@ import { useMemo, useEffect, } from 'react'; - import type { SessionMetrics, ModelMetrics, RoleMetrics, ToolCallStats, } from '@google/gemini-cli-core'; -import { uiTelemetryService, sessionId } from '@google/gemini-cli-core'; +import { uiTelemetryService } from '@google/gemini-cli-core'; export enum ToolCallDecision { ACCEPT = 'accept', @@ -183,9 +182,10 
@@ const SessionStatsContext = createContext( // --- Provider Component --- -export const SessionStatsProvider: React.FC<{ children: React.ReactNode }> = ({ - children, -}) => { +export const SessionStatsProvider: React.FC<{ + children: React.ReactNode; + sessionId: string; +}> = ({ children, sessionId }) => { const [stats, setStats] = useState({ sessionId, sessionStartTime: new Date(), diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index a2621c4546..c0e3fcdd04 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -262,14 +262,13 @@ export const useGeminiStream = ( useStateAndRef(true); const processedMemoryToolsRef = useRef>(new Set()); const { startNewPrompt, getPromptCount } = useSessionStats(); - const storage = config.storage; - const logger = useLogger(storage); + const logger = useLogger(config); const gitService = useMemo(() => { if (!config.getProjectRoot()) { return; } - return new GitService(config.getProjectRoot(), storage); - }, [config, storage]); + return new GitService(config.getProjectRoot(), config.storage); + }, [config]); useEffect(() => { const handleRetryAttempt = (payload: RetryAttemptPayload) => { @@ -1580,6 +1579,7 @@ export const useGeminiStream = ( operation: options?.isContinuation ? 
GeminiCliOperation.SystemPrompt : GeminiCliOperation.UserPrompt, + sessionId: config.getSessionId(), }, async ({ metadata: spanMetadata }) => { spanMetadata.input = query; @@ -2105,7 +2105,7 @@ export const useGeminiStream = ( } if (checkpointsToWrite.size > 0) { - const checkpointDir = storage.getProjectTempCheckpointsDir(); + const checkpointDir = config.storage.getProjectTempCheckpointsDir(); try { await fs.mkdir(checkpointDir, { recursive: true }); for (const [fileName, content] of checkpointsToWrite) { @@ -2122,15 +2122,7 @@ export const useGeminiStream = ( }; // eslint-disable-next-line @typescript-eslint/no-floating-promises saveRestorableToolCalls(); - }, [ - toolCalls, - config, - onDebugMessage, - gitService, - history, - geminiClient, - storage, - ]); + }, [toolCalls, config, onDebugMessage, gitService, history, geminiClient]); const lastOutputTime = Math.max( lastToolOutputTime, diff --git a/packages/cli/src/ui/hooks/useLogger.test.tsx b/packages/cli/src/ui/hooks/useLogger.test.tsx index c0791f5afe..7616c0d2fc 100644 --- a/packages/cli/src/ui/hooks/useLogger.test.tsx +++ b/packages/cli/src/ui/hooks/useLogger.test.tsx @@ -8,14 +8,7 @@ import { act } from 'react'; import { describe, it, expect, vi, beforeEach } from 'vitest'; import { renderHook } from '../../test-utils/render.js'; import { useLogger } from './useLogger.js'; -import { - sessionId as globalSessionId, - Logger, - type Storage, - type Config, -} from '@google/gemini-cli-core'; -import { ConfigContext } from '../contexts/ConfigContext.js'; -import type React from 'react'; +import { Logger, type Storage, type Config } from '@google/gemini-cli-core'; let deferredInit: { resolve: (val?: unknown) => void }; @@ -41,35 +34,15 @@ describe('useLogger', () => { const mockStorage = {} as Storage; const mockConfig = { getSessionId: vi.fn().mockReturnValue('active-session-id'), + storage: mockStorage, } as unknown as Config; beforeEach(() => { vi.clearAllMocks(); }); - it('should initialize with the 
global sessionId by default', async () => { - const { result } = await renderHook(() => useLogger(mockStorage)); - - expect(result.current).toBeNull(); - - await act(async () => { - deferredInit.resolve(); - }); - - expect(result.current).not.toBeNull(); - expect(Logger).toHaveBeenCalledWith(globalSessionId, mockStorage); - }); - - it('should initialize with the active sessionId from ConfigContext when available', async () => { - const wrapper = ({ children }: { children: React.ReactNode }) => ( - - {children} - - ); - - const { result } = await renderHook(() => useLogger(mockStorage), { - wrapper, - }); + it('should initialize with the sessionId from config', async () => { + const { result } = await renderHook(() => useLogger(mockConfig)); expect(result.current).toBeNull(); diff --git a/packages/cli/src/ui/hooks/useLogger.ts b/packages/cli/src/ui/hooks/useLogger.ts index 2c9309821d..443713635f 100644 --- a/packages/cli/src/ui/hooks/useLogger.ts +++ b/packages/cli/src/ui/hooks/useLogger.ts @@ -4,24 +4,17 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { useState, useEffect, useContext } from 'react'; -import { - sessionId as globalSessionId, - Logger, - type Storage, -} from '@google/gemini-cli-core'; -import { ConfigContext } from '../contexts/ConfigContext.js'; +import { useState, useEffect } from 'react'; +import { Logger, type Config } from '@google/gemini-cli-core'; /** * Hook to manage the logger instance. */ -export const useLogger = (storage: Storage): Logger | null => { +export const useLogger = (config: Config): Logger | null => { const [logger, setLogger] = useState(null); - const config = useContext(ConfigContext); useEffect(() => { - const activeSessionId = config?.getSessionId() ?? globalSessionId; - const newLogger = new Logger(activeSessionId, storage); + const newLogger = new Logger(config.getSessionId(), config.storage); /** * Start async initialization, no need to await. 
Using await slows down the @@ -30,11 +23,9 @@ export const useLogger = (storage: Storage): Logger | null => { */ newLogger .initialize() - .then(() => { - setLogger(newLogger); - }) + .then(() => setLogger(newLogger)) .catch(() => {}); - }, [storage, config]); + }, [config]); return logger; }; diff --git a/packages/cli/src/utils/sessionUtils.test.ts b/packages/cli/src/utils/sessionUtils.test.ts index e1cd1137fa..0495bf5588 100644 --- a/packages/cli/src/utils/sessionUtils.test.ts +++ b/packages/cli/src/utils/sessionUtils.test.ts @@ -15,7 +15,7 @@ import { } from './sessionUtils.js'; import { SESSION_FILE_PREFIX, - type Config, + type Storage, type MessageRecord, CoreToolCallStatus, } from '@google/gemini-cli-core'; @@ -25,20 +25,17 @@ import { randomUUID } from 'node:crypto'; describe('SessionSelector', () => { let tmpDir: string; - let config: Config; + let storage: Storage; beforeEach(async () => { // Create a temporary directory for testing tmpDir = path.join(process.cwd(), '.tmp-test-sessions'); await fs.mkdir(tmpDir, { recursive: true }); - // Mock config - config = { - storage: { - getProjectTempDir: () => tmpDir, - }, - getSessionId: () => 'current-session-id', - } as Partial as Config; + // Mock storage + storage = { + getProjectTempDir: () => tmpDir, + } as Partial as Storage; }); afterEach(async () => { @@ -104,7 +101,7 @@ describe('SessionSelector', () => { JSON.stringify(session2, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); // Test resolving by UUID const result1 = await sessionSelector.resolveSession(sessionId1); @@ -170,7 +167,7 @@ describe('SessionSelector', () => { JSON.stringify(session2, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); // Test resolving by index (1-based) const result1 = await sessionSelector.resolveSession('1'); @@ -234,7 +231,7 @@ describe('SessionSelector', () => { 
JSON.stringify(session2, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); // Test resolving latest const result = await sessionSelector.resolveSession('latest'); @@ -271,7 +268,7 @@ describe('SessionSelector', () => { JSON.stringify(session, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); // Test resolving by UUID with leading/trailing spaces const result = await sessionSelector.resolveSession(` ${sessionId} `); @@ -334,7 +331,7 @@ describe('SessionSelector', () => { JSON.stringify(sessionDuplicate, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); const sessions = await sessionSelector.listSessions(); expect(sessions.length).toBe(1); @@ -373,7 +370,7 @@ describe('SessionSelector', () => { JSON.stringify(session1, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); await expect( sessionSelector.resolveSession('invalid-uuid'), @@ -389,14 +386,11 @@ describe('SessionSelector', () => { const chatsDir = path.join(tmpDir, 'chats'); await fs.mkdir(chatsDir, { recursive: true }); - const emptyConfig = { - storage: { - getProjectTempDir: () => tmpDir, - }, - getSessionId: () => 'current-session-id', - } as Partial as Config; + const emptyStorage = { + getProjectTempDir: () => tmpDir, + } as Partial as Storage; - const sessionSelector = new SessionSelector(emptyConfig); + const sessionSelector = new SessionSelector(emptyStorage); await expect(sessionSelector.resolveSession('latest')).rejects.toSatisfy( (error) => { @@ -469,7 +463,7 @@ describe('SessionSelector', () => { JSON.stringify(sessionSystemOnly, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); const sessions = await sessionSelector.listSessions(); // 
Should only list the session with user message @@ -508,7 +502,7 @@ describe('SessionSelector', () => { JSON.stringify(sessionGeminiOnly, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); const sessions = await sessionSelector.listSessions(); // Should list the session with gemini message @@ -574,7 +568,7 @@ describe('SessionSelector', () => { JSON.stringify(subagentSession, null, 2), ); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(storage); const sessions = await sessionSelector.listSessions(); // Should only list the main session diff --git a/packages/cli/src/utils/sessionUtils.ts b/packages/cli/src/utils/sessionUtils.ts index cf95b0f545..6f72b20381 100644 --- a/packages/cli/src/utils/sessionUtils.ts +++ b/packages/cli/src/utils/sessionUtils.ts @@ -9,7 +9,7 @@ import { partListUnionToString, SESSION_FILE_PREFIX, CoreToolCallStatus, - type Config, + type Storage, type ConversationRecord, type MessageRecord, } from '@google/gemini-cli-core'; @@ -399,17 +399,14 @@ export const getSessionFiles = async ( * Utility class for session discovery and selection. */ export class SessionSelector { - constructor(private config: Config) {} + constructor(private storage: Storage) {} /** * Lists all available sessions for the current project. 
*/ async listSessions(): Promise { - const chatsDir = path.join( - this.config.storage.getProjectTempDir(), - 'chats', - ); - return getSessionFiles(chatsDir, this.config.getSessionId()); + const chatsDir = path.join(this.storage.getProjectTempDir(), 'chats'); + return getSessionFiles(chatsDir); } /** @@ -452,10 +449,7 @@ export class SessionSelector { return sortedSessions[index - 1]; } - const chatsDir = path.join( - this.config.storage.getProjectTempDir(), - 'chats', - ); + const chatsDir = path.join(this.storage.getProjectTempDir(), 'chats'); throw SessionError.invalidSessionIdentifier(trimmedIdentifier, chatsDir); } @@ -507,10 +501,7 @@ export class SessionSelector { private async selectSession( sessionInfo: SessionInfo, ): Promise { - const chatsDir = path.join( - this.config.storage.getProjectTempDir(), - 'chats', - ); + const chatsDir = path.join(this.storage.getProjectTempDir(), 'chats'); const sessionPath = path.join(chatsDir, sessionInfo.fileName); try { diff --git a/packages/cli/src/utils/sessions.ts b/packages/cli/src/utils/sessions.ts index 9a4def4995..8b62376ff8 100644 --- a/packages/cli/src/utils/sessions.ts +++ b/packages/cli/src/utils/sessions.ts @@ -21,7 +21,7 @@ export async function listSessions(config: Config): Promise { // Generate summary for most recent session if needed await generateSummary(config); - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(config.storage); const sessions = await sessionSelector.listSessions(); if (sessions.length === 0) { @@ -55,7 +55,7 @@ export async function deleteSession( config: Config, sessionIndex: string, ): Promise { - const sessionSelector = new SessionSelector(config); + const sessionSelector = new SessionSelector(config.storage); const sessions = await sessionSelector.listSessions(); if (sessions.length === 0) { diff --git a/packages/core/src/agents/subagent-tool.ts b/packages/core/src/agents/subagent-tool.ts index 3ef9f0aa86..e689098f5a 100644 --- 
a/packages/core/src/agents/subagent-tool.ts +++ b/packages/core/src/agents/subagent-tool.ts @@ -182,6 +182,7 @@ class SubAgentInvocation extends BaseToolInvocation { { operation: GeminiCliOperation.AgentCall, logPrompts: this.context.config.getTelemetryLogPromptsEnabled(), + sessionId: this.context.config.getSessionId(), attributes: { [GEN_AI_AGENT_NAME]: this.definition.name, [GEN_AI_AGENT_DESCRIPTION]: this.definition.description, diff --git a/packages/core/src/core/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator.test.ts index 7b37d1a5ff..2b8249d539 100644 --- a/packages/core/src/core/loggingContentGenerator.test.ts +++ b/packages/core/src/core/loggingContentGenerator.test.ts @@ -74,6 +74,7 @@ describe('LoggingContentGenerator', () => { }), getTelemetryLogPromptsEnabled: vi.fn().mockReturnValue(true), refreshUserQuotaIfStale: vi.fn().mockResolvedValue(undefined), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Config; loggingContentGenerator = new LoggingContentGenerator(wrapped, config); vi.useFakeTimers(); diff --git a/packages/core/src/core/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator.ts index c9350593ec..027a7ae622 100644 --- a/packages/core/src/core/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator.ts @@ -350,6 +350,7 @@ export class LoggingContentGenerator implements ContentGenerator { { operation: GeminiCliOperation.LLMCall, logPrompts: this.config.getTelemetryLogPromptsEnabled(), + sessionId: this.config.getSessionId(), attributes: { [GEN_AI_REQUEST_MODEL]: req.model, [GEN_AI_PROMPT_NAME]: userPromptId, @@ -440,6 +441,7 @@ export class LoggingContentGenerator implements ContentGenerator { { operation: GeminiCliOperation.LLMCall, logPrompts: this.config.getTelemetryLogPromptsEnabled(), + sessionId: this.config.getSessionId(), attributes: { [GEN_AI_REQUEST_MODEL]: req.model, [GEN_AI_PROMPT_NAME]: userPromptId, @@ -594,6 +596,7 @@ export 
class LoggingContentGenerator implements ContentGenerator { { operation: GeminiCliOperation.LLMCall, logPrompts: this.config.getTelemetryLogPromptsEnabled(), + sessionId: this.config.getSessionId(), attributes: { [GEN_AI_REQUEST_MODEL]: req.model, }, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 130ca9c2a5..04456a2964 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -252,7 +252,7 @@ export * from './telemetry/index.js'; export * from './telemetry/billingEvents.js'; export { logBillingEvent } from './telemetry/loggers.js'; export * from './telemetry/constants.js'; -export { sessionId, createSessionId } from './utils/session.js'; +export { createSessionId } from './utils/session.js'; export * from './utils/compatibility.js'; export * from './utils/browser.js'; export { Storage } from './config/storage.js'; diff --git a/packages/core/src/scheduler/policy.test.ts b/packages/core/src/scheduler/policy.test.ts index acea3d3ab6..c228ead10d 100644 --- a/packages/core/src/scheduler/policy.test.ts +++ b/packages/core/src/scheduler/policy.test.ts @@ -51,8 +51,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getPolicyEngine: vi.fn().mockReturnValue(mockPolicyEngine), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; @@ -79,8 +79,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getPolicyEngine: vi.fn().mockReturnValue(mockPolicyEngine), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; @@ -161,8 +161,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getPolicyEngine: 
vi.fn().mockReturnValue(mockPolicyEngine), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; @@ -226,8 +226,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getPolicyEngine: vi.fn().mockReturnValue(mockPolicyEngine), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - const toolCall = { request: { name: 'test-tool', args: {}, isClientInitiated: true }, tool: { name: 'test-tool' }, @@ -243,8 +243,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -273,8 +273,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -307,6 +307,7 @@ describe('policy.ts', () => { isTrustedFolder: vi.fn().mockReturnValue(false), getWorkspacePoliciesDir: vi.fn().mockReturnValue(undefined), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; (mockConfig as unknown as { config: Config }).config = @@ -339,8 +340,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -379,8 +380,8 @@ 
describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -420,8 +421,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -447,8 +448,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -473,8 +474,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -499,8 +500,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -540,8 +541,8 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; - (mockConfig as unknown as { 
config: Config }).config = mockConfig as Config; const mockMessageBus = { @@ -583,6 +584,7 @@ describe('policy.ts', () => { isTrustedFolder: vi.fn().mockReturnValue(false), getWorkspacePoliciesDir: vi.fn().mockReturnValue(undefined), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; (mockConfig as unknown as { config: Config }).config = @@ -628,6 +630,7 @@ describe('policy.ts', () => { .fn() .mockReturnValue('/mock/project/policies'), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; const mockMessageBus = { publish: vi.fn(), @@ -659,6 +662,7 @@ describe('policy.ts', () => { .fn() .mockReturnValue('/mock/project/policies'), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; const mockMessageBus = { publish: vi.fn(), @@ -689,6 +693,7 @@ describe('policy.ts', () => { getWorkspacePoliciesDir: vi.fn().mockReturnValue(undefined), getTargetDir: vi.fn().mockReturnValue('/mock/dir'), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; const mockMessageBus = { publish: vi.fn(), @@ -727,6 +732,7 @@ describe('policy.ts', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; const mockMessageBus = { publish: vi.fn(), @@ -766,6 +772,7 @@ describe('policy.ts', () => { it('should return default denial message when no rule provided', () => { const mockConfig = { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Config; (mockConfig as unknown as { config: Config }).config = mockConfig; @@ -779,6 +786,7 @@ describe('policy.ts', () => { it('should return custom deny message if provided', () => { const mockConfig = { 
getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Config; (mockConfig as unknown as { config: Config }).config = mockConfig; @@ -840,7 +848,6 @@ describe('Plan Mode Denial Consistency', () => { publish: vi.fn(), subscribe: vi.fn(), } as unknown as Mocked; - mockConfig = { getPolicyEngine: vi.fn().mockReturnValue(mockPolicyEngine), toolRegistry: mockToolRegistry, @@ -852,6 +859,7 @@ describe('Plan Mode Denial Consistency', () => { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.PLAN), // Key: Plan Mode getTelemetryLogPromptsEnabled: vi.fn().mockReturnValue(false), setApprovalMode: vi.fn(), + getSessionId: vi.fn().mockReturnValue('test-session-id'), getUsageStatisticsEnabled: vi.fn().mockReturnValue(false), } as unknown as Mocked; (mockConfig as unknown as { config: Config }).config = mockConfig as Config; @@ -933,6 +941,7 @@ describe('Plan Mode Denial Consistency', () => { getApprovalMode: vi.fn().mockReturnValue(currentMode), isTrustedFolder: vi.fn().mockReturnValue(false), getWorkspacePoliciesDir: vi.fn().mockReturnValue(undefined), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; const mockMessageBus = { diff --git a/packages/core/src/scheduler/scheduler.test.ts b/packages/core/src/scheduler/scheduler.test.ts index 54562933a8..e0fe7b873c 100644 --- a/packages/core/src/scheduler/scheduler.test.ts +++ b/packages/core/src/scheduler/scheduler.test.ts @@ -177,6 +177,7 @@ describe('Scheduler (Orchestrator)', () => { setApprovalMode: vi.fn(), getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getTelemetryLogPromptsEnabled: vi.fn().mockReturnValue(false), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; (mockConfig as unknown as { config: Config }).config = mockConfig as Config; @@ -1423,6 +1424,7 @@ describe('Scheduler MCP Progress', () => { setApprovalMode: vi.fn(), getApprovalMode: 
vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getTelemetryLogPromptsEnabled: vi.fn().mockReturnValue(false), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; (mockConfig as unknown as { config: Config }).config = mockConfig as Config; diff --git a/packages/core/src/scheduler/scheduler.ts b/packages/core/src/scheduler/scheduler.ts index e35993d542..2f95748597 100644 --- a/packages/core/src/scheduler/scheduler.ts +++ b/packages/core/src/scheduler/scheduler.ts @@ -197,6 +197,7 @@ export class Scheduler { { operation: GeminiCliOperation.ScheduleToolCalls, logPrompts: this.context.config.getTelemetryLogPromptsEnabled(), + sessionId: this.context.config.getSessionId(), }, async ({ metadata: spanMetadata }) => { const requests = Array.isArray(request) ? request : [request]; diff --git a/packages/core/src/scheduler/scheduler_parallel.test.ts b/packages/core/src/scheduler/scheduler_parallel.test.ts index ec187452f0..9229a94550 100644 --- a/packages/core/src/scheduler/scheduler_parallel.test.ts +++ b/packages/core/src/scheduler/scheduler_parallel.test.ts @@ -218,6 +218,7 @@ describe('Scheduler Parallel Execution', () => { setApprovalMode: vi.fn(), getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getTelemetryLogPromptsEnabled: vi.fn().mockReturnValue(false), + getSessionId: vi.fn().mockReturnValue('test-session-id'), } as unknown as Mocked; (mockConfig as unknown as { config: Config }).config = mockConfig as Config; diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index 464810d8f0..3910aaee47 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -84,6 +84,7 @@ export class ToolExecutor { { operation: GeminiCliOperation.ToolCall, logPrompts: this.config.getTelemetryLogPromptsEnabled(), + sessionId: this.config.getSessionId(), attributes: { [GEN_AI_TOOL_NAME]: toolName, [GEN_AI_TOOL_CALL_ID]: callId, diff --git 
a/packages/core/src/telemetry/trace.test.ts b/packages/core/src/telemetry/trace.test.ts index ba2ad9c444..9cb1e8796f 100644 --- a/packages/core/src/telemetry/trace.test.ts +++ b/packages/core/src/telemetry/trace.test.ts @@ -110,7 +110,7 @@ describe('runInDevTraceSpan', () => { const fn = vi.fn(async () => 'result'); const result = await runInDevTraceSpan( - { operation: GeminiCliOperation.LLMCall }, + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, fn, ); @@ -125,7 +125,7 @@ describe('runInDevTraceSpan', () => { it('should set default attributes on the span metadata', async () => { await runInDevTraceSpan( - { operation: GeminiCliOperation.LLMCall }, + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, async ({ metadata }) => { expect(metadata.attributes[GEN_AI_OPERATION_NAME]).toBe( GeminiCliOperation.LLMCall, @@ -143,7 +143,7 @@ describe('runInDevTraceSpan', () => { it('should set span attributes from metadata on completion', async () => { await runInDevTraceSpan( - { operation: GeminiCliOperation.LLMCall }, + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, async ({ metadata }) => { metadata.input = { query: 'hello' }; metadata.output = { response: 'world' }; @@ -169,9 +169,12 @@ describe('runInDevTraceSpan', () => { it('should handle errors in the wrapped function', async () => { const error = new Error('test error'); await expect( - runInDevTraceSpan({ operation: GeminiCliOperation.LLMCall }, async () => { - throw error; - }), + runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, + async () => { + throw error; + }, + ), ).rejects.toThrow(error); expect(mockSpan.setStatus).toHaveBeenCalledWith({ @@ -189,7 +192,7 @@ describe('runInDevTraceSpan', () => { } const resultStream = await runInDevTraceSpan( - { operation: GeminiCliOperation.LLMCall }, + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, async () => testStream(), ); @@ 
-212,7 +215,7 @@ describe('runInDevTraceSpan', () => { } const resultStream = await runInDevTraceSpan( - { operation: GeminiCliOperation.LLMCall }, + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, async () => errorStream(), ); @@ -231,7 +234,7 @@ describe('runInDevTraceSpan', () => { }); await runInDevTraceSpan( - { operation: GeminiCliOperation.LLMCall }, + { operation: GeminiCliOperation.LLMCall, sessionId: 'test-session-id' }, async ({ metadata }) => { metadata.input = 'trigger error'; }, diff --git a/packages/core/src/telemetry/trace.ts b/packages/core/src/telemetry/trace.ts index 9059340495..86447eb353 100644 --- a/packages/core/src/telemetry/trace.ts +++ b/packages/core/src/telemetry/trace.ts @@ -23,7 +23,6 @@ import { SERVICE_DESCRIPTION, SERVICE_NAME, } from './constants.js'; -import { sessionId } from '../utils/session.js'; import { truncateString } from '../utils/textUtils.js'; @@ -96,10 +95,14 @@ export interface SpanMetadata { * @returns The result of the function. 
*/ export async function runInDevTraceSpan( - opts: SpanOptions & { operation: GeminiCliOperation; logPrompts?: boolean }, + opts: SpanOptions & { + operation: GeminiCliOperation; + logPrompts?: boolean; + sessionId: string; + }, fn: ({ metadata }: { metadata: SpanMetadata }) => Promise, ): Promise { - const { operation, logPrompts, ...restOfSpanOpts } = opts; + const { operation, logPrompts, sessionId, ...restOfSpanOpts } = opts; const tracer = trace.getTracer(TRACER_NAME, TRACER_VERSION); return tracer.startActiveSpan(operation, restOfSpanOpts, async (span) => { diff --git a/packages/core/src/utils/session.ts b/packages/core/src/utils/session.ts index 2a0ec52115..a010305e82 100644 --- a/packages/core/src/utils/session.ts +++ b/packages/core/src/utils/session.ts @@ -6,8 +6,6 @@ import { randomUUID } from 'node:crypto'; -export const sessionId = randomUUID(); - export function createSessionId(): string { return randomUUID(); } From 9c4e17b7ce0141481def18ea396487e2be151e2e Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Wed, 8 Apr 2026 16:36:19 -0700 Subject: [PATCH 34/39] Update ink version to 6.6.9 (#24980) --- package-lock.json | 44 +++++++-------------------------------- package.json | 4 ++-- packages/cli/package.json | 2 +- 3 files changed, 11 insertions(+), 39 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3a22da1337..17b8bc26cc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "packages/*" ], "dependencies": { - "ink": "npm:@jrichman/ink@6.6.8", + "ink": "npm:@jrichman/ink@6.6.9", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", @@ -447,8 +447,7 @@ "version": "2.11.0", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", - "license": "(Apache-2.0 AND BSD-3-Clause)", - "peer": true + "license": "(Apache-2.0 AND BSD-3-Clause)" }, 
"node_modules/@bundled-es-modules/cookie": { "version": "2.0.1", @@ -1451,7 +1450,6 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" @@ -2158,7 +2156,6 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2339,7 +2336,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -2389,7 +2385,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2764,7 +2759,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2798,7 +2792,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2853,7 
+2846,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4090,7 +4082,6 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4365,7 +4356,6 @@ "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.35.0", "@typescript-eslint/types": "8.35.0", @@ -5239,7 +5229,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7380,8 +7369,7 @@ "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", - "license": "BSD-3-Clause", - "peer": true + "license": "BSD-3-Clause" }, "node_modules/dezalgo": { "version": "1.0.4", @@ -7965,7 +7953,6 @@ "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -8483,7 +8470,6 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", 
"license": "MIT", - "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9796,7 +9782,6 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz", "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==", "license": "MIT", - "peer": true, "engines": { "node": ">=16.9.0" } @@ -10071,11 +10056,10 @@ }, "node_modules/ink": { "name": "@jrichman/ink", - "version": "6.6.8", - "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.8.tgz", - "integrity": "sha512-099iGdvWVIM2ivc3NEWyMF7FT06aLmrx1gMGI02ZYB4wLIFn0v/KQl6+20xEwcM6gyzj8Y8842Sf0UH2z0oTDw==", + "version": "6.6.9", + "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz", + "integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==", "license": "MIT", - "peer": true, "dependencies": { "ansi-escapes": "^7.0.0", "ansi-styles": "^6.2.3", @@ -13849,7 +13833,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -13860,7 +13843,6 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -16010,7 +15992,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -16233,8 +16214,7 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - 
"peer": true + "license": "0BSD" }, "node_modules/tsx": { "version": "4.20.3", @@ -16242,7 +16222,6 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16408,7 +16387,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16631,7 +16609,6 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16745,7 +16722,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -16758,7 +16734,6 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", - "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17406,7 +17381,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -17559,7 +17533,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.6.8", + "ink": "npm:@jrichman/ink@6.6.9", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", 
@@ -17850,7 +17824,6 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -17954,7 +17927,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, diff --git a/package.json b/package.json index 77801eaa7b..0af6a9aad0 100644 --- a/package.json +++ b/package.json @@ -73,7 +73,7 @@ "pre-commit": "node scripts/pre-commit.js" }, "overrides": { - "ink": "npm:@jrichman/ink@6.6.8", + "ink": "npm:@jrichman/ink@6.6.9", "wrap-ansi": "9.0.2", "cliui": { "wrap-ansi": "7.0.0" @@ -142,7 +142,7 @@ "yargs": "^17.7.2" }, "dependencies": { - "ink": "npm:@jrichman/ink@6.6.8", + "ink": "npm:@jrichman/ink@6.6.9", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", diff --git a/packages/cli/package.json b/packages/cli/package.json index 82ff74b08e..cd3b2ec135 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -49,7 +49,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.6.8", + "ink": "npm:@jrichman/ink@6.6.9", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", From bc3ed61adbbca5eb2c3bb2d5da2dc710155d4487 Mon Sep 17 00:00:00 2001 From: Jarrod Whelan <150866123+jwhelangoog@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:40:43 -0700 Subject: [PATCH 35/39] feat(core): refine shell tool description display logic (#24903) --- packages/core/src/tools/shell.test.ts | 66 ++++++++++++++++----------- packages/core/src/tools/shell.ts | 10 ++-- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git 
a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 9551fd9638..1741b57be1 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -768,6 +768,46 @@ describe('ShellTool', () => { const shellTool = new ShellTool(mockConfig, createMockMessageBus()); expect(shellTool.description).not.toContain('Efficiency Guidelines:'); }); + + it('should return the command if description is not provided', () => { + const invocation = shellTool.build({ + command: 'echo "hello"', + }); + expect(invocation.getDescription()).toBe('echo "hello"'); + }); + + it('should return the command if it is short (<= 150 chars), even if description is provided', () => { + const invocation = shellTool.build({ + command: 'echo "hello"', + description: 'Prints a friendly greeting.', + }); + expect(invocation.getDescription()).toBe('echo "hello"'); + }); + + it('should return the description if the command is long (> 150 chars)', () => { + const longCommand = 'echo "hello" && '.repeat(15) + 'echo "world"'; // Length > 150 + const invocation = shellTool.build({ + command: longCommand, + description: 'Prints multiple greetings.', + }); + expect(invocation.getDescription()).toBe('Prints multiple greetings.'); + }); + + it('should return the raw command if description is an empty string', () => { + const invocation = shellTool.build({ + command: 'echo hello', + description: '', + }); + expect(invocation.getDescription()).toBe('echo hello'); + }); + + it('should return the raw command if description is just whitespace', () => { + const invocation = shellTool.build({ + command: 'echo hello', + description: ' ', + }); + expect(invocation.getDescription()).toBe('echo hello'); + }); }); describe('getDisplayTitle and getExplanation', () => { @@ -803,32 +843,6 @@ describe('ShellTool', () => { }); }); - describe('invocation getDescription', () => { - it('should return the description if it is present and not empty whitespace', () => { - 
const invocation = shellTool.build({ - command: 'echo hello', - description: 'prints hello', - }); - expect(invocation.getDescription()).toBe('prints hello'); - }); - - it('should return the raw command if description is an empty string', () => { - const invocation = shellTool.build({ - command: 'echo hello', - description: '', - }); - expect(invocation.getDescription()).toBe('echo hello'); - }); - - it('should return the raw command if description is just whitespace', () => { - const invocation = shellTool.build({ - command: 'echo hello', - description: ' ', - }); - expect(invocation.getDescription()).toBe('echo hello'); - }); - }); - describe('llmContent output format', () => { const mockAbortSignal = new AbortController().signal; diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 3ea29474c6..acbd5e72ff 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -63,6 +63,7 @@ export const OUTPUT_UPDATE_INTERVAL_MS = 1000; // Delay so user does not see the output of the process before the process is moved to the background. const BACKGROUND_DELAY_MS = 200; +const SHOW_NL_DESCRIPTION_THRESHOLD = 150; export interface ShellToolParams { command: string; @@ -136,9 +137,12 @@ export class ShellToolInvocation extends BaseToolInvocation< } getDescription(): string { - return this.params.description?.trim() - ? this.params.description - : this.params.command; + const descStr = this.params.description?.trim(); + const commandStr = this.params.command; + return Array.from(commandStr).length <= SHOW_NL_DESCRIPTION_THRESHOLD || + !descStr + ? 
commandStr + : descStr; } private simplifyPaths(paths: Set): string[] { From f1bb2af6de80f5c239809d1ab70acf842b4b25fc Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 8 Apr 2026 23:57:26 +0000 Subject: [PATCH 36/39] Generalize evals infra to support more types of evals, organization and queuing of named suites (#24941) --- .github/workflows/chained_e2e.yml | 2 + .github/workflows/evals-nightly.yml | 20 ++- evals/answer-vs-act.eval.ts | 12 ++ evals/app-test-helper.ts | 98 ++++++------ evals/ask_user.eval.ts | 24 ++- evals/automated-tool-use.eval.ts | 4 + evals/cli_help_delegation.eval.ts | 2 + evals/component-test-helper.ts | 136 ++++++++++++++++ evals/concurrency-safety.eval.ts | 2 + evals/edit-locations-eval.eval.ts | 2 + evals/frugalReads.eval.ts | 6 + evals/frugalSearch.eval.ts | 14 +- evals/generalist_agent.eval.ts | 2 + evals/generalist_delegation.eval.ts | 8 + evals/gitRepo.eval.ts | 4 + evals/grep_search_functionality.eval.ts | 12 ++ evals/hierarchical_memory.eval.ts | 9 +- evals/interactive-hang.eval.ts | 4 + evals/model_steering.eval.ts | 6 +- evals/plan_mode.eval.ts | 12 ++ evals/redundant_casts.eval.ts | 2 + evals/sandbox_recovery.eval.ts | 2 + evals/save_memory.eval.ts | 30 +++- evals/shell-efficiency.eval.ts | 6 + evals/subagents.eval.ts | 12 ++ evals/test-helper.test.ts | 12 ++ evals/test-helper.ts | 148 +++++++++++------- evals/tool_output_masking.eval.ts | 4 + evals/tracker.eval.ts | 4 + evals/validation_fidelity.eval.ts | 2 + ...ation_fidelity_pre_existing_errors.eval.ts | 2 + evals/vitest.config.ts | 5 +- 32 files changed, 475 insertions(+), 133 deletions(-) create mode 100644 evals/component-test-helper.ts diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index fe87fb1d5d..94215e4795 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -335,6 +335,8 @@ jobs: env: GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' GEMINI_MODEL: 'gemini-3-pro-preview' + # Only run 
always passes behavioral tests. + EVAL_SUITE_TYPE: 'behavioral' # Disable Vitest internal retries to avoid double-retrying; # custom retry logic is handled in evals/test-helper.ts VITEST_RETRY: 0 diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml index 9acc1de050..fbb770ac84 100644 --- a/.github/workflows/evals-nightly.yml +++ b/.github/workflows/evals-nightly.yml @@ -5,10 +5,18 @@ on: - cron: '0 1 * * *' # Runs at 1 AM every day workflow_dispatch: inputs: - run_all: - description: 'Run all evaluations (including usually passing)' - type: 'boolean' - default: true + suite_type: + description: 'Suite type to run' + type: 'choice' + options: + - 'behavioral' + - 'component-level' + - 'hero-scenario' + default: 'behavioral' + suite_name: + description: 'Specific suite name to run' + required: false + type: 'string' test_name_pattern: description: 'Test name pattern or file name' required: false @@ -59,7 +67,9 @@ jobs: env: GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' GEMINI_MODEL: '${{ matrix.model }}' - RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}" + RUN_EVALS: 'true' + EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}" + EVAL_SUITE_NAME: '${{ github.event.inputs.suite_name }}' TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}' # Disable Vitest internal retries to avoid double-retrying; # custom retry logic is handled in evals/test-helper.ts diff --git a/evals/answer-vs-act.eval.ts b/evals/answer-vs-act.eval.ts index ff87d12564..1d19294363 100644 --- a/evals/answer-vs-act.eval.ts +++ b/evals/answer-vs-act.eval.ts @@ -19,6 +19,8 @@ describe('Answer vs. ask eval', () => { * automatically modify the file, but instead asks for permission. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not edit files when asked to inspect for bugs', prompt: 'Inspect app.ts for bugs', files: FILES, @@ -42,6 +44,8 @@ describe('Answer vs. 
ask eval', () => { * does modify the file. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should edit files when asked to fix bug', prompt: 'Fix the bug in app.ts - it should add numbers not subtract', files: FILES, @@ -66,6 +70,8 @@ describe('Answer vs. ask eval', () => { * automatically modify the file, but instead asks for permission. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not edit when asking "any bugs"', prompt: 'Any bugs in app.ts?', files: FILES, @@ -89,6 +95,8 @@ describe('Answer vs. ask eval', () => { * automatically modify the file. */ evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not edit files when asked a general question', prompt: 'How does app.ts work?', files: FILES, @@ -112,6 +120,8 @@ describe('Answer vs. ask eval', () => { * automatically modify the file. */ evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not edit files when asked about style', prompt: 'Is app.ts following good style?', files: FILES, @@ -135,6 +145,8 @@ describe('Answer vs. ask eval', () => { * the agent does NOT automatically modify the file. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not edit files when user notes an issue', prompt: 'The add function subtracts numbers.', files: FILES, diff --git a/evals/app-test-helper.ts b/evals/app-test-helper.ts index 8ea842aa38..1794573fe1 100644 --- a/evals/app-test-helper.ts +++ b/evals/app-test-helper.ts @@ -10,10 +10,13 @@ import { runEval, prepareLogDir, symlinkNodeModules, + withEvalRetries, + prepareWorkspace, + type BaseEvalCase, + EVAL_MODEL, } from './test-helper.js'; import fs from 'node:fs'; import path from 'node:path'; -import { DEFAULT_GEMINI_MODEL } from '@google/gemini-cli-core'; /** * Config overrides for evals, with tool-restriction fields explicitly @@ -29,15 +32,13 @@ interface EvalConfigOverrides { allowedTools?: never; /** Restricting tools via mainAgentTools in evals is forbidden. */ mainAgentTools?: never; + [key: string]: unknown; } -export interface AppEvalCase { - name: string; +export interface AppEvalCase extends BaseEvalCase { configOverrides?: EvalConfigOverrides; prompt: string; - timeout?: number; - files?: Record; setup?: (rig: AppRig) => Promise; assert: (rig: AppRig, output: string) => Promise; } @@ -48,56 +49,55 @@ export interface AppEvalCase { */ export function appEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) { const fn = async () => { - const rig = new AppRig({ - configOverrides: { - model: DEFAULT_GEMINI_MODEL, - ...evalCase.configOverrides, - }, - }); + await withEvalRetries(evalCase.name, async () => { + const rig = new AppRig({ + configOverrides: { + model: EVAL_MODEL, + ...evalCase.configOverrides, + }, + }); - const { logDir, sanitizedName } = await prepareLogDir(evalCase.name); - const logFile = path.join(logDir, `${sanitizedName}.log`); + const { logDir, sanitizedName } = await prepareLogDir(evalCase.name); + const logFile = path.join(logDir, `${sanitizedName}.log`); - try { - await rig.initialize(); + try { + await rig.initialize(); - const testDir = 
rig.getTestDir(); - symlinkNodeModules(testDir); + const testDir = rig.getTestDir(); + symlinkNodeModules(testDir); - // Setup initial files - if (evalCase.files) { - for (const [filePath, content] of Object.entries(evalCase.files)) { - const fullPath = path.join(testDir, filePath); - fs.mkdirSync(path.dirname(fullPath), { recursive: true }); - fs.writeFileSync(fullPath, content); + // Setup initial files + if (evalCase.files) { + // Note: AppRig does not use a separate homeDir, so we use testDir twice + await prepareWorkspace(testDir, testDir, evalCase.files); } + + // Run custom setup if provided (e.g. for breakpoints) + if (evalCase.setup) { + await evalCase.setup(rig); + } + + // Render the app! + await rig.render(); + + // Wait for initial ready state + await rig.waitForIdle(); + + // Send the initial prompt + await rig.sendMessage(evalCase.prompt); + + // Run assertion. Interaction-heavy tests can do their own waiting/steering here. + const output = rig.getStaticOutput(); + await evalCase.assert(rig, output); + } finally { + const output = rig.getStaticOutput(); + if (output) { + await fs.promises.writeFile(logFile, output); + } + await rig.unmount(); } - - // Run custom setup if provided (e.g. for breakpoints) - if (evalCase.setup) { - await evalCase.setup(rig); - } - - // Render the app! - await rig.render(); - - // Wait for initial ready state - await rig.waitForIdle(); - - // Send the initial prompt - await rig.sendMessage(evalCase.prompt); - - // Run assertion. Interaction-heavy tests can do their own waiting/steering here. - const output = rig.getStaticOutput(); - await evalCase.assert(rig, output); - } finally { - const output = rig.getStaticOutput(); - if (output) { - await fs.promises.writeFile(logFile, output); - } - await rig.unmount(); - } + }); }; - runEval(policy, evalCase.name, fn, (evalCase.timeout ?? 60000) + 10000); + runEval(policy, evalCase, fn, (evalCase.timeout ?? 
60000) + 10000); } diff --git a/evals/ask_user.eval.ts b/evals/ask_user.eval.ts index 6495cb3f22..60d89f7b5b 100644 --- a/evals/ask_user.eval.ts +++ b/evals/ask_user.eval.ts @@ -5,17 +5,21 @@ */ import { describe, expect } from 'vitest'; -import { appEvalTest, AppEvalCase } from './app-test-helper.js'; -import { EvalPolicy } from './test-helper.js'; +import { ApprovalMode, isRecord } from '@google/gemini-cli-core'; +import { appEvalTest, type AppEvalCase } from './app-test-helper.js'; +import { type EvalPolicy } from './test-helper.js'; function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) { + const existingGeneral = evalCase.configOverrides?.['general']; + const generalBase = isRecord(existingGeneral) ? existingGeneral : {}; + return appEvalTest(policy, { ...evalCase, configOverrides: { ...evalCase.configOverrides, + approvalMode: ApprovalMode.DEFAULT, general: { - ...evalCase.configOverrides?.general, - approvalMode: 'default', + ...generalBase, enableAutoUpdate: false, enableAutoUpdateNotification: false, }, @@ -28,6 +32,8 @@ function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) { describe('ask_user', () => { askUserEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'Agent uses AskUser tool to present multiple choice options', prompt: `Use the ask_user tool to ask me what my favorite color is. 
Provide 3 options: red, green, or blue.`, setup: async (rig) => { @@ -43,6 +49,8 @@ describe('ask_user', () => { }); askUserEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'Agent uses AskUser tool to clarify ambiguous requirements', files: { 'package.json': JSON.stringify({ name: 'my-app', version: '1.0.0' }), @@ -61,6 +69,8 @@ describe('ask_user', () => { }); askUserEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'Agent uses AskUser tool before performing significant ambiguous rework', files: { 'packages/core/src/index.ts': '// index\nexport const version = "1.0.0";', @@ -82,8 +92,8 @@ describe('ask_user', () => { ]); expect(confirmation, 'Expected a tool call confirmation').toBeDefined(); - if (confirmation?.name === 'enter_plan_mode') { - rig.acceptConfirmation('enter_plan_mode'); + if (confirmation?.toolName === 'enter_plan_mode') { + await rig.resolveTool('enter_plan_mode'); confirmation = await rig.waitForPendingConfirmation('ask_user'); } @@ -101,6 +111,8 @@ describe('ask_user', () => { // updates to clarify that shell command confirmation is handled by the UI. // See fix: https://github.com/google-gemini/gemini-cli/pull/20504 askUserEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'Agent does NOT use AskUser to confirm shell commands', files: { 'package.json': JSON.stringify({ diff --git a/evals/automated-tool-use.eval.ts b/evals/automated-tool-use.eval.ts index 87f88a1ff3..27e43708dc 100644 --- a/evals/automated-tool-use.eval.ts +++ b/evals/automated-tool-use.eval.ts @@ -14,6 +14,8 @@ describe('Automated tool use', () => { * a repro by guiding the agent into using the existing deficient script. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use automated tools (eslint --fix) to fix code style issues', files: { 'package.json': JSON.stringify( @@ -102,6 +104,8 @@ describe('Automated tool use', () => { * instead of trying to edit the files itself. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use automated tools (prettier --write) to fix formatting issues', files: { 'package.json': JSON.stringify( diff --git a/evals/cli_help_delegation.eval.ts b/evals/cli_help_delegation.eval.ts index 8be3bf1c51..e1714c0636 100644 --- a/evals/cli_help_delegation.eval.ts +++ b/evals/cli_help_delegation.eval.ts @@ -3,6 +3,8 @@ import { evalTest } from './test-helper.js'; describe('CliHelpAgent Delegation', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should delegate to cli_help agent for subagent creation questions', params: { settings: { diff --git a/evals/component-test-helper.ts b/evals/component-test-helper.ts new file mode 100644 index 0000000000..9be68e6936 --- /dev/null +++ b/evals/component-test-helper.ts @@ -0,0 +1,136 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + type EvalPolicy, + runEval, + prepareLogDir, + withEvalRetries, + prepareWorkspace, + type BaseEvalCase, +} from './test-helper.js'; +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { + Config, + type ConfigParameters, + AuthType, + ApprovalMode, + createPolicyEngineConfig, + ExtensionLoader, + IntegrityDataStatus, + makeFakeConfig, + type GeminiCLIExtension, +} from '@google/gemini-cli-core'; +import { createMockSettings } from '../packages/cli/src/test-utils/settings.js'; + +// A minimal mock ExtensionManager to bypass integrity checks +class MockExtensionManager extends ExtensionLoader { + override 
getExtensions(): GeminiCLIExtension[] { + return []; + } + setRequestConsent = (): void => {}; + setRequestSetting = (): void => {}; + integrityManager = { + verifyExtensionIntegrity: async (): Promise => + IntegrityDataStatus.VERIFIED, + storeExtensionIntegrity: async (): Promise => undefined, + }; +} + +export interface ComponentEvalCase extends BaseEvalCase { + configOverrides?: Partial; + setup?: (config: Config) => Promise; + assert: (config: Config) => Promise; +} + +export class ComponentRig { + public config: Config | undefined; + public testDir: string; + public sessionId: string; + + constructor( + private options: { configOverrides?: Partial } = {}, + ) { + const uniqueId = randomUUID(); + this.testDir = fs.mkdtempSync( + path.join(os.tmpdir(), `gemini-component-rig-${uniqueId.slice(0, 8)}-`), + ); + this.sessionId = `test-session-${uniqueId}`; + } + + async initialize() { + const settings = createMockSettings(); + const policyEngineConfig = await createPolicyEngineConfig( + settings.merged, + ApprovalMode.DEFAULT, + ); + + const configParams: ConfigParameters = { + sessionId: this.sessionId, + targetDir: this.testDir, + cwd: this.testDir, + debugMode: false, + model: 'test-model', + interactive: false, + approvalMode: ApprovalMode.DEFAULT, + policyEngineConfig, + enableEventDrivenScheduler: false, // Don't need scheduler for direct component tests + extensionLoader: new MockExtensionManager(), + useAlternateBuffer: false, + ...this.options.configOverrides, + }; + + this.config = makeFakeConfig(configParams); + await this.config.initialize(); + + // Refresh auth using USE_GEMINI to initialize the real BaseLlmClient + await this.config.refreshAuth(AuthType.USE_GEMINI); + } + + async cleanup() { + fs.rmSync(this.testDir, { recursive: true, force: true }); + } +} + +/** + * A helper for running behavioral evaluations directly against backend components. + * It provides a fully initialized Config with real API access, bypassing the UI. 
+ */ +export function componentEvalTest( + policy: EvalPolicy, + evalCase: ComponentEvalCase, +) { + const fn = async () => { + await withEvalRetries(evalCase.name, async () => { + const rig = new ComponentRig({ + configOverrides: evalCase.configOverrides, + }); + + await prepareLogDir(evalCase.name); + + try { + await rig.initialize(); + + if (evalCase.files) { + await prepareWorkspace(rig.testDir, rig.testDir, evalCase.files); + } + + if (evalCase.setup) { + await evalCase.setup(rig.config!); + } + + await evalCase.assert(rig.config!); + } finally { + await rig.cleanup(); + } + }); + }; + + runEval(policy, evalCase, fn, (evalCase.timeout ?? 60000) + 10000); +} diff --git a/evals/concurrency-safety.eval.ts b/evals/concurrency-safety.eval.ts index f2f9e24be9..3aae68b5c4 100644 --- a/evals/concurrency-safety.eval.ts +++ b/evals/concurrency-safety.eval.ts @@ -20,6 +20,8 @@ You are the mutation agent. Do the mutation requested. describe('concurrency safety eval test cases', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'mutation agents are run in parallel when explicitly requested', params: { settings: { diff --git a/evals/edit-locations-eval.eval.ts b/evals/edit-locations-eval.eval.ts index 60e34e6df7..4acc4f2cf9 100644 --- a/evals/edit-locations-eval.eval.ts +++ b/evals/edit-locations-eval.eval.ts @@ -13,6 +13,8 @@ describe('Edits location eval', () => { * instead of creating a new one. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should update existing test file instead of creating a new one', files: { 'package.json': JSON.stringify( diff --git a/evals/frugalReads.eval.ts b/evals/frugalReads.eval.ts index 47578039a6..4dd5f912b8 100644 --- a/evals/frugalReads.eval.ts +++ b/evals/frugalReads.eval.ts @@ -15,6 +15,8 @@ describe('Frugal reads eval', () => { * nearby ranges into a single contiguous read to save tool calls. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use ranged read when nearby lines are targeted', files: { 'package.json': JSON.stringify({ @@ -135,6 +137,8 @@ describe('Frugal reads eval', () => { * apart to avoid the need to read the whole file. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use ranged read when targets are far apart', files: { 'package.json': JSON.stringify({ @@ -204,6 +208,8 @@ describe('Frugal reads eval', () => { * (e.g.: 10), as it's more efficient than many small ranged reads. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should read the entire file when there are many matches', files: { 'package.json': JSON.stringify({ diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts index 1c49fc2ed4..d5962b1534 100644 --- a/evals/frugalSearch.eval.ts +++ b/evals/frugalSearch.eval.ts @@ -13,18 +13,6 @@ import { evalTest } from './test-helper.js'; * This ensures the agent doesn't flood the context window with unnecessary search results. */ describe('Frugal Search', () => { - const getGrepParams = (call: any): any => { - let args = call.toolRequest.args; - if (typeof args === 'string') { - try { - args = JSON.parse(args); - } catch (e) { - // Ignore parse errors - } - } - return args; - }; - /** * Ensure that the agent makes use of either grep or ranged reads in fulfilling this task. * The task is specifically phrased to not evoke "view" or "search" specifically because @@ -33,6 +21,8 @@ describe('Frugal Search', () => { * ranged reads. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use grep or ranged read for large files', prompt: 'What year was legacy_processor.ts written?', files: { diff --git a/evals/generalist_agent.eval.ts b/evals/generalist_agent.eval.ts index 8161e33156..b8313079e9 100644 --- a/evals/generalist_agent.eval.ts +++ b/evals/generalist_agent.eval.ts @@ -11,6 +11,8 @@ import fs from 'node:fs/promises'; describe('generalist_agent', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should be able to use generalist agent by explicitly asking the main agent to invoke it', params: { settings: { diff --git a/evals/generalist_delegation.eval.ts b/evals/generalist_delegation.eval.ts index 81252880eb..d731747826 100644 --- a/evals/generalist_delegation.eval.ts +++ b/evals/generalist_delegation.eval.ts @@ -11,6 +11,8 @@ describe('generalist_delegation', () => { // --- Positive Evals (Should Delegate) --- appEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should delegate batch error fixing to generalist agent', configOverrides: { agents: { @@ -54,6 +56,8 @@ describe('generalist_delegation', () => { }); appEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should autonomously delegate complex batch task to generalist agent', configOverrides: { agents: { @@ -94,6 +98,8 @@ describe('generalist_delegation', () => { // --- Negative Evals (Should NOT Delegate - Assertive Handling) --- appEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should NOT delegate simple read and fix to generalist agent', configOverrides: { agents: { @@ -128,6 +134,8 @@ describe('generalist_delegation', () => { }); appEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should NOT delegate simple direct question to generalist agent', configOverrides: { agents: { diff --git 
a/evals/gitRepo.eval.ts b/evals/gitRepo.eval.ts index 6415b9c20d..b5dbd8a760 100644 --- a/evals/gitRepo.eval.ts +++ b/evals/gitRepo.eval.ts @@ -26,6 +26,8 @@ describe('git repo eval', () => { * be more consistent. */ evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not git add commit changes unprompted', prompt: 'Finish this up for me by just making a targeted fix for the bug in index.ts. Do not build, install anything, or add tests', @@ -55,6 +57,8 @@ describe('git repo eval', () => { * instructed to not do so by default. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should git commit changes when prompted', prompt: 'Make a targeted fix for the bug in index.ts without building, installing anything, or adding tests. Then, commit your changes.', diff --git a/evals/grep_search_functionality.eval.ts b/evals/grep_search_functionality.eval.ts index f1224b8221..5c1da827e1 100644 --- a/evals/grep_search_functionality.eval.ts +++ b/evals/grep_search_functionality.eval.ts @@ -15,6 +15,8 @@ describe('grep_search_functionality', () => { const TEST_PREFIX = 'Grep Search Functionality: '; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should find a simple string in a file', files: { 'test.txt': `hello @@ -33,6 +35,8 @@ describe('grep_search_functionality', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should perform a case-sensitive search', files: { 'test.txt': `Hello @@ -63,6 +67,8 @@ describe('grep_search_functionality', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should return only file names when names_only is used', files: { 'file1.txt': 'match me', @@ -93,6 +99,8 @@ describe('grep_search_functionality', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should search only within the specified 
include_pattern glob', files: { 'file.js': 'my_function();', @@ -123,6 +131,8 @@ describe('grep_search_functionality', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should search within a specific subdirectory', files: { 'src/main.js': 'unique_string_1', @@ -153,6 +163,8 @@ describe('grep_search_functionality', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should report no matches correctly', files: { 'file.txt': 'nothing to see here', diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts index dd4f8fbbd1..7b673af6d6 100644 --- a/evals/hierarchical_memory.eval.ts +++ b/evals/hierarchical_memory.eval.ts @@ -5,13 +5,14 @@ */ import { describe, expect } from 'vitest'; -import { evalTest } from './test-helper.js'; -import { assertModelHasOutput } from '../integration-tests/test-helper.js'; +import { evalTest, assertModelHasOutput } from './test-helper.js'; describe('Hierarchical Memory', () => { const conflictResolutionTest = 'Agent follows hierarchy for contradictory instructions'; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: conflictResolutionTest, params: { settings: { @@ -48,6 +49,8 @@ What is my favorite fruit? 
Tell me just the name of the fruit.`, const provenanceAwarenessTest = 'Agent is aware of memory provenance'; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: provenanceAwarenessTest, params: { settings: { @@ -87,6 +90,8 @@ Provide the answer as an XML block like this: const extensionVsGlobalTest = 'Extension memory wins over Global memory'; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: extensionVsGlobalTest, params: { settings: { diff --git a/evals/interactive-hang.eval.ts b/evals/interactive-hang.eval.ts index 0cf56acf98..72a5067fcc 100644 --- a/evals/interactive-hang.eval.ts +++ b/evals/interactive-hang.eval.ts @@ -8,6 +8,8 @@ describe('interactive_commands', () => { * intervention. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not use interactive commands', prompt: 'Execute tests.', files: { @@ -49,6 +51,8 @@ describe('interactive_commands', () => { * Validates that the agent uses non-interactive flags when scaffolding a new project. 
*/ evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use non-interactive flags when scaffolding a new app', prompt: 'Create a new react application named my-app using vite.', assert: async (rig, result) => { diff --git a/evals/model_steering.eval.ts b/evals/model_steering.eval.ts index 2cb87edcc2..4033b3a88f 100644 --- a/evals/model_steering.eval.ts +++ b/evals/model_steering.eval.ts @@ -5,14 +5,14 @@ */ import { describe, expect } from 'vitest'; -import { act } from 'react'; import path from 'node:path'; import fs from 'node:fs'; import { appEvalTest } from './app-test-helper.js'; -import { PolicyDecision } from '@google/gemini-cli-core'; describe('Model Steering Behavioral Evals', () => { appEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'Corrective Hint: Model switches task based on hint during tool turn', configOverrides: { modelSteering: true, @@ -52,6 +52,8 @@ describe('Model Steering Behavioral Evals', () => { }); appEvalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'Suggestive Hint: Model incorporates user guidance mid-stream', configOverrides: { modelSteering: true, diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 6eea0c62ba..d52415a26d 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -33,6 +33,8 @@ describe('plan_mode', () => { .filter(Boolean); evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should refuse file modification when in plan mode', approvalMode: ApprovalMode.PLAN, params: { @@ -68,6 +70,8 @@ describe('plan_mode', () => { }); evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should refuse saving new documentation to the repo when in plan mode', approvalMode: ApprovalMode.PLAN, params: { @@ -105,6 +109,8 @@ describe('plan_mode', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 
'behavioral', name: 'should enter plan mode when asked to create a plan', approvalMode: ApprovalMode.DEFAULT, params: { @@ -122,6 +128,8 @@ describe('plan_mode', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should exit plan mode when plan is complete and implementation is requested', approvalMode: ApprovalMode.PLAN, params: { @@ -169,6 +177,8 @@ describe('plan_mode', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should allow file modification in plans directory when in plan mode', approvalMode: ApprovalMode.PLAN, params: { @@ -201,6 +211,8 @@ describe('plan_mode', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should create a plan in plan mode and implement it for a refactoring task', params: { settings, diff --git a/evals/redundant_casts.eval.ts b/evals/redundant_casts.eval.ts index 83750e44d4..fc991b5ba7 100644 --- a/evals/redundant_casts.eval.ts +++ b/evals/redundant_casts.eval.ts @@ -11,6 +11,8 @@ import fs from 'node:fs/promises'; describe('redundant_casts', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should not add redundant or unsafe casts when modifying typescript code', files: { 'src/cast_example.ts': ` diff --git a/evals/sandbox_recovery.eval.ts b/evals/sandbox_recovery.eval.ts index ad6b630236..073379e94f 100755 --- a/evals/sandbox_recovery.eval.ts +++ b/evals/sandbox_recovery.eval.ts @@ -3,6 +3,8 @@ import { evalTest } from './test-helper.js'; describe('Sandbox recovery', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'attempts to use additional_permissions when operation not permitted', prompt: 'Run ./script.sh. It will fail with "Operation not permitted". 
When it does, you must retry running it by passing the appropriate additional_permissions.', diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index 25e081a819..5a228ed065 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -5,16 +5,18 @@ */ import { describe, expect } from 'vitest'; -import { evalTest } from './test-helper.js'; import { + evalTest, assertModelHasOutput, checkModelOutputContent, -} from '../integration-tests/test-helper.js'; +} from './test-helper.js'; describe('save_memory', () => { const TEST_PREFIX = 'Save memory test: '; const rememberingFavoriteColor = "Agent remembers user's favorite color"; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingFavoriteColor, prompt: `remember that my favorite color is blue. @@ -35,6 +37,8 @@ describe('save_memory', () => { }); const rememberingCommandRestrictions = 'Agent remembers command restrictions'; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingCommandRestrictions, prompt: `I don't want you to ever run npm commands.`, @@ -54,6 +58,8 @@ describe('save_memory', () => { const rememberingWorkflow = 'Agent remembers workflow preferences'; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingWorkflow, prompt: `I want you to always lint after building.`, @@ -74,6 +80,8 @@ describe('save_memory', () => { const ignoringTemporaryInformation = 'Agent ignores temporary conversation details'; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: ignoringTemporaryInformation, prompt: `I'm going to get a coffee.`, @@ -97,6 +105,8 @@ describe('save_memory', () => { const rememberingPetName = "Agent remembers user's pet's name"; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingPetName, prompt: `Please remember that my dog's name is Buddy.`, @@ -116,6 +126,8 @@ 
describe('save_memory', () => { const rememberingCommandAlias = 'Agent remembers custom command aliases'; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingCommandAlias, prompt: `When I say 'start server', you should run 'npm run dev'.`, @@ -136,6 +148,8 @@ describe('save_memory', () => { const ignoringDbSchemaLocation = "Agent ignores workspace's database schema location"; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: ignoringDbSchemaLocation, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, assert: async (rig, result) => { @@ -155,6 +169,8 @@ describe('save_memory', () => { const rememberingCodingStyle = "Agent remembers user's coding style preference"; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingCodingStyle, prompt: `I prefer to use tabs instead of spaces for indentation.`, @@ -175,6 +191,8 @@ describe('save_memory', () => { const ignoringBuildArtifactLocation = 'Agent ignores workspace build artifact location'; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: ignoringBuildArtifactLocation, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, assert: async (rig, result) => { @@ -193,6 +211,8 @@ describe('save_memory', () => { const ignoringMainEntryPoint = "Agent ignores workspace's main entry point"; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: ignoringMainEntryPoint, prompt: `The main entry point for this workspace is \`src/index.js\`.`, assert: async (rig, result) => { @@ -211,6 +231,8 @@ describe('save_memory', () => { const rememberingBirthday = "Agent remembers user's birthday"; evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: rememberingBirthday, prompt: `My birthday is on June 15th.`, @@ -231,6 +253,8 @@ describe('save_memory', () => 
{ const proactiveMemoryFromLongSession = 'Agent saves preference from earlier in conversation history'; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: proactiveMemoryFromLongSession, params: { settings: { @@ -309,6 +333,8 @@ describe('save_memory', () => { const memoryManagerRoutingPreferences = 'Agent routes global and project preferences to memory'; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: memoryManagerRoutingPreferences, params: { settings: { diff --git a/evals/shell-efficiency.eval.ts b/evals/shell-efficiency.eval.ts index dc555d5298..936af245fd 100644 --- a/evals/shell-efficiency.eval.ts +++ b/evals/shell-efficiency.eval.ts @@ -21,6 +21,8 @@ describe('Shell Efficiency', () => { }; evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use --silent/--quiet flags when installing packages', prompt: 'Install the "lodash" package using npm.', assert: async (rig) => { @@ -50,6 +52,8 @@ describe('Shell Efficiency', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use --no-pager with git commands', prompt: 'Show the git log.', assert: async (rig) => { @@ -73,6 +77,8 @@ describe('Shell Efficiency', () => { }); evalTest('ALWAYS_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should NOT use efficiency flags when enableShellOutputEfficiency is disabled', params: { settings: { diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts index 7053290fba..853d08f211 100644 --- a/evals/subagents.eval.ts +++ b/evals/subagents.eval.ts @@ -45,6 +45,8 @@ describe('subagent eval test cases', () => { * This tests the system prompt's subagent specific clauses. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should delegate to user provided agent with relevant expertise', params: { settings: { @@ -69,6 +71,8 @@ describe('subagent eval test cases', () => { * subagents are available. This helps catch orchestration overuse. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should avoid delegating trivial direct edit work', params: { settings: { @@ -113,6 +117,8 @@ describe('subagent eval test cases', () => { * This is meant to codify the "overusing Generalist" failure mode. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should prefer relevant specialist over generalist', params: { settings: { @@ -149,6 +155,8 @@ describe('subagent eval test cases', () => { * naturally spans docs and tests, so multiple specialists should be used. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should use multiple relevant specialists for multi-surface task', params: { settings: { @@ -193,6 +201,8 @@ describe('subagent eval test cases', () => { * from a large pool of available subagents (10 total). */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should select the correct subagent from a pool of 10 different agents', prompt: 'Please add a new SQL table migration for a user profile.', files: { @@ -243,6 +253,8 @@ describe('subagent eval test cases', () => { * This test includes stress tests the subagent delegation with ~80 tools. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should select the correct subagent from a pool of 10 different agents with MCP tools present', prompt: 'Please add a new SQL table migration for a user profile.', setup: async (rig) => { diff --git a/evals/test-helper.test.ts b/evals/test-helper.test.ts index c0147cda75..6be26e918a 100644 --- a/evals/test-helper.test.ts +++ b/evals/test-helper.test.ts @@ -49,6 +49,8 @@ describe('evalTest reliability logic', () => { // Execute the test function directly await internalEvalTest({ + suiteName: 'test', + suiteType: 'behavioral', name: 'test-api-failure', prompt: 'do something', assert: async () => {}, @@ -83,6 +85,8 @@ describe('evalTest reliability logic', () => { // Expect the test function to throw immediately await expect( internalEvalTest({ + suiteName: 'test', + suiteType: 'behavioral', name: 'test-logic-failure', prompt: 'do something', assert: async () => { @@ -108,6 +112,8 @@ describe('evalTest reliability logic', () => { .mockResolvedValueOnce('Success'); await internalEvalTest({ + suiteName: 'test', + suiteType: 'behavioral', name: 'test-recovery', prompt: 'do something', assert: async () => {}, @@ -135,6 +141,8 @@ describe('evalTest reliability logic', () => { ); await internalEvalTest({ + suiteName: 'test', + suiteType: 'behavioral', name: 'test-api-503', prompt: 'do something', assert: async () => {}, @@ -162,6 +170,8 @@ describe('evalTest reliability logic', () => { try { await expect( internalEvalTest({ + suiteName: 'test', + suiteType: 'behavioral', name: 'test-absolute-path', prompt: 'do something', files: { @@ -190,6 +200,8 @@ describe('evalTest reliability logic', () => { try { await expect( internalEvalTest({ + suiteName: 'test', + suiteType: 'behavioral', name: 'test-traversal', prompt: 'do something', files: { diff --git a/evals/test-helper.ts b/evals/test-helper.ts index 2bf9188eee..7369a6919c 100644 --- a/evals/test-helper.ts +++ b/evals/test-helper.ts @@ 
-16,10 +16,19 @@ import { Storage, getProjectHash, SESSION_FILE_PREFIX, + PREVIEW_GEMINI_FLASH_MODEL, + getErrorMessage, } from '@google/gemini-cli-core'; export * from '@google/gemini-cli-test-utils'; +/** + * The default model used for all evaluations. + * Can be overridden by setting the GEMINI_MODEL environment variable. + */ +export const EVAL_MODEL = + process.env['GEMINI_MODEL'] || PREVIEW_GEMINI_FLASH_MODEL; + // Indicates the consistency expectation for this test. // - ALWAYS_PASSES - Means that the test is expected to pass 100% of the time. These // These tests are typically trivial and test basic functionality with unambiguous @@ -39,19 +48,49 @@ export * from '@google/gemini-cli-test-utils'; export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES'; export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { - runEval( - policy, - evalCase.name, - () => internalEvalTest(evalCase), - evalCase.timeout, - ); + runEval(policy, evalCase, () => internalEvalTest(evalCase)); } -export async function internalEvalTest(evalCase: EvalCase) { +export async function withEvalRetries( + name: string, + attemptFn: (attempt: number) => Promise, +) { const maxRetries = 3; let attempt = 0; while (attempt <= maxRetries) { + try { + await attemptFn(attempt); + return; // Success! Exit the retry loop. + } catch (error: unknown) { + const errorMessage = getErrorMessage(error); + const errorCode = getApiErrorCode(errorMessage); + + if (errorCode) { + const status = attempt < maxRetries ? 'RETRY' : 'SKIP'; + logReliabilityEvent(name, attempt, status, errorCode, errorMessage); + + if (attempt < maxRetries) { + attempt++; + console.warn( + `[Eval] Attempt ${attempt} failed with ${errorCode} Error. Retrying...`, + ); + continue; // Retry + } + + console.warn( + `[Eval] '${name}' failed after ${maxRetries} retries due to persistent API errors. 
Skipping failure to avoid blocking PR.`, + ); + return; // Gracefully exit without failing the test + } + + throw error; // Real failure + } + } +} + +export async function internalEvalTest(evalCase: EvalCase) { + await withEvalRetries(evalCase.name, async () => { const rig = new TestRig(); const { logDir, sanitizedName } = await prepareLogDir(evalCase.name); const activityLogFile = path.join(logDir, `${sanitizedName}.jsonl`); @@ -59,14 +98,21 @@ export async function internalEvalTest(evalCase: EvalCase) { let isSuccess = false; try { - rig.setup(evalCase.name, evalCase.params); + const setupOptions = { + ...evalCase.params, + settings: { + model: { name: EVAL_MODEL }, + ...evalCase.params?.settings, + }, + }; + rig.setup(evalCase.name, setupOptions); if (evalCase.setup) { await evalCase.setup(rig); } if (evalCase.files) { - await setupTestFiles(rig, evalCase.files); + await prepareWorkspace(rig.testDir!, rig.homeDir!, evalCase.files); } symlinkNodeModules(rig.testDir || ''); @@ -139,37 +185,6 @@ export async function internalEvalTest(evalCase: EvalCase) { await evalCase.assert(rig, result); isSuccess = true; - return; // Success! Exit the retry loop. - } catch (error: unknown) { - const errorMessage = - error instanceof Error ? error.message : String(error); - const errorCode = getApiErrorCode(errorMessage); - - if (errorCode) { - const status = attempt < maxRetries ? 'RETRY' : 'SKIP'; - logReliabilityEvent( - evalCase.name, - attempt, - status, - errorCode, - errorMessage, - ); - - if (attempt < maxRetries) { - attempt++; - console.warn( - `[Eval] Attempt ${attempt} failed with ${errorCode} Error. Retrying...`, - ); - continue; // Retry - } - - console.warn( - `[Eval] '${evalCase.name}' failed after ${maxRetries} retries due to persistent API errors. 
Skipping failure to avoid blocking PR.`, - ); - return; // Gracefully exit without failing the test - } - - throw error; // Real failure } finally { if (isSuccess) { await fs.promises.unlink(activityLogFile).catch((err) => { @@ -188,7 +203,7 @@ export async function internalEvalTest(evalCase: EvalCase) { ); await rig.cleanup(); } - } + }); } function getApiErrorCode(message: string): '500' | '503' | undefined { @@ -226,7 +241,7 @@ function logReliabilityEvent( const reliabilityLog = { timestamp: new Date().toISOString(), testName, - model: process.env.GEMINI_MODEL || 'unknown', + model: process.env['GEMINI_MODEL'] || 'unknown', attempt, status, errorCode, @@ -252,9 +267,13 @@ function logReliabilityEvent( * intentionally uses synchronous filesystem and child_process operations * for simplicity and to ensure sequential environment preparation. */ -async function setupTestFiles(rig: TestRig, files: Record) { +export async function prepareWorkspace( + testDir: string, + homeDir: string, + files: Record, +) { const acknowledgedAgents: Record> = {}; - const projectRoot = fs.realpathSync(rig.testDir!); + const projectRoot = fs.realpathSync(testDir); for (const [filePath, content] of Object.entries(files)) { if (filePath.includes('..') || path.isAbsolute(filePath)) { @@ -290,7 +309,7 @@ async function setupTestFiles(rig: TestRig, files: Record) { if (Object.keys(acknowledgedAgents).length > 0) { const ackPath = path.join( - rig.homeDir!, + homeDir, '.gemini', 'acknowledgments', 'agents.json', @@ -299,7 +318,7 @@ async function setupTestFiles(rig: TestRig, files: Record) { fs.writeFileSync(ackPath, JSON.stringify(acknowledgedAgents, null, 2)); } - const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const }; + const execOptions = { cwd: testDir, stdio: 'ignore' as const }; execSync('git init --initial-branch=main', execOptions); execSync('git config user.email "test@example.com"', execOptions); execSync('git config user.name "Test User"', execOptions); @@ -320,14 
+339,30 @@ async function setupTestFiles(rig: TestRig, files: Record) { */ export function runEval( policy: EvalPolicy, - name: string, + evalCase: BaseEvalCase, fn: () => Promise, - timeout?: number, + timeoutOverride?: number, ) { - if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) { - it.skip(name, fn); + const { name, timeout, suiteName, suiteType } = evalCase; + const targetSuiteType = process.env['EVAL_SUITE_TYPE']; + const targetSuiteName = process.env['EVAL_SUITE_NAME']; + + const meta = { suiteType, suiteName }; + + const skipBySuiteType = + targetSuiteType && suiteType && suiteType !== targetSuiteType; + const skipBySuiteName = + targetSuiteName && suiteName && suiteName !== targetSuiteName; + + const options = { timeout: timeoutOverride ?? timeout, meta }; + if ( + (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) || + skipBySuiteType || + skipBySuiteName + ) { + it.skip(name, options, fn); } else { - it(name, fn, timeout); + it(name, options, fn); } } @@ -366,15 +401,20 @@ interface ForbiddenToolSettings { }; } -export interface EvalCase { +export interface BaseEvalCase { + suiteName: string; + suiteType: 'behavioral' | 'component-level' | 'hero-scenario'; name: string; + timeout?: number; + files?: Record; +} + +export interface EvalCase extends BaseEvalCase { params?: { settings?: ForbiddenToolSettings & Record; [key: string]: unknown; }; prompt: string; - timeout?: number; - files?: Record; setup?: (rig: TestRig) => Promise | void; /** Conversation history to pre-load via --resume. Each entry is a message object with type, content, etc. */ messages?: Record[]; diff --git a/evals/tool_output_masking.eval.ts b/evals/tool_output_masking.eval.ts index dff639e421..ccaa279877 100644 --- a/evals/tool_output_masking.eval.ts +++ b/evals/tool_output_masking.eval.ts @@ -31,6 +31,8 @@ describe('Tool Output Masking Behavioral Evals', () => { * It should recognize the tag and use a tool to read the file. 
*/ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should attempt to read the redirected full output file when information is masked', params: { security: { @@ -167,6 +169,8 @@ Output too large. Full output available at: ${outputFilePath} * Scenario: Information is in the preview. */ evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should NOT read the full output file when the information is already in the preview', params: { security: { diff --git a/evals/tracker.eval.ts b/evals/tracker.eval.ts index 49bc903b0a..44fbdc46e0 100644 --- a/evals/tracker.eval.ts +++ b/evals/tracker.eval.ts @@ -25,6 +25,8 @@ const FILES = { describe('tracker_mode', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should manage tasks in the tracker when explicitly requested during a bug fix', params: { settings: { experimental: { taskTracker: true } }, @@ -78,6 +80,8 @@ describe('tracker_mode', () => { }); evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should implicitly create tasks when asked to build a feature plan', params: { settings: { experimental: { taskTracker: true } }, diff --git a/evals/validation_fidelity.eval.ts b/evals/validation_fidelity.eval.ts index 8cfb4f6626..2a69b88740 100644 --- a/evals/validation_fidelity.eval.ts +++ b/evals/validation_fidelity.eval.ts @@ -9,6 +9,8 @@ import { evalTest } from './test-helper.js'; describe('validation_fidelity', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should perform exhaustive validation autonomously when guided by system instructions', files: { 'src/types.ts': ` diff --git a/evals/validation_fidelity_pre_existing_errors.eval.ts b/evals/validation_fidelity_pre_existing_errors.eval.ts index 4990b7bc91..0b100e5668 100644 --- a/evals/validation_fidelity_pre_existing_errors.eval.ts +++ 
b/evals/validation_fidelity_pre_existing_errors.eval.ts @@ -9,6 +9,8 @@ import { evalTest } from './test-helper.js'; describe('validation_fidelity_pre_existing_errors', () => { evalTest('USUALLY_PASSES', { + suiteName: 'default', + suiteType: 'behavioral', name: 'should handle pre-existing project errors gracefully during validation', files: { 'src/math.ts': ` diff --git a/evals/vitest.config.ts b/evals/vitest.config.ts index 50733a999c..b0ad05c9e9 100644 --- a/evals/vitest.config.ts +++ b/evals/vitest.config.ts @@ -24,7 +24,10 @@ export default defineConfig({ environment: 'node', globals: true, alias: { - react: path.resolve(__dirname, '../node_modules/react'), + '@google/gemini-cli-core': path.resolve( + __dirname, + '../packages/core/index.ts', + ), }, setupFiles: [path.resolve(__dirname, '../packages/cli/test-setup.ts')], server: { From 464bac270ce8bac983c30a02db8ebbd44d4ac42f Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Wed, 8 Apr 2026 20:17:32 -0400 Subject: [PATCH 37/39] fix(cli): optimize startup with lightweight parent process (#24667) --- docs/cli/settings.md | 6 +- docs/reference/configuration.md | 5 +- packages/cli/index.ts | 186 ++++++++++++++++++---- packages/cli/src/config/settingsSchema.ts | 3 +- packages/cli/src/gemini.tsx | 15 +- schemas/settings.schema.json | 4 +- 6 files changed, 170 insertions(+), 49 deletions(-) diff --git a/docs/cli/settings.md b/docs/cli/settings.md index dbb3651a4f..88a5d2ff83 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -153,9 +153,9 @@ they appear in the UI. 
### Advanced -| UI Label | Setting | Description | Default | -| --------------------------------- | ------------------------------ | --------------------------------------------- | ------- | -| Auto Configure Max Old Space Size | `advanced.autoConfigureMemory` | Automatically configure Node.js memory limits | `true` | +| UI Label | Setting | Description | Default | +| --------------------------------- | ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Auto Configure Max Old Space Size | `advanced.autoConfigureMemory` | Automatically configure Node.js memory limits. Note: Because memory is allocated during the initial process boot, this setting is only read from the global user settings file and ignores workspace-level overrides. | `true` | ### Experimental diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 1fdbc755f0..f10336a0d9 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1578,7 +1578,10 @@ their corresponding top-level category object in your `settings.json` file. #### `advanced` - **`advanced.autoConfigureMemory`** (boolean): - - **Description:** Automatically configure Node.js memory limits + - **Description:** Automatically configure Node.js memory limits. Note: + Because memory is allocated during the initial process boot, this setting is + only read from the global user settings file and ignores workspace-level + overrides. 
- **Default:** `true` - **Requires restart:** Yes diff --git a/packages/cli/index.ts b/packages/cli/index.ts index d94a2dd191..d857831fb7 100644 --- a/packages/cli/index.ts +++ b/packages/cli/index.ts @@ -6,9 +6,9 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { main } from './src/gemini.js'; -import { FatalError, writeToStderr } from '@google/gemini-cli-core'; -import { runExitCleanup } from './src/utils/cleanup.js'; +import { spawn } from 'node:child_process'; +import os from 'node:os'; +import v8 from 'node:v8'; // --- Global Entry Point --- @@ -28,44 +28,162 @@ process.on('uncaughtException', (error) => { // For other errors, we rely on the default behavior, but since we attached a listener, // we must manually replicate it. if (error instanceof Error) { - writeToStderr(error.stack + '\n'); + process.stderr.write(error.stack + '\n'); } else { - writeToStderr(String(error) + '\n'); + process.stderr.write(String(error) + '\n'); } process.exit(1); }); -main().catch(async (error) => { - // Set a timeout to force exit if cleanup hangs - const cleanupTimeout = setTimeout(() => { - writeToStderr('Cleanup timed out, forcing exit...\n'); - process.exit(1); - }, 5000); - +async function getMemoryNodeArgs(): Promise { + let autoConfigureMemory = true; try { - await runExitCleanup(); - } catch (cleanupError) { - writeToStderr( - `Error during final cleanup: ${cleanupError instanceof Error ? 
cleanupError.message : String(cleanupError)}\n`, - ); - } finally { - clearTimeout(cleanupTimeout); - } - - if (error instanceof FatalError) { - let errorMessage = error.message; - if (!process.env['NO_COLOR']) { - errorMessage = `\x1b[31m${errorMessage}\x1b[0m`; + const { readFileSync } = await import('node:fs'); + const { join } = await import('node:path'); + // Respect GEMINI_CLI_HOME environment variable, falling back to os.homedir() + const baseDir = + process.env['GEMINI_CLI_HOME'] || join(os.homedir(), '.gemini'); + const settingsPath = join(baseDir, 'settings.json'); + const rawSettings = readFileSync(settingsPath, 'utf8'); + const settings = JSON.parse(rawSettings); + if (settings?.advanced?.autoConfigureMemory === false) { + autoConfigureMemory = false; } - writeToStderr(errorMessage + '\n'); - process.exit(error.exitCode); + } catch { + // ignore } - writeToStderr('An unexpected critical error occurred:'); - if (error instanceof Error) { - writeToStderr(error.stack + '\n'); - } else { - writeToStderr(String(error) + '\n'); + if (autoConfigureMemory) { + const totalMemoryMB = os.totalmem() / (1024 * 1024); + const heapStats = v8.getHeapStatistics(); + const currentMaxOldSpaceSizeMb = Math.floor( + heapStats.heap_size_limit / 1024 / 1024, + ); + const targetMaxOldSpaceSizeInMB = Math.floor(totalMemoryMB * 0.5); + + if (targetMaxOldSpaceSizeInMB > currentMaxOldSpaceSizeMb) { + return [`--max-old-space-size=${targetMaxOldSpaceSizeInMB}`]; + } } - process.exit(1); -}); + + return []; +} + +async function run() { + if (!process.env['GEMINI_CLI_NO_RELAUNCH'] && !process.env['SANDBOX']) { + // --- Lightweight Parent Process / Daemon --- + // We avoid importing heavy dependencies here to save ~1.5s of startup time. 
+ + const nodeArgs: string[] = [...process.execArgv]; + const scriptArgs = process.argv.slice(2); + + const memoryArgs = await getMemoryNodeArgs(); + nodeArgs.push(...memoryArgs); + + const script = process.argv[1]; + nodeArgs.push(script); + nodeArgs.push(...scriptArgs); + + const newEnv = { ...process.env, GEMINI_CLI_NO_RELAUNCH: 'true' }; + const RELAUNCH_EXIT_CODE = 199; + let latestAdminSettings: unknown = undefined; + + // Prevent the parent process from exiting prematurely on signals. + // The child process will receive the same signals and handle its own cleanup. + for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP']) { + process.on(sig as NodeJS.Signals, () => {}); + } + + const runner = () => { + process.stdin.pause(); + + const child = spawn(process.execPath, nodeArgs, { + stdio: ['inherit', 'inherit', 'inherit', 'ipc'], + env: newEnv, + }); + + if (latestAdminSettings) { + child.send({ type: 'admin-settings', settings: latestAdminSettings }); + } + + child.on('message', (msg: { type?: string; settings?: unknown }) => { + if (msg.type === 'admin-settings-update' && msg.settings) { + latestAdminSettings = msg.settings; + } + }); + + return new Promise<number>((resolve) => { + child.on('error', (err) => { + process.stderr.write( + 'Error: Failed to start child process: ' + err.message + '\n', + ); + resolve(1); + }); + child.on('close', (code) => { + process.stdin.resume(); + resolve(code ?? 1); + }); + }); + }; + + while (true) { + try { + const exitCode = await runner(); + if (exitCode !== RELAUNCH_EXIT_CODE) { + process.exit(exitCode); + } + } catch (error: unknown) { + process.stdin.resume(); + process.stderr.write( + `Fatal error: Failed to relaunch the CLI process.\n${error instanceof Error ? (error.stack ?? error.message) : String(error)}\n`, + ); + process.exit(1); + } + } + } else { + // --- Heavy Child Process --- + // Now we can safely import everything.
+ const { main } = await import('./src/gemini.js'); + const { FatalError, writeToStderr } = await import( + '@google/gemini-cli-core' + ); + const { runExitCleanup } = await import('./src/utils/cleanup.js'); + + main().catch(async (error: unknown) => { + // Set a timeout to force exit if cleanup hangs + const cleanupTimeout = setTimeout(() => { + writeToStderr('Cleanup timed out, forcing exit...\n'); + process.exit(1); + }, 5000); + + try { + await runExitCleanup(); + } catch (cleanupError: unknown) { + writeToStderr( + `Error during final cleanup: ${cleanupError instanceof Error ? cleanupError.message : String(cleanupError)}\n`, + ); + } finally { + clearTimeout(cleanupTimeout); + } + + if (error instanceof FatalError) { + let errorMessage = error.message; + if (!process.env['NO_COLOR']) { + errorMessage = `\x1b[31m${errorMessage}\x1b[0m`; + } + writeToStderr(errorMessage + '\n'); + process.exit(error.exitCode); + } + + writeToStderr('An unexpected critical error occurred:'); + if (error instanceof Error) { + writeToStderr(error.stack + '\n'); + } else { + writeToStderr(String(error) + '\n'); + } + process.exit(1); + }); + } +} + +run(); diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c041aaa8c3..076978b203 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1907,7 +1907,8 @@ const SETTINGS_SCHEMA = { category: 'Advanced', requiresRestart: true, default: true, - description: 'Automatically configure Node.js memory limits', + description: + 'Automatically configure Node.js memory limits. 
Note: Because memory is allocated during the initial process boot, this setting is only read from the global user settings file and ignores workspace-level overrides.', showInDialog: true, }, dnsResolutionOrder: { diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index f496bee37b..166ee0e7eb 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -81,10 +81,7 @@ import { validateNonInteractiveAuth } from './validateNonInterActiveAuth.js'; import { appEvents, AppEvent } from './utils/events.js'; import { SessionError, SessionSelector } from './utils/sessionUtils.js'; -import { - relaunchAppInChildProcess, - relaunchOnExitCode, -} from './utils/relaunch.js'; +import { relaunchOnExitCode } from './utils/relaunch.js'; import { loadSandboxConfig } from './config/sandboxConfig.js'; import { deleteSession, listSessions } from './utils/sessions.js'; import { createPolicyUpdater } from './config/policy.js'; @@ -439,6 +436,12 @@ export async function main() { // Set remote admin settings if returned from CCPA. if (remoteAdminSettings) { settings.setRemoteAdminSettings(remoteAdminSettings); + if (process.send) { + process.send({ + type: 'admin-settings-update', + settings: remoteAdminSettings, + }); + } } // Run deferred command now that we have admin settings. @@ -496,10 +499,6 @@ export async function main() { ); await runExitCleanup(); process.exit(ExitCodes.SUCCESS); - } else { - // Relaunch app so we always have a child process that can be internally - // restarted if needed. 
- await relaunchAppInChildProcess(memoryArgs, [], remoteAdminSettings); } } diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index bb5c9a9d54..1281d0f429 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2725,8 +2725,8 @@ "properties": { "autoConfigureMemory": { "title": "Auto Configure Max Old Space Size", - "description": "Automatically configure Node.js memory limits", - "markdownDescription": "Automatically configure Node.js memory limits\n\n- Category: `Advanced`\n- Requires restart: `yes`\n- Default: `true`", + "description": "Automatically configure Node.js memory limits. Note: Because memory is allocated during the initial process boot, this setting is only read from the global user settings file and ignores workspace-level overrides.", + "markdownDescription": "Automatically configure Node.js memory limits. Note: Because memory is allocated during the initial process boot, this setting is only read from the global user settings file and ignores workspace-level overrides.\n\n- Category: `Advanced`\n- Requires restart: `yes`\n- Default: `true`", "default": true, "type": "boolean" }, From 5d589946ad5642771003f464733e07471409f967 Mon Sep 17 00:00:00 2001 From: Emily Hedlund Date: Wed, 8 Apr 2026 18:29:38 -0700 Subject: [PATCH 38/39] refactor(sandbox): use centralized sandbox paths in macOS Seatbelt implementation (#24984) --- .../sandbox/macos/MacOsSandboxManager.test.ts | 59 ++--- .../src/sandbox/macos/MacOsSandboxManager.ts | 24 +- .../sandbox/macos/seatbeltArgsBuilder.test.ts | 144 ++++-------- .../src/sandbox/macos/seatbeltArgsBuilder.ts | 213 +++++++++--------- 4 files changed, 190 insertions(+), 250 deletions(-) diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts index c7bdd351a7..3e1862998e 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts +++ 
b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts @@ -64,20 +64,12 @@ describe('MacOsSandboxManager', () => { policy: mockPolicy, }); - expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith({ - workspace: mockWorkspace, - allowedPaths: mockAllowedPaths, - forbiddenPaths: [], - networkAccess: mockNetworkAccess, - workspaceWrite: false, - additionalPermissions: { - fileSystem: { - read: [], - write: [], - }, - network: true, - }, - }); + expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( + expect.objectContaining({ + networkAccess: true, + workspaceWrite: false, + }), + ); expect(result.program).toBe('/usr/bin/sandbox-exec'); expect(result.args[0]).toBe('-f'); @@ -155,11 +147,10 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - additionalPermissions: expect.objectContaining({ - fileSystem: expect.objectContaining({ - read: expect.not.arrayContaining(['/']), - write: expect.not.arrayContaining(['/']), - }), + workspaceWrite: true, + resolvedPaths: expect.objectContaining({ + policyRead: expect.not.arrayContaining(['/']), + policyWrite: expect.not.arrayContaining(['/']), }), }), ); @@ -213,7 +204,11 @@ describe('MacOsSandboxManager', () => { // The seatbelt builder internally handles governance files, so we simply verify // it is invoked correctly with the right workspace. 
expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( - expect.objectContaining({ workspace: mockWorkspace }), + expect.objectContaining({ + resolvedPaths: expect.objectContaining({ + workspace: { resolved: mockWorkspace, original: mockWorkspace }, + }), + }), ); }); }); @@ -233,10 +228,12 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - allowedPaths: expect.arrayContaining([ - '/tmp/allowed1', - '/tmp/allowed2', - ]), + resolvedPaths: expect.objectContaining({ + policyAllowed: expect.arrayContaining([ + '/tmp/allowed1', + '/tmp/allowed2', + ]), + }), }), ); }); @@ -258,7 +255,9 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - forbiddenPaths: expect.arrayContaining(['/tmp/forbidden1']), + resolvedPaths: expect.objectContaining({ + forbidden: expect.arrayContaining(['/tmp/forbidden1']), + }), }), ); }); @@ -278,7 +277,9 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - forbiddenPaths: expect.arrayContaining(['/tmp/does-not-exist']), + resolvedPaths: expect.objectContaining({ + forbidden: expect.arrayContaining(['/tmp/does-not-exist']), + }), }), ); }); @@ -301,8 +302,10 @@ describe('MacOsSandboxManager', () => { expect(seatbeltArgsBuilder.buildSeatbeltProfile).toHaveBeenCalledWith( expect.objectContaining({ - allowedPaths: [], - forbiddenPaths: expect.arrayContaining(['/tmp/conflict']), + resolvedPaths: expect.objectContaining({ + policyAllowed: [], + forbidden: expect.arrayContaining(['/tmp/conflict']), + }), }), ); }); diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts index 27e6867030..f87dc0289c 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts +++ 
b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts @@ -133,28 +133,26 @@ export class MacOsSandboxManager implements SandboxManager { false, }; + const { command: finalCommand, args: finalArgs } = handleReadWriteCommands( + req, + mergedAdditional, + this.options.workspace, + [ + ...(req.policy?.allowedPaths || []), + ...(this.options.includeDirectories || []), + ], + ); + const resolvedPaths = await resolveSandboxPaths( this.options, req, mergedAdditional, ); - const { command: finalCommand, args: finalArgs } = handleReadWriteCommands( - req, - mergedAdditional, - this.options.workspace, - req.policy?.allowedPaths, - ); const sandboxArgs = buildSeatbeltProfile({ - workspace: this.options.workspace, - allowedPaths: [ - ...resolvedPaths.policyAllowed, - ...(this.options.includeDirectories || []), - ], - forbiddenPaths: resolvedPaths.forbidden, + resolvedPaths, networkAccess: mergedAdditional.network, workspaceWrite, - additionalPermissions: mergedAdditional, }); const tempFile = this.writeProfileToTempFile(sandboxArgs); diff --git a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts index 7102fde2f7..19ba8303ae 100644 --- a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts +++ b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts @@ -8,18 +8,21 @@ import { buildSeatbeltProfile, escapeSchemeString, } from './seatbeltArgsBuilder.js'; -import * as fsUtils from '../utils/fsUtils.js'; +import type { ResolvedSandboxPaths } from '../../services/sandboxManager.js'; import fs from 'node:fs'; import os from 'node:os'; -vi.mock('../utils/fsUtils.js', async () => { - const actual = await vi.importActual('../utils/fsUtils.js'); - return { - ...actual, - tryRealpath: vi.fn((p) => p), - resolveGitWorktreePaths: vi.fn(() => ({})), - }; -}); +const defaultResolvedPaths: ResolvedSandboxPaths = { + workspace: { + resolved: '/Users/test/workspace', + original: '/Users/test/raw-workspace', + 
}, + forbidden: [], + globalIncludes: [], + policyAllowed: [], + policyRead: [], + policyWrite: [], +}; describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { afterEach(() => { @@ -35,12 +38,8 @@ describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { describe('buildSeatbeltProfile', () => { it('should build a strict allowlist profile allowing the workspace', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => p); - const profile = buildSeatbeltProfile({ - workspace: '/Users/test/workspace', - allowedPaths: [], - forbiddenPaths: [], + resolvedPaths: defaultResolvedPaths, }); expect(profile).toContain('(version 1)'); @@ -51,11 +50,11 @@ describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { }); it('should allow network when networkAccess is true', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => p); const profile = buildSeatbeltProfile({ - workspace: '/test', - allowedPaths: [], - forbiddenPaths: [], + resolvedPaths: { + ...defaultResolvedPaths, + workspace: { resolved: '/test', original: '/test' }, + }, networkAccess: true, }); expect(profile).toContain('(allow network-outbound)'); @@ -63,7 +62,6 @@ describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { describe('governance files', () => { it('should inject explicit deny rules for governance files', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => p.toString()); vi.spyOn(fs, 'existsSync').mockReturnValue(true); vi.spyOn(fs, 'lstatSync').mockImplementation( (p) => @@ -74,9 +72,13 @@ describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { ); const profile = buildSeatbeltProfile({ - workspace: '/test/workspace', - allowedPaths: [], - forbiddenPaths: [], + resolvedPaths: { + ...defaultResolvedPaths, + workspace: { + resolved: '/test/workspace', + original: '/test/workspace', + }, + }, }); expect(profile).toContain( @@ -87,48 +89,16 @@ describe.skipIf(os.platform() === 
'win32')('seatbeltArgsBuilder', () => { `(deny file-write* (subpath "/test/workspace/.git"))`, ); }); - - it('should protect both the symlink and the real path if they differ', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => { - if (p === '/test/workspace/.gitignore') - return '/test/real/.gitignore'; - return p.toString(); - }); - vi.spyOn(fs, 'existsSync').mockReturnValue(true); - vi.spyOn(fs, 'lstatSync').mockImplementation( - () => - ({ - isDirectory: () => false, - isFile: () => true, - }) as unknown as fs.Stats, - ); - - const profile = buildSeatbeltProfile({ - workspace: '/test/workspace', - allowedPaths: [], - forbiddenPaths: [], - }); - - expect(profile).toContain( - `(deny file-write* (literal "/test/workspace/.gitignore"))`, - ); - expect(profile).toContain( - `(deny file-write* (literal "/test/real/.gitignore"))`, - ); - }); }); describe('allowedPaths', () => { - it('should embed allowed paths and normalize them', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => { - if (p === '/test/symlink') return '/test/real_path'; - return p; - }); - + it('should embed allowed paths', () => { const profile = buildSeatbeltProfile({ - workspace: '/test', - allowedPaths: ['/custom/path1', '/test/symlink'], - forbiddenPaths: [], + resolvedPaths: { + ...defaultResolvedPaths, + workspace: { resolved: '/test', original: '/test' }, + policyAllowed: ['/custom/path1', '/test/real_path'], + }, }); expect(profile).toContain(`(subpath "/custom/path1")`); @@ -138,12 +108,12 @@ describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { describe('forbiddenPaths', () => { it('should explicitly deny forbidden paths', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => p); - const profile = buildSeatbeltProfile({ - workspace: '/test', - allowedPaths: [], - forbiddenPaths: ['/secret/path'], + resolvedPaths: { + ...defaultResolvedPaths, + workspace: { resolved: '/test', original: '/test' }, + forbidden: ['/secret/path'], 
+ }, }); expect(profile).toContain( @@ -151,46 +121,14 @@ describe.skipIf(os.platform() === 'win32')('seatbeltArgsBuilder', () => { ); }); - it('resolves forbidden symlink paths to their real paths', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => { - if (p === '/test/symlink' || p === '/test/missing-dir') { - return '/test/real_path'; - } - return p; - }); - - const profile = buildSeatbeltProfile({ - workspace: '/test', - allowedPaths: [], - forbiddenPaths: ['/test/symlink'], - }); - - expect(profile).toContain( - `(deny file-read* file-write* (subpath "/test/real_path"))`, - ); - }); - - it('explicitly denies non-existent forbidden paths to prevent creation', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => p); - - const profile = buildSeatbeltProfile({ - workspace: '/test', - allowedPaths: [], - forbiddenPaths: ['/test/missing-dir/missing-file.txt'], - }); - - expect(profile).toContain( - `(deny file-read* file-write* (subpath "/test/missing-dir/missing-file.txt"))`, - ); - }); - it('should override allowed paths if a path is also in forbidden paths', () => { - vi.mocked(fsUtils.tryRealpath).mockImplementation((p) => p); - const profile = buildSeatbeltProfile({ - workspace: '/test', - allowedPaths: ['/custom/path1'], - forbiddenPaths: ['/custom/path1'], + resolvedPaths: { + ...defaultResolvedPaths, + workspace: { resolved: '/test', original: '/test' }, + policyAllowed: ['/custom/path1'], + forbidden: ['/custom/path1'], + }, }); const allowString = `(allow file-read* file-write* (subpath "/custom/path1"))`; diff --git a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts index e5430d1471..967cd8f183 100644 --- a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts +++ b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts @@ -12,9 +12,9 @@ import { NETWORK_SEATBELT_PROFILE, } from './baseProfile.js'; import { - type SandboxPermissions, GOVERNANCE_FILES, 
SECRET_FILES, + type ResolvedSandboxPaths, } from '../../services/sandboxManager.js'; import { tryRealpath, resolveGitWorktreePaths } from '../utils/fsUtils.js'; @@ -22,16 +22,10 @@ import { tryRealpath, resolveGitWorktreePaths } from '../utils/fsUtils.js'; * Options for building macOS Seatbelt profile. */ export interface SeatbeltArgsOptions { - /** The primary workspace path to allow access to. */ - workspace: string; - /** Additional paths to allow access to. */ - allowedPaths: string[]; - /** Absolute paths to explicitly deny read/write access to (overrides allowlists). */ - forbiddenPaths: string[]; + /** Fully resolved paths for the sandbox execution. */ + resolvedPaths: ResolvedSandboxPaths; /** Whether to allow network access. */ networkAccess?: boolean; - /** Granular additional permissions. */ - additionalPermissions?: SandboxPermissions; /** Whether to allow write access to the workspace. */ workspaceWrite?: boolean; } @@ -49,72 +43,22 @@ export function escapeSchemeString(str: string): string { */ export function buildSeatbeltProfile(options: SeatbeltArgsOptions): string { let profile = BASE_SEATBELT_PROFILE + '\n'; + const { resolvedPaths, networkAccess, workspaceWrite } = options; - const workspacePath = tryRealpath(options.workspace); - profile += `(allow file-read* (subpath "${escapeSchemeString(options.workspace)}"))\n`; - profile += `(allow file-read* (subpath "${escapeSchemeString(workspacePath)}"))\n`; - if (options.workspaceWrite) { - profile += `(allow file-write* (subpath "${escapeSchemeString(options.workspace)}"))\n`; - profile += `(allow file-write* (subpath "${escapeSchemeString(workspacePath)}"))\n`; + profile += `(allow file-read* (subpath "${escapeSchemeString(resolvedPaths.workspace.original)}"))\n`; + profile += `(allow file-read* (subpath "${escapeSchemeString(resolvedPaths.workspace.resolved)}"))\n`; + if (workspaceWrite) { + profile += `(allow file-write* (subpath "${escapeSchemeString(resolvedPaths.workspace.original)}"))\n`; + 
profile += `(allow file-write* (subpath "${escapeSchemeString(resolvedPaths.workspace.resolved)}"))\n`; } const tmpPath = tryRealpath(os.tmpdir()); profile += `(allow file-read* file-write* (subpath "${escapeSchemeString(tmpPath)}"))\n`; - // Add explicit deny rules for governance files in the workspace. - // These are added after the workspace allow rule to ensure they take precedence - // (Seatbelt evaluates rules in order, later rules win for same path). - for (let i = 0; i < GOVERNANCE_FILES.length; i++) { - const governanceFile = path.join(workspacePath, GOVERNANCE_FILES[i].path); - const realGovernanceFile = tryRealpath(governanceFile); - - // Determine if it should be treated as a directory (subpath) or a file (literal). - // .git is generally a directory, while ignore files are literals. - let isDirectory = GOVERNANCE_FILES[i].isDirectory; - try { - if (fs.existsSync(realGovernanceFile)) { - isDirectory = fs.lstatSync(realGovernanceFile).isDirectory(); - } - } catch { - // Ignore errors, use default guess - } - - const ruleType = isDirectory ? 'subpath' : 'literal'; - - profile += `(deny file-write* (${ruleType} "${escapeSchemeString(governanceFile)}"))\n`; - - if (realGovernanceFile !== governanceFile) { - profile += `(deny file-write* (${ruleType} "${escapeSchemeString(realGovernanceFile)}"))\n`; - } - } - - // Add explicit deny rules for secret files (.env, .env.*) in the workspace and allowed paths. - // We use regex rules to avoid expensive file discovery scans. - // Anchoring to workspace/allowed paths to avoid over-blocking. 
- const searchPaths = [options.workspace, ...options.allowedPaths]; - - for (const basePath of searchPaths) { - const resolvedBase = tryRealpath(basePath); - for (const secret of SECRET_FILES) { - // Map pattern to Seatbelt regex - let regexPattern: string; - const escapedBase = escapeRegex(resolvedBase); - if (secret.pattern.endsWith('*')) { - // .env.* -> .env\..+ (match .env followed by dot and something) - // We anchor the secret file name to either a directory separator or the start of the relative path. - const basePattern = secret.pattern.slice(0, -1).replace(/\./g, '\\\\.'); - regexPattern = `^${escapedBase}/(.*/)?${basePattern}[^/]+$`; - } else { - // .env -> \.env$ - const basePattern = secret.pattern.replace(/\./g, '\\\\.'); - regexPattern = `^${escapedBase}/(.*/)?${basePattern}$`; - } - profile += `(deny file-read* file-write* (regex #"${regexPattern}"))\n`; - } - } - // Auto-detect and support git worktrees by granting read and write access to the underlying git directory - const { worktreeGitDir, mainGitDir } = resolveGitWorktreePaths(workspacePath); + const { worktreeGitDir, mainGitDir } = resolveGitWorktreePaths( + resolvedPaths.workspace.resolved, + ); if (worktreeGitDir) { profile += `(allow file-read* file-write* (subpath "${escapeSchemeString(worktreeGitDir)}"))\n`; } @@ -154,58 +98,115 @@ export function buildSeatbeltProfile(options: SeatbeltArgsOptions): string { } } - // Handle allowedPaths - const allowedPaths = options.allowedPaths; + // Handle allowedPaths and globalIncludes + const allowedPaths = [ + ...resolvedPaths.policyAllowed, + ...resolvedPaths.globalIncludes, + ]; for (let i = 0; i < allowedPaths.length; i++) { - const allowedPath = tryRealpath(allowedPaths[i]); + const allowedPath = allowedPaths[i]; profile += `(allow file-read* file-write* (subpath "${escapeSchemeString(allowedPath)}"))\n`; } - // Handle granular additional permissions - if (options.additionalPermissions?.fileSystem) { - const { read, write } = 
options.additionalPermissions.fileSystem; - if (read) { - for (let i = 0; i < read.length; i++) { - const resolved = tryRealpath(read[i]); - let isFile = false; - try { - isFile = fs.statSync(resolved).isFile(); - } catch { - // Ignore error - } - if (isFile) { - profile += `(allow file-read* (literal "${escapeSchemeString(resolved)}"))\n`; - } else { - profile += `(allow file-read* (subpath "${escapeSchemeString(resolved)}"))\n`; - } - } + // Handle granular additional read permissions + for (let i = 0; i < resolvedPaths.policyRead.length; i++) { + const resolved = resolvedPaths.policyRead[i]; + let isFile = false; + try { + isFile = fs.statSync(resolved).isFile(); + } catch { + // Ignore error } - if (write) { - for (let i = 0; i < write.length; i++) { - const resolved = tryRealpath(write[i]); - let isFile = false; - try { - isFile = fs.statSync(resolved).isFile(); - } catch { - // Ignore error - } - if (isFile) { - profile += `(allow file-read* file-write* (literal "${escapeSchemeString(resolved)}"))\n`; - } else { - profile += `(allow file-read* file-write* (subpath "${escapeSchemeString(resolved)}"))\n`; - } + if (isFile) { + profile += `(allow file-read* (literal "${escapeSchemeString(resolved)}"))\n`; + } else { + profile += `(allow file-read* (subpath "${escapeSchemeString(resolved)}"))\n`; + } + } + + // Handle granular additional write permissions + for (let i = 0; i < resolvedPaths.policyWrite.length; i++) { + const resolved = resolvedPaths.policyWrite[i]; + let isFile = false; + try { + isFile = fs.statSync(resolved).isFile(); + } catch { + // Ignore error + } + if (isFile) { + profile += `(allow file-read* file-write* (literal "${escapeSchemeString(resolved)}"))\n`; + } else { + profile += `(allow file-read* file-write* (subpath "${escapeSchemeString(resolved)}"))\n`; + } + } + + // Add explicit deny rules for governance files in the workspace. 
+ // These are added after the workspace allow rule to ensure they take precedence + // (Seatbelt evaluates rules in order, later rules win for same path). + for (let i = 0; i < GOVERNANCE_FILES.length; i++) { + const governanceFile = path.join( + resolvedPaths.workspace.resolved, + GOVERNANCE_FILES[i].path, + ); + const realGovernanceFile = tryRealpath(governanceFile); + + // Determine if it should be treated as a directory (subpath) or a file (literal). + // .git is generally a directory, while ignore files are literals. + let isDirectory = GOVERNANCE_FILES[i].isDirectory; + try { + if (fs.existsSync(realGovernanceFile)) { + isDirectory = fs.lstatSync(realGovernanceFile).isDirectory(); } + } catch { + // Ignore errors, use default guess + } + + const ruleType = isDirectory ? 'subpath' : 'literal'; + + profile += `(deny file-write* (${ruleType} "${escapeSchemeString(governanceFile)}"))\n`; + + if (realGovernanceFile !== governanceFile) { + profile += `(deny file-write* (${ruleType} "${escapeSchemeString(realGovernanceFile)}"))\n`; + } + } + + // Add explicit deny rules for secret files (.env, .env.*) in the workspace and allowed paths. + // We use regex rules to avoid expensive file discovery scans. + // Anchoring to workspace/allowed paths to avoid over-blocking. + const searchPaths = [ + resolvedPaths.workspace.resolved, + resolvedPaths.workspace.original, + ...resolvedPaths.policyAllowed, + ...resolvedPaths.globalIncludes, + ]; + + for (const basePath of searchPaths) { + for (const secret of SECRET_FILES) { + // Map pattern to Seatbelt regex + let regexPattern: string; + const escapedBase = escapeRegex(basePath); + if (secret.pattern.endsWith('*')) { + // .env.* -> .env\..+ (match .env followed by dot and something) + // We anchor the secret file name to either a directory separator or the start of the relative path. 
+ const basePattern = secret.pattern.slice(0, -1).replace(/\./g, '\\\\.'); + regexPattern = `^${escapedBase}/(.*/)?${basePattern}[^/]+$`; + } else { + // .env -> \.env$ + const basePattern = secret.pattern.replace(/\./g, '\\\\.'); + regexPattern = `^${escapedBase}/(.*/)?${basePattern}$`; + } + profile += `(deny file-read* file-write* (regex #"${regexPattern}"))\n`; } } // Handle forbiddenPaths - const forbiddenPaths = options.forbiddenPaths; + const forbiddenPaths = resolvedPaths.forbidden; for (let i = 0; i < forbiddenPaths.length; i++) { - const forbiddenPath = tryRealpath(forbiddenPaths[i]); + const forbiddenPath = forbiddenPaths[i]; profile += `(deny file-read* file-write* (subpath "${escapeSchemeString(forbiddenPath)}"))\n`; } - if (options.networkAccess || options.additionalPermissions?.network) { + if (networkAccess) { profile += NETWORK_SEATBELT_PROFILE; } From faa7a9da305ac8ac5599cf08b0f68aac2b9639d3 Mon Sep 17 00:00:00 2001 From: Jarrod Whelan <150866123+jwhelangoog@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:30:52 -0700 Subject: [PATCH 39/39] feat(cli): refine tool output formatting for compact mode (#24677) --- .../messages/DenseToolMessage.test.tsx | 32 +++++++-- .../components/messages/DenseToolMessage.tsx | 71 +++++-------------- .../components/messages/ToolGroupMessage.tsx | 11 --- .../DenseToolMessage.test.tsx.snap | 13 ++-- packages/core/src/tools/ls.test.ts | 14 ++-- packages/core/src/tools/ls.ts | 2 +- 6 files changed, 58 insertions(+), 85 deletions(-) diff --git a/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx b/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx index 30879b13b3..586ce89ab2 100644 --- a/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx @@ -357,9 +357,8 @@ describe('DenseToolMessage', () => { await waitUntilReady(); const output = lastFrame(); expect(output).toContain('→ Found 2 matches'); - // Matches 
are rendered in a secondary list for high-signal summaries - expect(output).toContain('file1.ts:10: match 1'); - expect(output).toContain('file2.ts:20: match 2'); + // Matches should no longer be rendered in dense mode to keep it compact + expect(output).not.toContain('file1.ts:10: match 1'); expect(output).toMatchSnapshot(); }); @@ -400,9 +399,8 @@ describe('DenseToolMessage', () => { const output = lastFrame(); expect(output).toContain('Attempting to read files from **/*.ts'); expect(output).toContain('→ Read 3 file(s) (1 ignored)'); - expect(output).toContain('file1.ts'); - expect(output).toContain('file2.ts'); - expect(output).toContain('file3.ts'); + // File lists should no longer be rendered in dense mode + expect(output).not.toContain('file1.ts'); expect(output).toMatchSnapshot(); }); @@ -477,6 +475,28 @@ describe('DenseToolMessage', () => { expect(output).toMatchSnapshot(); }); + it('truncates long description but preserves tool name (< 25 chars)', async () => { + const longDescription = + 'This is a very long description that should definitely be truncated because it exceeds the available terminal width and we want to see how it behaves.'; + const toolName = 'tool-name-is-24-chars-!!'; // Exactly 24 chars + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + + // Tool name should be fully present (it plus one space is exactly 25, fitting the maxWidth) + expect(output).toContain(toolName); + // Description should be present but truncated + expect(output).toContain('This is a'); + expect(output).toMatchSnapshot(); + }); + describe('Toggleable Diff View (Alternate Buffer)', () => { const diffResult: FileDiff = { fileDiff: '@@ -1,1 +1,1 @@\n-old line\n+new line', diff --git a/packages/cli/src/ui/components/messages/DenseToolMessage.tsx b/packages/cli/src/ui/components/messages/DenseToolMessage.tsx index 6e81d07931..f5e4b31c66 100644 --- 
a/packages/cli/src/ui/components/messages/DenseToolMessage.tsx +++ b/packages/cli/src/ui/components/messages/DenseToolMessage.tsx @@ -72,27 +72,6 @@ const hasPayload = (res: unknown): res is PayloadResult => { return typeof value === 'string'; }; -const RenderItemsList: React.FC<{ - items?: string[]; - maxVisible?: number; -}> = ({ items, maxVisible = 20 }) => { - if (!items || items.length === 0) return null; - return ( - - {items.slice(0, maxVisible).map((item, i) => ( - - {item} - - ))} - {items.length > maxVisible && ( - - ... and {items.length - maxVisible} more - - )} - - ); -}; - function getFileOpData( diff: FileDiff, status: CoreToolCallStatus, @@ -188,8 +167,6 @@ function getFileOpData( } function getReadManyFilesData(result: ReadManyFilesResult): ViewParts { - const items = result.files ?? []; - const maxVisible = 10; const includePatterns = result.include?.join(', ') ?? ''; const description = ( @@ -198,18 +175,12 @@ function getReadManyFilesData(result: ReadManyFilesResult): ViewParts { ); const skippedCount = result.skipped?.length ?? 0; - const summaryStr = `Read ${items.length} file(s)${ + const summaryStr = `Read ${result.files.length} file(s)${ skippedCount > 0 ? ` (${skippedCount} ignored)` : '' }`; const summary = → {summaryStr}; - const hasItems = items.length > 0; - const payload = hasItems ? 
( - - {hasItems && } - - ) : undefined; - return { description, summary, payload }; + return { description, summary, payload: undefined }; } function getListDirectoryData( @@ -258,20 +229,11 @@ function getGenericSuccessData( ); } else if (isGrepResult(resultDisplay)) { - summary = → {resultDisplay.summary}; - const matches = resultDisplay.matches; - if (matches.length > 0) { - payload = ( - - `${m.filePath}:${m.lineNumber}: ${m.line.trim()}`, - )} - maxVisible={10} - /> - - ); - } + summary = ( + + → {resultDisplay.summary} + + ); } else if (isTodoList(resultDisplay)) { summary = ( @@ -488,15 +450,18 @@ export const DenseToolMessage: React.FC = (props) => { return ( - - - - {name}{' '} - - - - {description} + + + + + {name}{' '} + + + + {description} + + {summary && ( { // TODO(24053): Usage of type guards makes this class too aware of internals if (isFileDiff(res)) return true; if (tool.confirmationDetails?.type === 'edit') return true; - if (isGrepResult(res) && res.matches.length > 0) return true; - - // ReadManyFilesResult check (has 'include' and 'files') - if (isListResult(res) && 'include' in res) { - const includeProp = (res as { include?: unknown }).include; - if (Array.isArray(includeProp) && res.files.length > 0) { - return true; - } - } // Generic summary/payload pattern if ( diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap index d08b84c1a9..01bb88b00e 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap @@ -51,10 +51,6 @@ exports[`DenseToolMessage > renders correctly for Errored Edit tool 1`] = ` exports[`DenseToolMessage > renders correctly for ReadManyFiles results 1`] = ` " ✓ test-tool Attempting to read files from **/*.ts → Read 3 file(s) (1 ignored) - - file1.ts - file2.ts - 
file3.ts " `; @@ -110,9 +106,6 @@ exports[`DenseToolMessage > renders correctly for file diff results with stats 1 exports[`DenseToolMessage > renders correctly for grep results 1`] = ` " ✓ test-tool Test description → Found 2 matches - - file1.ts:10: match 1 - file2.ts:20: match 2 " `; @@ -136,6 +129,12 @@ exports[`DenseToolMessage > renders generic output message for unknown object re " `; +exports[`DenseToolMessage > truncates long description but preserves tool name (< 25 chars) 1`] = ` +" ✓ tool-name-is-24-chars-!! This is a very long description that should definitely be truncated … + → Success result +" +`; + exports[`DenseToolMessage > truncates long string results 1`] = ` " ✓ test-tool Test description → AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA… diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts index 372de8e8a6..e9a684719e 100644 --- a/packages/core/src/tools/ls.test.ts +++ b/packages/core/src/tools/ls.test.ts @@ -132,7 +132,7 @@ describe('LSTool', () => { expect(result.llmContent).toContain('[DIR] subdir'); expect(result.llmContent).toContain('file1.txt'); expect(result.returnDisplay).toEqual({ - summary: 'Listed 2 item(s).', + summary: 'Found 2 item(s).', files: ['[DIR] subdir', 'file1.txt'], }); }); @@ -150,7 +150,7 @@ describe('LSTool', () => { expect(result.llmContent).toContain('secondary-file.txt'); expect(result.returnDisplay).toEqual({ - summary: 'Listed 1 item(s).', + summary: 'Found 1 item(s).', files: expect.any(Array), }); }); @@ -178,7 +178,7 @@ describe('LSTool', () => { expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('file2.log'); expect(result.returnDisplay).toEqual({ - summary: 'Listed 1 item(s).', + summary: 'Found 1 item(s).', files: expect.any(Array), }); }); @@ -195,7 +195,7 @@ describe('LSTool', () => { expect(result.llmContent).not.toContain('file2.log'); // .git is always ignored by default. 
expect(result.returnDisplay).toEqual( - expect.objectContaining({ summary: 'Listed 2 item(s). (2 ignored)' }), + expect.objectContaining({ summary: 'Found 2 item(s). (2 ignored)' }), ); }); @@ -212,7 +212,7 @@ describe('LSTool', () => { expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('file2.log'); expect(result.returnDisplay).toEqual( - expect.objectContaining({ summary: 'Listed 2 item(s). (1 ignored)' }), + expect.objectContaining({ summary: 'Found 2 item(s). (1 ignored)' }), ); }); @@ -301,7 +301,7 @@ describe('LSTool', () => { expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('problematic.txt'); expect(result.returnDisplay).toEqual({ - summary: 'Listed 1 item(s).', + summary: 'Found 1 item(s).', files: expect.any(Array), }); @@ -364,7 +364,7 @@ describe('LSTool', () => { expect(result.llmContent).toContain('secondary-file.txt'); expect(result.returnDisplay).toEqual({ - summary: 'Listed 1 item(s).', + summary: 'Found 1 item(s).', files: expect.any(Array), }); }); diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts index b8e2e6a803..249a28372b 100644 --- a/packages/core/src/tools/ls.ts +++ b/packages/core/src/tools/ls.ts @@ -276,7 +276,7 @@ class LSToolInvocation extends BaseToolInvocation { resultMessage = appendJitContext(resultMessage, jitContext); } - let displayMessage = `Listed ${entries.length} item(s).`; + let displayMessage = `Found ${entries.length} item(s).`; if (ignoredCount > 0) { displayMessage += ` (${ignoredCount} ignored)`; }