From f510394721e96772f4a5de81ad353bd9472a814c Mon Sep 17 00:00:00 2001 From: Coco Sheng Date: Thu, 2 Apr 2026 11:01:00 -0400 Subject: [PATCH] Implement background process monitoring and inspection tools (#23799) --- evals/background_processes.eval.ts | 77 +++++ integration-tests/shell-background.responses | 5 + integration-tests/shell-background.test.ts | 105 ++++++ packages/core/src/config/config.ts | 14 + .../services/shellExecutionService.test.ts | 107 +++++- .../src/services/shellExecutionService.ts | 114 ++++++- .../coreToolsModelSnapshots.test.ts.snap | 8 + .../dynamic-declaration-helpers.ts | 5 + packages/core/src/tools/shell.test.ts | 12 +- packages/core/src/tools/shell.ts | 29 +- .../shellBackgroundTools.integration.test.ts | 104 ++++++ .../src/tools/shellBackgroundTools.test.ts | 314 ++++++++++++++++++ .../core/src/tools/shellBackgroundTools.ts | 299 +++++++++++++++++ 13 files changed, 1181 insertions(+), 12 deletions(-) create mode 100644 evals/background_processes.eval.ts create mode 100644 integration-tests/shell-background.responses create mode 100644 integration-tests/shell-background.test.ts create mode 100644 packages/core/src/tools/shellBackgroundTools.integration.test.ts create mode 100644 packages/core/src/tools/shellBackgroundTools.test.ts create mode 100644 packages/core/src/tools/shellBackgroundTools.ts diff --git a/evals/background_processes.eval.ts b/evals/background_processes.eval.ts new file mode 100644 index 0000000000..039a416ae9 --- /dev/null +++ b/evals/background_processes.eval.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import fs from 'node:fs'; +import path from 'node:path'; + +describe('Background Process Monitoring', () => { + evalTest('USUALLY_PASSES', { + name: 'should naturally use read output tool to find token', + prompt: + "Run the script using 'bash 
generate_token.sh'. It will emit a token after a short delay and continue running. Find the token and tell me what it is.", + files: { + 'generate_token.sh': `#!/bin/bash +sleep 2 +echo "TOKEN=xyz123" +sleep 100 +`, + }, + setup: async (rig) => { + // Create .gemini directory to avoid file system error in test rig + if (rig.homeDir) { + const geminiDir = path.join(rig.homeDir, '.gemini'); + fs.mkdirSync(geminiDir, { recursive: true }); + } + }, + assert: async (rig, result) => { + const toolCalls = rig.readToolLogs(); + + // Check if read_background_output was called + const hasReadCall = toolCalls.some( + (call) => call.toolRequest.name === 'read_background_output', + ); + + expect( + hasReadCall, + 'Expected agent to call read_background_output to find the token', + ).toBe(true); + + // Verify that the agent found the correct token + expect( + result.includes('xyz123'), + `Expected agent to find the token xyz123. Agent output: ${result}`, + ).toBe(true); + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'should naturally use list tool to verify multiple processes', + prompt: + "Start three background processes that run 'sleep 100', 'sleep 200', and 'sleep 300' respectively. 
Verify that all three are currently running.", + setup: async (rig) => { + // Create .gemini directory to avoid file system error in test rig + if (rig.homeDir) { + const geminiDir = path.join(rig.homeDir, '.gemini'); + fs.mkdirSync(geminiDir, { recursive: true }); + } + }, + assert: async (rig, result) => { + const toolCalls = rig.readToolLogs(); + + // Check if list_background_processes was called + const hasListCall = toolCalls.some( + (call) => call.toolRequest.name === 'list_background_processes', + ); + + expect( + hasListCall, + 'Expected agent to call list_background_processes', + ).toBe(true); + }, + }); +}); diff --git a/integration-tests/shell-background.responses b/integration-tests/shell-background.responses new file mode 100644 index 0000000000..652b82a8e0 --- /dev/null +++ b/integration-tests/shell-background.responses @@ -0,0 +1,5 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I will run the command in the background for you."},{"functionCall":{"name":"run_shell_command","args":{"command":"sleep 10 && echo hello-from-background","is_background":true}}}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The background process has been started. Now I will list the background processes to verify."},{"functionCall":{"name":"list_background_processes","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I see the background process 'sleep 10 && echo hello-from-background' is running. 
Would you like me to read its output?"}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I will read the output for you."},{"functionCall":{"name":"read_background_output","args":{"pid":12345}}}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The output of the background process is:\nhello-from-background"}],"role":"model"},"finishReason":"STOP","index":0}]}]} diff --git a/integration-tests/shell-background.test.ts b/integration-tests/shell-background.test.ts new file mode 100644 index 0000000000..f28120e7e4 --- /dev/null +++ b/integration-tests/shell-background.test.ts @@ -0,0 +1,105 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +describe('shell-background-tools', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should run a command in the background, list it, and read its output', async () => { + // We use a fake responses file to make the test deterministic and run in CI. + rig.setup('shell-background-workflow', { + fakeResponsesPath: join(__dirname, 'shell-background.responses'), + settings: { + tools: { + core: [ + 'run_shell_command', + 'list_background_processes', + 'read_background_output', + ], + }, + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + matcher: 'run_shell_command', + hooks: [ + { + type: 'command', + // This hook intercepts run_shell_command. 
+ // If is_background is true, it returns a mock result with PID 12345. + // It also creates the mock log file that read_background_output expects. + command: `node -e " + const fs = require('fs'); + const path = require('path'); + const input = JSON.parse(fs.readFileSync(0, 'utf-8')); + const args = JSON.parse(input.tool_call.args); + + if (args.is_background) { + const logDir = path.join(process.env.GEMINI_CLI_HOME, 'background-processes'); + if (!fs.existsSync(logDir)) fs.mkdirSync(logDir, { recursive: true }); + fs.writeFileSync(path.join(logDir, 'background-12345.log'), 'hello-from-background\\n'); + + console.log(JSON.stringify({ + decision: 'replace', + hookSpecificOutput: { + result: { + llmContent: 'Command moved to background (PID: 12345). Output hidden. Press Ctrl+B to view.', + data: { pid: 12345, command: args.command } + } + } + })); + } else { + console.log(JSON.stringify({ decision: 'allow' })); + } + "`, + }, + ], + }, + ], + }, + }, + }); + + const run = await rig.runInteractive({ approvalMode: 'yolo' }); + + // 1. Start a background process + // We use a command that stays alive for a bit to ensure it shows up in lists + await run.type( + "Run 'sleep 10 && echo hello-from-background' in the background.", + ); + await run.type('\r'); + + // Wait for the model's canned response acknowledging the start + await run.expectText('background', 30000); + + // 2. List background processes + await run.type('List my background processes.'); + await run.type('\r'); + // Wait for the model's canned response showing the list + await run.expectText('hello-from-background', 30000); + + // 3. 
Read the output + await run.type('Read the output of that process.'); + await run.type('\r'); + // Wait for the model's canned response showing the output + await run.expectText('hello-from-background', 30000); + }, 60000); +}); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 34a19f01d5..d203e047b4 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -41,6 +41,10 @@ import { UpdateTopicTool } from '../tools/topicTool.js'; import { TopicState } from './topicState.js'; import { ExitPlanModeTool } from '../tools/exit-plan-mode.js'; import { EnterPlanModeTool } from '../tools/enter-plan-mode.js'; +import { + ListBackgroundProcessesTool, + ReadBackgroundOutputTool, +} from '../tools/shellBackgroundTools.js'; import { GeminiClient } from '../core/client.js'; import { BaseLlmClient } from '../core/baseLlmClient.js'; import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js'; @@ -3516,6 +3520,16 @@ export class Config implements McpContext, AgentLoopContext { maybeRegister(ShellTool, () => registry.registerTool(new ShellTool(this, this.messageBus)), ); + maybeRegister(ListBackgroundProcessesTool, () => + registry.registerTool( + new ListBackgroundProcessesTool(this, this.messageBus), + ), + ); + maybeRegister(ReadBackgroundOutputTool, () => + registry.registerTool( + new ReadBackgroundOutputTool(this, this.messageBus), + ), + ); if (!this.isMemoryManagerEnabled()) { maybeRegister(MemoryTool, () => registry.registerTool(new MemoryTool(this.messageBus, this.storage)), diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index c1f2a954f2..0fc20225ac 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -128,6 +128,7 @@ const mockProcessKill = vi .mockImplementation(() => true); const shellExecutionConfig: ShellExecutionConfig = { 
+ sessionId: 'default', terminalWidth: 80, terminalHeight: 24, pager: 'cat', @@ -483,6 +484,7 @@ describe('ShellExecutionService', () => { ptyProcess: mockPtyProcess as any, // eslint-disable-next-line @typescript-eslint/no-explicit-any headlessTerminal: mockHeadlessTerminal as any, + command: 'some-command', }); }); @@ -753,6 +755,8 @@ describe('ShellExecutionService', () => { (ShellExecutionService as any).activePtys.clear(); // eslint-disable-next-line @typescript-eslint/no-explicit-any (ShellExecutionService as any).activeChildProcesses.clear(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.clear(); }); afterEach(() => { @@ -783,7 +787,11 @@ describe('ShellExecutionService', () => { ]); // Background the process - ShellExecutionService.background(handle.pid!); + ShellExecutionService.background( + handle.pid!, + 'default', + 'long-running-pty', + ); const result = await handle.result; expect(result.backgrounded).toBe(true); @@ -791,7 +799,7 @@ describe('ShellExecutionService', () => { expect(mockMkdirSync).toHaveBeenCalledWith( expect.stringContaining('background-processes'), - { recursive: true }, + { recursive: true, mode: 0o700 }, ); // Verify initial output was written @@ -822,7 +830,11 @@ describe('ShellExecutionService', () => { mockBgChildProcess.stdout?.emit('data', Buffer.from('initial cp output')); await new Promise((resolve) => process.nextTick(resolve)); - ShellExecutionService.background(handle.pid!); + ShellExecutionService.background( + handle.pid!, + 'default', + 'long-running-child', + ); const result = await handle.result; expect(result.backgrounded).toBe(true); @@ -861,7 +873,11 @@ describe('ShellExecutionService', () => { }); // Background the process - ShellExecutionService.background(handle.pid!); + ShellExecutionService.background( + handle.pid!, + 'default', + 'failing-log-setup', + ); const result = await handle.result; expect(result.backgrounded).toBe(true); 
@@ -872,6 +888,89 @@ describe('ShellExecutionService', () => { await ShellExecutionService.kill(handle.pid!); }); + + it('should track background process history', async () => { + await simulateExecution( + 'history-test-cmd', + async (pty) => { + ShellExecutionService.background( + pty.pid, + 'default', + 'history-test-cmd', + ); + + const history = + ShellExecutionService.listBackgroundProcesses('default'); + expect(history).toHaveLength(1); + expect(history[0]).toEqual( + expect.objectContaining({ + pid: pty.pid, + command: 'history-test-cmd', + status: 'running', + }), + ); + + // Simulate exit + pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); + }, + { ...shellExecutionConfig, originalCommand: 'history-test-cmd' }, + ); + + const history = ShellExecutionService.listBackgroundProcesses('default'); + expect(history[0]).toEqual( + expect.objectContaining({ + pid: mockPtyProcess.pid, + command: 'history-test-cmd', + status: 'exited', + exitCode: 0, + }), + ); + }); + + it('should evict oldest process history when exceeding max size', () => { + const MAX = 100; + const history = new Map(); + for (let i = 1; i <= MAX; i++) { + history.set(i, { + command: `cmd-${i}`, + status: 'running', + startTime: Date.now(), + }); + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).activeChildProcesses.set(101, { + process: {}, + state: { output: '' }, + command: 'cmd-101', + sessionId: 'default', + }); + + ShellExecutionService.background(101, 'default', 'cmd-101'); + + const processes = + ShellExecutionService.listBackgroundProcesses('default'); + expect(processes).toHaveLength(MAX); + expect(processes.some((p) => p.pid === 1)).toBe(false); + }); + + it('should throw error if sessionId is missing for background operations', () => { + expect(() => 
ShellExecutionService.background(102)).toThrow( + 'Session ID is required for background operations', + ); + }); + + it('should throw error if sessionId is missing for listBackgroundProcesses', () => { + expect(() => + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ShellExecutionService.listBackgroundProcesses(undefined as any), + ).toThrow('Session ID is required'); + }); }); describe('Binary Output', () => { diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index 08b03ec539..dfbb3a5033 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -103,6 +103,8 @@ export interface ShellExecutionConfig { maxSerializedLines?: number; sandboxConfig?: SandboxConfig; backgroundCompletionBehavior?: 'inject' | 'notify' | 'silent'; + originalCommand?: string; + sessionId?: string; } /** @@ -114,6 +116,8 @@ interface ActivePty { ptyProcess: IPty; headlessTerminal: pkg.Terminal; maxSerializedLines?: number; + command: string; + sessionId?: string; } interface ActiveChildProcess { @@ -124,6 +128,8 @@ interface ActiveChildProcess { sniffChunks: Buffer[]; binaryBytesReceived: number; }; + command: string; + sessionId?: string; } const findLastContentLine = ( @@ -230,11 +236,28 @@ const writeBufferToLogStream = ( * */ +export type BackgroundProcess = { + pid: number; + command: string; + status: 'running' | 'exited'; + exitCode?: number | null; + signal?: number | null; +}; + +export type BackgroundProcessRecord = Omit & { + startTime: number; + endTime?: number; +}; + export class ShellExecutionService { private static activePtys = new Map(); private static activeChildProcesses = new Map(); private static backgroundLogPids = new Set(); private static backgroundLogStreams = new Map(); + private static backgroundProcessHistory = new Map< + string, // sessionId + Map + >(); static getLogDir(): string { return 
path.join(Storage.getGlobalTempDir(), 'background-processes'); @@ -519,10 +542,12 @@ export class ShellExecutionService { binaryBytesReceived: 0, }; - if (child.pid) { + if (child.pid !== undefined) { this.activeChildProcesses.set(child.pid, { process: child, state, + command: shellExecutionConfig.originalCommand ?? commandToExecute, + sessionId: shellExecutionConfig.sessionId, }); } @@ -696,6 +721,17 @@ export class ShellExecutionService { exitCode, signal: exitSignal, }; + + const sessionId = shellExecutionConfig.sessionId ?? 'default'; + const history = + ShellExecutionService.backgroundProcessHistory.get(sessionId); + const historyItem = history?.get(pid); + if (historyItem) { + historyItem.status = 'exited'; + historyItem.exitCode = exitCode ?? undefined; + historyItem.signal = exitSignal ?? undefined; + historyItem.endTime = Date.now(); + } onOutputEvent(event); // eslint-disable-next-line @typescript-eslint/no-floating-promises @@ -849,6 +885,8 @@ export class ShellExecutionService { ptyProcess, headlessTerminal, maxSerializedLines: shellExecutionConfig.maxSerializedLines, + command: shellExecutionConfig.originalCommand ?? commandToExecute, + sessionId: shellExecutionConfig.sessionId, }); const result = ExecutionLifecycleService.attachExecution(ptyPid, { @@ -1116,6 +1154,17 @@ export class ShellExecutionService { exitCode, signal: signal ?? null, }; + + const sessionId = shellExecutionConfig.sessionId ?? 'default'; + const history = + ShellExecutionService.backgroundProcessHistory.get(sessionId); + const historyItem = history?.get(ptyPid); + if (historyItem) { + historyItem.status = 'exited'; + historyItem.exitCode = exitCode; + historyItem.signal = signal ?? null; + historyItem.endTime = Date.now(); + } onOutputEvent(event); // eslint-disable-next-line @typescript-eslint/no-floating-promises @@ -1269,16 +1318,57 @@ export class ShellExecutionService { * * @param pid The process ID of the target PTY. 
*/ - static background(pid: number): void { + static background(pid: number, sessionId?: string, command?: string): void { const activePty = this.activePtys.get(pid); const activeChild = this.activeChildProcesses.get(pid); + const resolvedSessionId = + sessionId ?? activePty?.sessionId ?? activeChild?.sessionId; + const resolvedCommand = + command ?? + activePty?.command ?? + activeChild?.command ?? + 'unknown command'; + + if (!resolvedSessionId) { + throw new Error('Session ID is required for background operations'); + } + + const MAX_BACKGROUND_PROCESS_HISTORY_SIZE = 100; + const history = + this.backgroundProcessHistory.get(resolvedSessionId) ?? + new Map< + number, + { + command: string; + status: 'running' | 'exited'; + exitCode?: number | null; + signal?: number | null; + startTime: number; + endTime?: number; + } + >(); + + if (history.size >= MAX_BACKGROUND_PROCESS_HISTORY_SIZE) { + const oldestPid = history.keys().next().value; + if (oldestPid !== undefined) { + history.delete(oldestPid); + } + } + + history.set(pid, { + command: resolvedCommand, + status: 'running', + startTime: Date.now(), + }); + this.backgroundProcessHistory.set(resolvedSessionId, history); + // Set up background logging const logPath = this.getLogFilePath(pid); const logDir = this.getLogDir(); try { - mkdirSync(logDir, { recursive: true }); - const stream = fs.createWriteStream(logPath, { flags: 'w' }); + mkdirSync(logDir, { recursive: true, mode: 0o700 }); + const stream = fs.createWriteStream(logPath, { flags: 'wx' }); stream.on('error', (err) => { debugLogger.warn('Background log stream error:', err); }); @@ -1391,4 +1481,20 @@ export class ShellExecutionService { } } } + + static listBackgroundProcesses(sessionId: string): BackgroundProcess[] { + if (!sessionId) { + throw new Error('Session ID is required'); + } + const history = this.backgroundProcessHistory.get(sessionId); + if (!history) return []; + + return Array.from(history.entries()).map(([pid, info]) => ({ + pid, + 
command: info.command, + status: info.status, + exitCode: info.exitCode, + signal: info.signal, + })); + } } diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index ba93e42e62..5676b42132 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -616,6 +616,10 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps "description": "Exact bash command to execute as \`bash -c \`", "type": "string", }, + "delay_ms": { + "description": "Optional. Delay in milliseconds to wait after starting the process in the background. Useful to allow the process to start and generate initial output before returning.", + "type": "integer", + }, "description": { "description": "Brief description of the command for the user. Be specific and concise. Ideally a single sentence. Can be up to 3 sentences for clarity. No line breaks.", "type": "string", @@ -1418,6 +1422,10 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > "description": "Exact bash command to execute as \`bash -c \`", "type": "string", }, + "delay_ms": { + "description": "Optional. Delay in milliseconds to wait after starting the process in the background. Useful to allow the process to start and generate initial output before returning.", + "type": "integer", + }, "description": { "description": "Brief description of the command for the user. Be specific and concise. Ideally a single sentence. Can be up to 3 sentences for clarity. 
No line breaks.", "type": "string", diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 1e7a36e639..29da313bf4 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -115,6 +115,11 @@ export function getShellDeclaration( description: 'Set to true if this command should be run in the background (e.g. for long-running servers or watchers). The command will be started, allowed to run for a brief moment to check for immediate errors, and then moved to the background.', }, + delay_ms: { + type: 'integer', + description: + 'Optional. Delay in milliseconds to wait after starting the process in the background. Useful to allow the process to start and generate initial output before returning.', + }, ...(enableToolSandboxing ? { [PARAM_ADDITIONAL_PERMISSIONS]: { diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index f215c5f241..d05091def2 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -416,7 +416,11 @@ describe('ShellTool', () => { // Advance time to trigger the background timeout await vi.advanceTimersByTimeAsync(250); - expect(mockShellBackground).toHaveBeenCalledWith(12345); + expect(mockShellBackground).toHaveBeenCalledWith( + 12345, + 'default', + 'sleep 10', + ); await promise; }); @@ -656,7 +660,11 @@ describe('ShellTool', () => { // Advance time to trigger the background timeout await vi.advanceTimersByTimeAsync(250); - expect(mockShellBackground).toHaveBeenCalledWith(12345); + expect(mockShellBackground).toHaveBeenCalledWith( + 12345, + 'default', + 'sleep 10', + ); await promise; }); diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 6c0e946596..a467ef4c63 100644 --- a/packages/core/src/tools/shell.ts +++ 
b/packages/core/src/tools/shell.ts @@ -65,6 +65,7 @@ export interface ShellToolParams { description?: string; dir_path?: string; is_background?: boolean; + delay_ms?: number; [PARAM_ADDITIONAL_PERMISSIONS]?: SandboxPermissions; } @@ -521,6 +522,7 @@ export class ShellToolInvocation extends BaseToolInvocation< this.context.config.getEnableInteractiveShell(), { ...shellExecutionConfig, + sessionId: this.context.config?.getSessionId?.() ?? 'default', pager: 'cat', sanitizationConfig: shellExecutionConfig?.sanitizationConfig ?? @@ -547,6 +549,7 @@ export class ShellToolInvocation extends BaseToolInvocation< }, backgroundCompletionBehavior: this.context.config.getShellBackgroundCompletionBehavior(), + originalCommand: strippedCommand, }, ); @@ -556,10 +559,32 @@ export class ShellToolInvocation extends BaseToolInvocation< } // If the model requested to run in the background, do so after a short delay. + let completed = false; if (this.params.is_background) { + resultPromise + .then(() => { + completed = true; + }) + .catch(() => { + completed = true; // Also mark completed if it failed + }); + + const sessionId = this.context.config?.getSessionId?.() ?? 'default'; + const delay = this.params.delay_ms ?? BACKGROUND_DELAY_MS; setTimeout(() => { - ShellExecutionService.background(pid); - }, BACKGROUND_DELAY_MS); + ShellExecutionService.background(pid, sessionId, strippedCommand); + }, delay); + + // Wait for the delay amount to see if command returns quickly + await new Promise((resolve) => setTimeout(resolve, delay)); + + if (!completed) { + // Return early with initial output if still running + return { + llmContent: `Command is running in background. PID: ${pid}. 
Initial output:\n${cumulativeOutput}`, + returnDisplay: `Background process started with PID ${pid}.`, + }; + } } } diff --git a/packages/core/src/tools/shellBackgroundTools.integration.test.ts b/packages/core/src/tools/shellBackgroundTools.integration.test.ts new file mode 100644 index 0000000000..a3ef84f92d --- /dev/null +++ b/packages/core/src/tools/shellBackgroundTools.integration.test.ts @@ -0,0 +1,104 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { ShellExecutionService } from '../services/shellExecutionService.js'; +import { + ListBackgroundProcessesTool, + ReadBackgroundOutputTool, +} from './shellBackgroundTools.js'; +import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; +import { NoopSandboxManager } from '../services/sandboxManager.js'; +import type { AgentLoopContext } from '../config/agent-loop-context.js'; + +// Integration test simulating model interaction cycle +describe('Background Tools Integration', () => { + const bus = createMockMessageBus(); + let listTool: ListBackgroundProcessesTool; + let readTool: ReadBackgroundOutputTool; + + beforeEach(() => { + vi.clearAllMocks(); + const mockContext = { + config: { getSessionId: () => 'default' }, + } as unknown as AgentLoopContext; + listTool = new ListBackgroundProcessesTool(mockContext, bus); + readTool = new ReadBackgroundOutputTool(mockContext, bus); + + // Clear history to avoid state leakage from previous runs + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.clear(); + }); + + it('should support interaction cycle: start background -> list -> read logs', async () => { + const controller = new AbortController(); + + // 1. 
Start a backgroundable process + // We use node to print continuous logs until killed + const commandString = `${process.execPath} -e "setInterval(() => console.log('Log line'), 50)"`; + + const realHandle = await ShellExecutionService.execute( + commandString, + '/', + () => {}, + controller.signal, + true, + { + originalCommand: 'node continuous_log', + sessionId: 'default', + sanitizationConfig: { + allowedEnvironmentVariables: [], + blockedEnvironmentVariables: [], + enableEnvironmentVariableRedaction: false, + }, + sandboxManager: new NoopSandboxManager(), + }, + ); + + const pid = realHandle.pid; + if (pid === undefined) { + throw new Error('pid is undefined'); + } + expect(pid).toBeGreaterThan(0); + + // 2. Simulate model triggering background operations + ShellExecutionService.background(pid, 'default', 'node continuous_log'); + + // 3. Model decides to inspect list + const listInvocation = listTool.build({}); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (listInvocation as any).context = { + config: { getSessionId: () => 'default' }, + }; + const listResult = await listInvocation.execute( + new AbortController().signal, + ); + + expect(listResult.llmContent).toContain( + `[PID ${pid}] RUNNING: \`node continuous_log\``, + ); + + // 4. Give it time to write output to interval + await new Promise((resolve) => setTimeout(resolve, 300)); + + // 5. 
Model decides to read logs + const readInvocation = readTool.build({ pid, lines: 2 }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (readInvocation as any).context = { + config: { getSessionId: () => 'default' }, + }; + const readResult = await readInvocation.execute( + new AbortController().signal, + ); + + expect(readResult.llmContent).toContain('Showing last'); + expect(readResult.llmContent).toContain('Log line'); + + // Cleanup + await ShellExecutionService.kill(pid); + controller.abort(); + }); +}); diff --git a/packages/core/src/tools/shellBackgroundTools.test.ts b/packages/core/src/tools/shellBackgroundTools.test.ts new file mode 100644 index 0000000000..25af240ede --- /dev/null +++ b/packages/core/src/tools/shellBackgroundTools.test.ts @@ -0,0 +1,314 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { ShellExecutionService } from '../services/shellExecutionService.js'; +import { + ListBackgroundProcessesTool, + ReadBackgroundOutputTool, +} from './shellBackgroundTools.js'; +import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; +import fs from 'node:fs'; +import type { AgentLoopContext } from '../config/agent-loop-context.js'; + +describe('Background Tools', () => { + let listTool: ListBackgroundProcessesTool; + let readTool: ReadBackgroundOutputTool; + const bus = createMockMessageBus(); + + beforeEach(() => { + vi.restoreAllMocks(); + const mockContext = { + config: { getSessionId: () => 'default' }, + } as unknown as AgentLoopContext; + listTool = new ListBackgroundProcessesTool(mockContext, bus); + readTool = new ReadBackgroundOutputTool(mockContext, bus); + + // Clear history to avoid state leakage from previous runs + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.clear(); + }); + + it('list_background_processes should 
return empty message when no processes', async () => { + const invocation = listTool.build({}); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + expect(result.llmContent).toBe('No background processes found.'); + }); + + it('list_background_processes should list processes after they are backgrounded', async () => { + const pid = 99999 + Math.floor(Math.random() * 1000); + + // Simulate adding to history + // Since background method relies on activePtys/activeChildProcesses, + // we should probably mock those or just call the history add logic if we can't easily trigger background. + // Wait, ShellExecutionService.background() reads from activePtys/activeChildProcesses! + // So we MUST populate them or mock them! + // Let's use vi.spyOn or populate the map if accessible? + // activePtys is private static. + // Mock active process map to provide sessionId + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).activeChildProcesses.set(pid, { + process: {}, + state: { output: '' }, + command: 'unknown command', + sessionId: 'default', + }); + + ShellExecutionService.background(pid, 'default', 'unknown command'); + + const invocation = listTool.build({}); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain( + `[PID ${pid}] RUNNING: \`unknown command\``, + ); + }); + + it('list_background_processes should show exited status with code or signal', async () => { + const pid = 98989; + const history = new Map(); + history.set(pid, { + command: 'exited command', + status: 'exited', + exitCode: 1, + startTime: Date.now(), + }); + // eslint-disable-next-line 
@typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + const invocation = listTool.build({}); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain( + `- [PID ${pid}] EXITED: \`exited command\` (Exit Code: 1)`, + ); + }); + + it('read_background_output should return error if log file does not exist', async () => { + const pid = 12345 + Math.floor(Math.random() * 1000); + const history = new Map(); + history.set(pid, { + command: 'unknown command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + const invocation = readTool.build({ pid }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + expect(result.error).toBeDefined(); + expect(result.llmContent).toContain('No output log found'); + }); + + it('read_background_output should read content from log file', async () => { + const pid = 88888 + Math.floor(Math.random() * 1000); + const logPath = ShellExecutionService.getLogFilePath(pid); + const logDir = ShellExecutionService.getLogDir(); + + // Ensure dir exists + // Add to history to pass access check + const history = new Map(); + history.set(pid, { + command: 'unknown command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + // Ensure dir exists + fs.mkdirSync(logDir, { recursive: true }); + + // Write 
mock log + fs.writeFileSync(logPath, 'line 1\nline 2\nline 3\n'); + + const invocation = readTool.build({ pid, lines: 2 }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Showing last 2 of 3 lines'); + expect(result.llmContent).toContain('line 2\nline 3'); + + // Cleanup + fs.unlinkSync(logPath); + }); + + it('read_background_output should return Access Denied for processes in other sessions', async () => { + const pid = 77777; + const history = new Map(); + history.set(pid, { + command: 'other command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'other-session', + history, + ); + + const invocation = readTool.build({ pid }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; // Asking for PID from another session + const result = await invocation.execute(new AbortController().signal); + + expect(result.error).toBeDefined(); + expect(result.llmContent).toContain('Access denied'); + }); + + it('read_background_output should handle empty log files', async () => { + const pid = 66666; + const logPath = ShellExecutionService.getLogFilePath(pid); + const logDir = ShellExecutionService.getLogDir(); + + const history = new Map(); + history.set(pid, { + command: 'empty output command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + fs.mkdirSync(logDir, { recursive: true }); + fs.writeFileSync(logPath, ''); + + const invocation = readTool.build({ pid }); + // 
eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Log is empty'); + + fs.unlinkSync(logPath); + }); + + it('read_background_output should handle direct tool errors gracefully', async () => { + const pid = 55555; + const logPath = ShellExecutionService.getLogFilePath(pid); + const logDir = ShellExecutionService.getLogDir(); + + const history = new Map(); + history.set(pid, { + command: 'fail command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + fs.mkdirSync(logDir, { recursive: true }); + fs.writeFileSync(logPath, 'dummy content'); + + // Mock open to throw to hit catch block + vi.spyOn(fs.promises, 'open').mockRejectedValue( + new Error('Simulated read error'), + ); + + const invocation = readTool.build({ pid }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.error).toBeDefined(); + expect(result.llmContent).toContain('Error reading background log'); + + fs.unlinkSync(logPath); + }); + + it('read_background_output should deny access if log is a symbolic link', async () => { + const pid = 66666; + const logPath = ShellExecutionService.getLogFilePath(pid); + const logDir = ShellExecutionService.getLogDir(); + + const history = new Map(); + history.set(pid, { + command: 'symlink command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + fs.mkdirSync(logDir, 
{ recursive: true }); + fs.writeFileSync(logPath, 'dummy content'); + + // Mock open to throw ELOOP error for symbolic link + const mockError = new Error('ELOOP: too many symbolic links encountered'); + Object.assign(mockError, { code: 'ELOOP' }); + vi.spyOn(fs.promises, 'open').mockRejectedValue(mockError); + + const invocation = readTool.build({ pid }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Access is denied'); + expect(result.error?.message).toContain('Symbolic link detected'); + + fs.unlinkSync(logPath); + }); + + it('read_background_output should tail reading trailing logic correctly', async () => { + const pid = 77777; + const logPath = ShellExecutionService.getLogFilePath(pid); + const logDir = ShellExecutionService.getLogDir(); + + const history = new Map(); + history.set(pid, { + command: 'tail command', + status: 'running', + startTime: Date.now(), + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (ShellExecutionService as any).backgroundProcessHistory.set( + 'default', + history, + ); + + fs.mkdirSync(logDir, { recursive: true }); + // Write 5 lines + fs.writeFileSync(logPath, 'line1\nline2\nline3\nline4\nline5'); + + const invocation = readTool.build({ pid, lines: 2 }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (invocation as any).context = { config: { getSessionId: () => 'default' } }; + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('line4\nline5'); + expect(result.llmContent).not.toContain('line1'); + + fs.unlinkSync(logPath); + }); +}); diff --git a/packages/core/src/tools/shellBackgroundTools.ts b/packages/core/src/tools/shellBackgroundTools.ts new file mode 100644 index 0000000000..49cc0a9161 --- /dev/null +++ 
// Patch target: b/packages/core/src/tools/shellBackgroundTools.ts (new file)
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import fs from 'node:fs';
import { ShellExecutionService } from '../services/shellExecutionService.js';
import {
  BaseDeclarativeTool,
  BaseToolInvocation,
  Kind,
  type ToolResult,
} from './tools.js';
import { ToolErrorType } from './tool-error.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import type { AgentLoopContext } from '../config/agent-loop-context.js';
import { isNodeError } from '../utils/errors.js';

// NOTE(review): the extracted patch text had its angle-bracket type arguments
// stripped (e.g. `Record,` / bare `Promise`). They are restored below as
// `Record<string, unknown>` and `ToolResult` — confirm against the repository.

/** Upper bound on how many trailing bytes of a log file are loaded per read. */
const MAX_BUFFER_LOAD_CAP_BYTES = 64 * 1024;
/** Number of trailing lines returned when the caller does not pass `lines`. */
const DEFAULT_TAIL_LINES_COUNT = 100;

/** Trailing slice of a log file plus whether earlier bytes were left unread. */
interface LogTail {
  text: string;
  truncated: boolean;
}

/** Builds a failed `ToolResult` tagged as `EXECUTION_FAILED`. */
function executionFailure(
  llmContent: string,
  returnDisplay: string,
  message: string,
): ToolResult {
  return {
    llmContent,
    returnDisplay,
    error: { message, type: ToolErrorType.EXECUTION_FAILED },
  };
}

/**
 * Resolves after `ms` milliseconds, or as soon as `signal` aborts, whichever
 * happens first. Never rejects; callers decide what an abort means.
 */
function waitUnlessAborted(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve) => {
    if (signal?.aborted) {
      resolve();
      return;
    }
    const finish = (): void => {
      clearTimeout(timer);
      signal?.removeEventListener('abort', finish);
      resolve();
    };
    const timer = setTimeout(finish, ms);
    signal?.addEventListener('abort', finish, { once: true });
  });
}

/**
 * Reads at most `maxBytes` from the end of the file at `logPath`.
 *
 * The file is opened with `O_NOFOLLOW`, so a symbolic link at `logPath`
 * surfaces as a rejected open instead of being followed. Only the bytes that
 * were actually read are decoded, so a short read never leaks zero padding
 * into the returned text.
 *
 * @throws Whatever `fs.promises.open`, `stat` or `read` reject with.
 */
async function readLogTail(
  logPath: string,
  maxBytes: number,
): Promise<LogTail> {
  const handle = await fs.promises.open(
    logPath,
    fs.constants.O_RDONLY | fs.constants.O_NOFOLLOW,
  );
  try {
    const { size } = await handle.stat();
    const length = Math.min(size, maxBytes);
    const start = Math.max(0, size - length);
    const buffer = Buffer.alloc(length);

    // A single read() may legally return fewer bytes than requested.
    let filled = 0;
    while (filled < length) {
      const { bytesRead } = await handle.read(
        buffer,
        filled,
        length - filled,
        start + filled,
      );
      if (bytesRead === 0) {
        break; // File shrank after stat(); keep what we have.
      }
      filled += bytesRead;
    }

    return { text: buffer.toString('utf-8', 0, filled), truncated: start > 0 };
  } finally {
    await handle.close();
  }
}

// --- list_background_processes ---

/** Invocation that renders the session's background processes as a list. */
class ListBackgroundProcessesInvocation extends BaseToolInvocation<
  Record<string, unknown>,
  ToolResult
> {
  constructor(
    private readonly context: AgentLoopContext,
    params: Record<string, unknown>,
    messageBus: MessageBus,
    toolName?: string,
    toolDisplayName?: string,
  ) {
    super(params, messageBus, toolName, toolDisplayName);
  }

  getDescription(): string {
    return 'Lists all active and recently completed background processes for the current session.';
  }

  /**
   * Produces one `- [PID n] STATUS: \`command\`` line per process known to
   * `ShellExecutionService` for the current session id, with exit code and
   * signal appended when present.
   */
  async execute(_signal: AbortSignal): Promise<ToolResult> {
    const processes = ShellExecutionService.listBackgroundProcesses(
      this.context.config.getSessionId(),
    );
    if (processes.length === 0) {
      return {
        llmContent: 'No background processes found.',
        returnDisplay: 'No background processes found.',
      };
    }

    const content = processes
      .map((p) => {
        // Guard against null as well as undefined so a signal-terminated
        // process is never rendered as "(Exit Code: null)".
        const exit =
          p.exitCode !== undefined && p.exitCode !== null
            ? ` (Exit Code: ${p.exitCode})`
            : '';
        const signal = p.signal ? ` (Signal: ${p.signal})` : '';
        return `- [PID ${p.pid}] ${p.status.toUpperCase()}: \`${p.command}\`${exit}${signal}`;
      })
      .join('\n');

    return {
      llmContent: content,
      returnDisplay: content,
    };
  }
}

/** Declarative tool definition for `list_background_processes`. */
export class ListBackgroundProcessesTool extends BaseDeclarativeTool<
  Record<string, unknown>,
  ToolResult
> {
  static readonly Name = 'list_background_processes';

  constructor(
    private readonly context: AgentLoopContext,
    messageBus: MessageBus,
  ) {
    super(
      ListBackgroundProcessesTool.Name,
      'List Background Processes',
      'Lists all active and recently completed background shell processes orchestrating by the agent.',
      Kind.Read,
      {
        type: 'object',
        properties: {},
      },
      messageBus,
    );
  }

  protected createInvocation(
    params: Record<string, unknown>,
    messageBus: MessageBus,
  ) {
    return new ListBackgroundProcessesInvocation(
      this.context,
      params,
      messageBus,
      this.name,
    );
  }
}

// --- read_background_output ---

interface ReadBackgroundOutputParams {
  /** PID of the background process whose log should be read. */
  pid: number;
  /** How many trailing lines to return; falls back to the default when absent. */
  lines?: number;
  /** Optional wait, in milliseconds, before the log is read. */
  delay_ms?: number;
}

/** Invocation that returns the tail of a background process's output log. */
class ReadBackgroundOutputInvocation extends BaseToolInvocation<
  ReadBackgroundOutputParams,
  ToolResult
> {
  constructor(
    private readonly context: AgentLoopContext,
    params: ReadBackgroundOutputParams,
    messageBus: MessageBus,
    toolName?: string,
    toolDisplayName?: string,
  ) {
    super(params, messageBus, toolName, toolDisplayName);
  }

  getDescription(): string {
    return `Reading output for background process ${this.params.pid}`;
  }

  /**
   * Optionally waits `delay_ms`, verifies that the PID is listed for the
   * current session, then returns the last `lines` lines of its log.
   *
   * At most the final {@link MAX_BUFFER_LOAD_CAP_BYTES} bytes of the log are
   * examined; when older bytes were skipped the header says so instead of
   * claiming to show the full log.
   *
   * @param signal Aborting it ends the `delay_ms` wait early; the log is then
   *   read immediately.
   * @returns A result with `error` set when the PID is not in this session,
   *   the log is missing, the log path is a symbolic link, or reading fails.
   */
  async execute(signal: AbortSignal): Promise<ToolResult> {
    const { pid, lines, delay_ms: delayMs } = this.params;

    if (typeof delayMs === 'number' && delayMs > 0) {
      await waitUnlessAborted(delayMs, signal);
    }

    // Verify process belongs to this session to prevent reading logs of
    // processes from other sessions/users.
    const processes = ShellExecutionService.listBackgroundProcesses(
      this.context.config.getSessionId(),
    );
    if (!processes.some((p) => p.pid === pid)) {
      return executionFailure(
        `Access denied. Background process ID ${pid} not found in this session's history.`,
        'Access denied.',
        `Background process history lookup failed for PID ${pid}`,
      );
    }

    const logPath = ShellExecutionService.getLogFilePath(pid);

    let tail: LogTail;
    try {
      // No separate access() probe: opening directly removes the window in
      // which the file could change between the check and the read.
      tail = await readLogTail(logPath, MAX_BUFFER_LOAD_CAP_BYTES);
    } catch (error) {
      return this.describeReadFailure(error, pid, logPath);
    }

    if (!tail.text) {
      return {
        llmContent: 'Log is empty.',
        returnDisplay: 'Log is empty.',
      };
    }

    const logLines = tail.text.split('\n');
    if (logLines[logLines.length - 1] === '') {
      logLines.pop(); // Trailing newline, not an extra empty line.
    }

    // The window started mid-file, so its first line is probably partial.
    // Drop it, unless it is the only line available (for example a long
    // carriage-return progress stream), where dropping would return nothing.
    if (tail.truncated && logLines.length > 1) {
      logLines.shift();
    }

    const requestedLinesCount =
      typeof lines === 'number' && Number.isInteger(lines) && lines > 0
        ? lines
        : DEFAULT_TAIL_LINES_COUNT;
    const tailLines = logLines.slice(-requestedLinesCount);

    let header: string;
    if (tail.truncated) {
      header = `Showing last ${tailLines.length} lines (only the final ${
        MAX_BUFFER_LOAD_CAP_BYTES / 1024
      } KB of the log was scanned):\n`;
    } else if (requestedLinesCount < logLines.length) {
      header = `Showing last ${requestedLinesCount} of ${logLines.length} lines:\n`;
    } else {
      header = 'Full Log Output:\n';
    }

    const responseContent = header + tailLines.join('\n');
    return {
      llmContent: responseContent,
      returnDisplay: responseContent,
    };
  }

  /** Maps a failure from opening or reading the log to a tool result. */
  private describeReadFailure(
    error: unknown,
    pid: number,
    logPath: string,
  ): ToolResult {
    if (isNodeError(error) && error.code === 'ENOENT') {
      return executionFailure(
        `No output log found for process ID ${pid}. It might not have produced output or was cleaned up.`,
        `No log found for PID ${pid}`,
        `Log file not found at ${logPath}`,
      );
    }
    if (isNodeError(error) && error.code === 'ELOOP') {
      const denied =
        'Symbolic link detected at predicted log path. Access is denied for security reasons.';
      return executionFailure(
        denied,
        `Symlink detected for PID ${pid}`,
        denied,
      );
    }
    const errorMessage = error instanceof Error ? error.message : String(error);
    return executionFailure(
      `Error reading background log: ${errorMessage}`,
      'Failed to read log.',
      errorMessage,
    );
  }
}

/** Declarative tool definition for `read_background_output`. */
export class ReadBackgroundOutputTool extends BaseDeclarativeTool<
  ReadBackgroundOutputParams,
  ToolResult
> {
  static readonly Name = 'read_background_output';

  constructor(
    private readonly context: AgentLoopContext,
    messageBus: MessageBus,
  ) {
    super(
      ReadBackgroundOutputTool.Name,
      'Read Background Output',
      'Reads the output log of a background shell process. Support reading tail snapshot.',
      Kind.Read,
      {
        type: 'object',
        properties: {
          pid: {
            type: 'integer',
            description:
              'The process ID (PID) of the background process to inspect.',
          },
          lines: {
            type: 'integer',
            minimum: 1,
            description:
              'Optional. Number of lines to read from the end of the log. Defaults to 100.',
          },
          delay_ms: {
            type: 'integer',
            description:
              'Optional. Delay in milliseconds to wait before reading the output. Useful to allow the process to start and generate initial output.',
          },
        },
        required: ['pid'],
      },
      messageBus,
    );
  }

  protected createInvocation(
    params: ReadBackgroundOutputParams,
    messageBus: MessageBus,
  ) {
    return new ReadBackgroundOutputInvocation(
      this.context,
      params,
      messageBus,
      this.name,
    );
  }
}