From 128e3567cb021d6bf6aa069441643ff6667262c6 Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 10 Apr 2026 14:57:09 -0400 Subject: [PATCH] fix(core): resolve PTY exhaustion and orphan MCP subprocess leaks (#25079) --- .../services/shellExecutionService.test.ts | 28 ++++-- .../src/services/shellExecutionService.ts | 94 ++++++++++++------ packages/core/src/utils/process-utils.test.ts | 14 ++- packages/core/src/utils/process-utils.ts | 95 +++++++++++++------ 4 files changed, 160 insertions(+), 71 deletions(-) diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index b2ec495d09..3bba16f9fa 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -75,6 +75,7 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => { return { ...actual, resolveExecutable: mockResolveExecutable, + spawnAsync: vi.fn().mockResolvedValue({ stdout: '', stderr: '' }), }; }); vi.mock('node:child_process', async (importOriginal) => { @@ -695,7 +696,7 @@ describe('ShellExecutionService', () => { ); expect(sigtermCallIndex).toBe(0); - expect(sigkillCallIndex).toBe(1); + expect(sigkillCallIndex).toBeGreaterThan(0); expect(sigtermCallIndex).toBeLessThan(sigkillCallIndex); expect(result.signal).toBe(9); @@ -1476,8 +1477,11 @@ describe('ShellExecutionService child_process fallback', () => { const { result } = await simulateExecution( 'sleep 10', - (cp, abortController) => { + async (cp, abortController) => { abortController.abort(); + await new Promise(process.nextTick); + await new Promise(process.nextTick); + await new Promise(process.nextTick); if (expectedExit.signal) { cp.emit('exit', null, expectedExit.signal); cp.emit('close', null, expectedExit.signal); @@ -1497,11 +1501,14 @@ describe('ShellExecutionService child_process fallback', () => { expectedSignal, ); } else { - expect(mockCpSpawn).toHaveBeenCalledWith( - expectedCommand, - ['/pid', String(mockChildProcess.pid), '/f', '/t'], - expect.anything(), - ); + // Taskkill is spawned via spawnAsync which is mocked + const { spawnAsync } = await import('../utils/shell-utils.js'); + expect(spawnAsync).toHaveBeenCalledWith(expectedCommand, [ + '/pid', + String(mockChildProcess.pid), + '/f', + '/t', + ]); } }); }, @@ -1531,6 +1538,7 @@ describe('ShellExecutionService child_process fallback', () => { ); abortController.abort(); + await vi.advanceTimersByTimeAsync(0); // Check the first kill signal expect(mockProcessKill).toHaveBeenCalledWith( @@ -1733,10 +1741,12 @@ describe('ShellExecutionService execution method selection', () => { ); // Simulate exit to allow promise to resolve + if (!mockPtyProcess.onExit.mock.calls[0]) { + const res = await handle.result; + throw new Error(`Failed early in executeWithPty: ${res.error}`); + } mockPtyProcess.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); const result = await handle.result; - - expect(mockGetPty).toHaveBeenCalled(); expect(mockPtySpawn).toHaveBeenCalled(); expect(mockCpSpawn).not.toHaveBeenCalled(); expect(result.executionMethod).toBe('mock-pty'); diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index 1c126dab6f..4fbee62e2f 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -112,8 +112,10 @@ export interface ShellExecutionConfig { */ export type ShellOutputEvent = ExecutionOutputEvent; +export type DestroyablePty = IPty & { destroy?: () => void }; + interface ActivePty { - ptyProcess: IPty; + ptyProcess: DestroyablePty; headlessTerminal: pkg.Terminal; maxSerializedLines?: number; command: string; @@ -833,6 +835,42 @@ export class ShellExecutionService { }; } } + /** + * Destroys a PTY process to release its file descriptors. + * This is critical to prevent system-wide PTY exhaustion (see #15945). + */ + private static destroyPtyProcess(ptyProcess: DestroyablePty): void { + try { + if (typeof ptyProcess?.destroy === 'function') { + ptyProcess.destroy(); + } else if (typeof ptyProcess?.kill === 'function') { + // Fallback: if destroy() is unavailable, kill() may still close FDs + ptyProcess.kill(); + } + } catch { + // Ignore errors during PTY cleanup — process may already be dead + } + } + + /** + * Cleans up all resources associated with a PTY entry: + * the PTY process (file descriptors) and the headless terminal (memory buffers). + */ + private static cleanupPtyEntry(pid: number): void { + const entry = this.activePtys.get(pid); + if (!entry) return; + + this.destroyPtyProcess(entry.ptyProcess); + + try { + entry.headlessTerminal.dispose(); + } catch { + // Ignore errors during terminal cleanup + } + + this.activePtys.delete(pid); + } + private static async executeWithPty( commandToExecute: string, cwd: string, @@ -845,7 +883,7 @@ export class ShellExecutionService { // This should not happen, but as a safeguard... throw new Error('PTY implementation not found'); } - let spawnedPty: IPty | undefined; + let spawnedPty: DestroyablePty | undefined; let cmdCleanup: (() => void) | undefined; try { @@ -878,7 +916,7 @@ export class ShellExecutionService { }); // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - spawnedPty = ptyProcess as IPty; + spawnedPty = ptyProcess as DestroyablePty; const ptyPid = Number(ptyProcess.pid); const headlessTerminal = new Terminal({ @@ -912,13 +950,6 @@ export class ShellExecutionService { // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment pty: ptyProcess, }).catch(() => {}); - try { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - (ptyProcess as IPty & { destroy?: () => void }).destroy?.(); - } catch { - // Ignore errors during cleanup - } - this.activePtys.delete(ptyPid); }, isActive: () => { try { @@ -1146,13 +1177,11 @@ export class ShellExecutionService { ({ exitCode, signal }: { exitCode: number; signal?: number }) => { exited = true; abortSignal.removeEventListener('abort', abortHandler); - // Attempt to destroy the PTY to ensure FD is closed - try { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - (ptyProcess as IPty & { destroy?: () => void }).destroy?.(); - } catch { - // Ignore errors during cleanup - } + + // Immediately destroy the PTY to release its master FD. + // The headless terminal is kept alive until finalize() extracts + // its buffer contents, then disposed to free memory. + ShellExecutionService.destroyPtyProcess(ptyProcess); const finalize = () => { render(true); @@ -1176,11 +1205,6 @@ export class ShellExecutionService { } onOutputEvent(event); - // eslint-disable-next-line @typescript-eslint/no-floating-promises - ShellExecutionService.cleanupLogStream(ptyPid).then(() => { - ShellExecutionService.activePtys.delete(ptyPid); - }); - const endLine = headlessTerminal.buffer.active.length; const startLine = Math.max( 0, @@ -1191,10 +1215,24 @@ export class ShellExecutionService { startLine, endLine, ); + const finalOutput = getFullBufferText(headlessTerminal); + + // Dispose the headless terminal to free scrollback buffers. + // This must happen after getFullBufferText() extracts the output. + try { + headlessTerminal.dispose(); + } catch { + // Ignore errors during terminal cleanup + } + + // eslint-disable-next-line @typescript-eslint/no-floating-promises + ShellExecutionService.cleanupLogStream(ptyPid).then(() => { + ShellExecutionService.activePtys.delete(ptyPid); + }); ExecutionLifecycleService.completeWithResult(ptyPid, { rawOutput: Buffer.from(''), - output: getFullBufferText(headlessTerminal), + output: finalOutput, ansiOutput: ansiOutputSnapshot, exitCode, signal: signal ?? null, @@ -1249,14 +1287,10 @@ export class ShellExecutionService { cmdCleanup?.(); if (spawnedPty) { - try { - (spawnedPty as IPty & { destroy?: () => void }).destroy?.(); - } catch { - // Ignore errors during cleanup - } + ShellExecutionService.destroyPtyProcess(spawnedPty); } - if (error.message.includes('posix_spawnp failed')) { + if (error?.message?.includes('posix_spawnp failed')) { onOutputEvent({ type: 'data', chunk: @@ -1316,9 +1350,9 @@ export class ShellExecutionService { */ static async kill(pid: number): Promise { await this.cleanupLogStream(pid); - this.activePtys.delete(pid); this.activeChildProcesses.delete(pid); ExecutionLifecycleService.kill(pid); + this.cleanupPtyEntry(pid); } /** diff --git a/packages/core/src/utils/process-utils.test.ts b/packages/core/src/utils/process-utils.test.ts index 8bfcd506f8..e100aefb5f 100644 --- a/packages/core/src/utils/process-utils.test.ts +++ b/packages/core/src/utils/process-utils.test.ts @@ -75,7 +75,6 @@ describe('process-utils', () => { expect(mockProcessKill).toHaveBeenCalledWith(-1234, 'SIGKILL'); }); - it('should use escalation on Unix if requested', async () => { vi.mocked(os.platform).mockReturnValue('linux'); const exited = false; @@ -87,6 +86,11 @@ describe('process-utils', () => { isExited, }); + // flush microtasks + await new Promise(process.nextTick); + await new Promise(process.nextTick); + await new Promise(process.nextTick); + // First call should be SIGTERM expect(mockProcessKill).toHaveBeenCalledWith(-1234, 'SIGTERM'); @@ -110,6 +114,11 @@ describe('process-utils', () => { isExited, }); + // flush microtasks + await new Promise(process.nextTick); + await new Promise(process.nextTick); + await new Promise(process.nextTick); + expect(mockProcessKill).toHaveBeenCalledWith(-1234, 'SIGTERM'); // Simulate process exiting @@ -117,10 +126,11 @@ describe('process-utils', () => { await vi.advanceTimersByTimeAsync(SIGKILL_TIMEOUT_MS); + // Second call should NOT be SIGKILL because it exited expect(mockProcessKill).not.toHaveBeenCalledWith(-1234, 'SIGKILL'); + await killPromise; }); - it('should fallback to specific process kill if group kill fails', async () => { vi.mocked(os.platform).mockReturnValue('linux'); mockProcessKill.mockImplementationOnce(() => { diff --git a/packages/core/src/utils/process-utils.ts b/packages/core/src/utils/process-utils.ts index 9a8824747c..f0332ecdfc 100644 --- a/packages/core/src/utils/process-utils.ts +++ b/packages/core/src/utils/process-utils.ts @@ -32,7 +32,8 @@ export interface KillOptions { * or the PTY's built-in kill method. * * On Unix, it attempts to kill the process group (using -pid) with escalation - * from SIGTERM to SIGKILL if requested. + * from SIGTERM to SIGKILL if requested. It also walks the process tree using pgrep + * to ensure all descendants are killed. */ export async function killProcessGroup(options: KillOptions): Promise { const { pid, escalate = false, isExited = () => false, pty } = options; @@ -55,12 +56,59 @@ export async function killProcessGroup(options: KillOptions): Promise { return; } - // Unix logic + // Unix logic: Walk process tree to find all descendants + const getAllDescendants = async (parentPid: number): Promise => { + let children: number[] = []; + try { + const { stdout } = await spawnAsync('pgrep', [ + '-P', + parentPid.toString(), + ]); + const pids = stdout + .trim() + .split('\n') + .map((p: string) => parseInt(p, 10)) + .filter((p: number) => !isNaN(p)); + for (const p of pids) { + children.push(p); + const grandchildren = await getAllDescendants(p); + children = children.concat(grandchildren); + } + } catch { + // pgrep exits with 1 if no children are found + } + return children; + }; + + const descendants = await getAllDescendants(pid); + const allPidsToKill = [...descendants.reverse(), pid]; + try { const initialSignal = options.signal || (escalate ? 'SIGTERM' : 'SIGKILL'); // Try killing the process group first (-pid) - process.kill(-pid, initialSignal); + try { + process.kill(-pid, initialSignal); + } catch { + // Ignore + } + + // Kill individual processes in the tree to ensure detached descendants are caught + for (const targetPid of allPidsToKill) { + try { + process.kill(targetPid, initialSignal); + } catch { + // Ignore + } + } + + if (pty) { + try { + pty.kill(typeof initialSignal === 'string' ? initialSignal : undefined); + } catch { + // Ignore + } + } if (escalate && !isExited()) { await new Promise((res) => setTimeout(res, SIGKILL_TIMEOUT_MS)); @@ -70,43 +118,30 @@ export async function killProcessGroup(options: KillOptions): Promise { } catch { // Ignore } - } - } - } catch { - // Fallback to specific process kill if group kill fails or on error - if (!isExited()) { - if (pty) { - if (escalate) { + + for (const targetPid of allPidsToKill) { try { - // Attempt the group kill BEFORE the pty session leader dies - process.kill(-pid, 'SIGTERM'); - pty.kill('SIGTERM'); - await new Promise((res) => setTimeout(res, SIGKILL_TIMEOUT_MS)); - if (!isExited()) { - try { - process.kill(-pid, 'SIGKILL'); - } catch { - // Ignore - } - pty.kill('SIGKILL'); - } + process.kill(targetPid, 'SIGKILL'); } catch { // Ignore } - } else { + } + if (pty) { try { - process.kill(-pid, 'SIGKILL'); // Group kill first pty.kill('SIGKILL'); } catch { // Ignore } } - } else { - try { - process.kill(pid, 'SIGKILL'); - } catch { - // Ignore - } + } + } + } catch { + // Ultimate fallback if something unexpected throws + if (!isExited()) { + try { + process.kill(pid, 'SIGKILL'); + } catch { + // Ignore } } }