Fix bug in the shellExecutionService resulting in both truncation and 3X bloat (#14545)

2026-05-13 21:32:56 -07:00 · 2025-12-04 18:26:12 -08:00
parent bedce8aacf
commit d284fa66c0
4 changed files with 247 additions and 35 deletions
@@ -0,0 +1,54 @@
+description="Injects context of all relevant cli files"
+prompt = """
+The source code contains the content of absolutely every source code file in
+packages/cli. In addition to the source code, the following test files are
+included as they are test files that conform to the project's testing standards:
+`packages/cli/src/ui/components/InputPrompt.test.tsx` and `packages/cli/src/ui/App.test.tsx`.
+You should very rarely need to read any other files from packages/cli to resolve
+prompts.
+
+!{find packages/cli -path packages/cli/dist -prune -o -type f \\( -name "*.ts" -o -name "*.tsx" \\) ! -name "*.test.ts" ! -name "*.test.tsx" ! -name "*.d.ts" -exec echo "--- {} ---" \\; -exec cat {} \\; && echo "--- packages/cli/src/ui/components/InputPrompt.test.tsx ---" && cat packages/cli/src/ui/components/InputPrompt.test.tsx && echo "--- packages/cli/src/ui/App.test.tsx ---" && cat packages/cli/src/ui/App.test.tsx}
+
+**Pay extremely close attention to these files.** They define the project's
+core architecture, component patterns, and testing standards.
+
+In addition to the code context, you MUST strictly adhere to the following frontend-specific development guidelines while adding code to packages/cli.
+
+## Testing Standards
+*   **Async Testing**: ALWAYS use `waitFor` from `packages/cli/src/test-utils/async.ts` instead of `vi.waitFor` to prevent flakiness and `act` warnings. NEVER use fixed waits (e.g., `await delay(100)`).
+*   **State Changes**: Wrap all blocks that change component state in `act`.
+*   **Snapshots**: Use `toMatchSnapshot` to verify rendering.
+*   **Rendering**: Use `render` or `renderWithProviders` from `packages/cli/src/test-utils/render.tsx` instead of `ink-testing-library` directly.
+*   **Mocking**:
+    *   Reuse existing mocks/fakes where possible.
+    *   **Parameterized Tests**: Use parameterized tests with explicit types to reduce duplication and ensure type safety.
+    *   Avoid mocking the file system, os, or child_process if at all possible; if you have to mock the Mock critical dependencies (`fs`, `os`, `child_process`) do so ONLY at the top of the file.
+
+## React & Ink Architecture
+*   **Keyboard Handling**: You MUST use `useKeyPress.ts` from Gemini CLI for keyboard handling, NOT the standard ink library. This is critical for supporting slow terminals and multiple events per frame.
+    *   Handle multiple events gracefully (often requires a reducer pattern, see `text-buffer.ts`).
+*   **State Management**:
+    *   NEVER trigger side effects from within the body of a `setState` callback. Use a reducer or `useRef` if necessary.
+    *   Initialize state explicitly (e.g., use `undefined` rather than `true` if unknown).
+*   **Performance**:
+    *   Avoid synchronous file I/O in components.
+    *   Do not introduce excessive property drilling; leverage or extend existing providers.
+
+## Configuration & Settings
+*   **Settings vs Args**: Use settings for user-configurable options; do not add new CLI arguments.
+*   **Schema**: Add new settings to `packages/cli/src/config/settingsSchema.ts`.
+*   **Documentation**:
+    *   If `showInDialog: true`, document in `docs/get-started/configuration.md`.
+    *   Ensure `requiresRestart` is correctly set.
+
+## Keyboard Shortcuts
+*   **Registration**: Define new shortcuts in `packages/cli/src/config/keyBindings.ts`.
+*   **Documentation**: Document all new shortcuts in `docs/cli/keyboard-shortcuts.md`.
+*   **Compatibility**: Avoid function keys and common VSCode shortcuts. Be cautious with `Meta` key (Mac-limited support).
+
+## General
+*   **Logging**: Use `debugLogger` for errors. NEVER leave `console.log/warn/error` in the code.
+*   **TypeScript**: Avoid the non-null assertion operator (`!`).
+
+{{args}}. 
+ """
@@ -0,0 +1,57 @@
+description="Injects context of all relevant cli files"
+prompt = """
+The following output contains the complete
+source code of the gemini cli library (`packages/cli`), and {packages/core} and representative test files
+(`packages/cli/src/ui/components/InputPrompt.test.tsx` and `packages/cli/src/ui/App.test.tsx`) that best conform to the project's testing standards.
+**Pay extremely close attention to these files.** They define the project's
+core architecture, component patterns, and testing standards.
+
+The source code contains the content of absolutely every source code file in
+packages/cli and packages/core. In addition to the source code, the following test files are
+included as they are test files that conform to the project's testing standards:
+`packages/cli/src/ui/components/InputPrompt.test.tsx` and `packages/cli/src/ui/App.test.tsx`.
+You should very rarely need to read any other files from packages/cli to resolve
+prompts.
+
+!{find packages/cli packages/core \\( -path packages/cli/dist -o -path packages/core/dist -o -name node_modules \\) -prune -o -type f \\( -name "*.ts" -o -name "*.tsx" \\) ! -name "*.test.ts" ! -name "*.test.tsx" ! -name "*.d.ts" -exec echo "--- {} ---" \\; -exec cat {} \\; && echo "--- packages/cli/src/ui/components/InputPrompt.test.tsx ---" && cat packages/cli/src/ui/components/InputPrompt.test.tsx && echo "--- packages/cli/src/ui/App.test.tsx ---" && cat packages/cli/src/ui/App.test.tsx}
+
+In addition to the code context, you MUST strictly adhere to the following frontend-specific development guidelines when writing code in packages/cli.
+
+## Testing Standards
+*   **Async Testing**: ALWAYS use `waitFor` from `packages/cli/src/test-utils/async.ts` instead of `vi.waitFor` to prevent flakiness and `act` warnings. NEVER use fixed waits (e.g., `await delay(100)`).
+*   **State Changes**: Wrap all blocks that change component state in `act`.
+*   **Snapshots**: Use `toMatchSnapshot` to verify rendering.
+*   **Rendering**: Use `render` or `renderWithProviders` from `packages/cli/src/test-utils/render.tsx` instead of `ink-testing-library` directly.
+*   **Mocking**:
+    *   Reuse existing mocks/fakes where possible.
+    *   **Parameterized Tests**: Use parameterized tests with explicit types to reduce duplication and ensure type safety.
+    *   Avoid mocking the file system, os, or child_process if at all possible; if you have to mock the Mock critical dependencies (`fs`, `os`, `child_process`) do so ONLY at the top of the file.
+
+## React & Ink Architecture
+*   **Keyboard Handling**: You MUST use `useKeyPress.ts` from Gemini CLI for keyboard handling, NOT the standard ink library. This is critical for supporting slow terminals and multiple events per frame.
+    *   Handle multiple events gracefully (often requires a reducer pattern, see `text-buffer.ts`).
+*   **State Management**:
+    *   NEVER trigger side effects from within the body of a `setState` callback. Use a reducer or `useRef` if necessary.
+    *   Initialize state explicitly (e.g., use `undefined` rather than `true` if unknown).
+*   **Performance**:
+    *   Avoid synchronous file I/O in components.
+    *   Do not introduce excessive property drilling; leverage or extend existing providers.
+
+## Configuration & Settings
+*   **Settings vs Args**: Use settings for user-configurable options; do not add new CLI arguments.
+*   **Schema**: Add new settings to `packages/cli/src/config/settingsSchema.ts`.
+*   **Documentation**:
+    *   If `showInDialog: true`, document in `docs/get-started/configuration.md`.
+    *   Ensure `requiresRestart` is correctly set.
+
+## Keyboard Shortcuts
+*   **Registration**: Define new shortcuts in `packages/cli/src/config/keyBindings.ts`.
+*   **Documentation**: Document all new shortcuts in `docs/cli/keyboard-shortcuts.md`.
+*   **Compatibility**: Avoid function keys and common VSCode shortcuts. Be cautious with `Meta` key (Mac-limited support).
+
+## General
+*   **Logging**: Use `debugLogger` for errors. NEVER leave `console.log/warn/error` in the code.
+*   **TypeScript**: Avoid the non-null assertion operator (`!`).
+
+{{args}}. 
+ """
@@ -8,9 +8,12 @@ import { vi, describe, it, expect, beforeEach, type Mock } from 'vitest';
 import EventEmitter from 'node:events';
 import type { Readable } from 'node:stream';
 import { type ChildProcess } from 'node:child_process';
-import type { ShellOutputEvent } from './shellExecutionService.js';
+import type {
+  ShellOutputEvent,
+  ShellExecutionConfig,
+} from './shellExecutionService.js';
 import { ShellExecutionService } from './shellExecutionService.js';
-import type { AnsiOutput } from '../utils/terminalSerializer.js';
+import type { AnsiOutput, AnsiToken } from '../utils/terminalSerializer.js';

 // Hoisted Mocks
 const mockPtySpawn = vi.hoisted(() => vi.fn());
@@ -64,7 +67,7 @@ const mockProcessKill = vi
  .spyOn(process, 'kill')
  .mockImplementation(() => true);

-const shellExecutionConfig = {
+const shellExecutionConfig: ShellExecutionConfig = {
  terminalWidth: 80,
  terminalHeight: 24,
  pager: 'cat',
@@ -76,41 +79,37 @@ const createMockSerializeTerminalToObjectReturnValue = (
  text: string | string[],
 ): AnsiOutput => {
  const lines = Array.isArray(text) ? text : text.split('\n');
-  const expected: AnsiOutput = Array.from(
-    { length: shellExecutionConfig.terminalHeight },
-    (_, i) => [
-      {
-        text: (lines[i] || '').trim(),
-        bold: false,
-        italic: false,
-        underline: false,
-        dim: false,
-        inverse: false,
-        fg: '#ffffff',
-        bg: '#000000',
-      },
-    ],
-  );
+  const len = (shellExecutionConfig.terminalHeight ?? 24) as number;
+  const expected: AnsiOutput = Array.from({ length: len }, (_, i) => [
+    {
+      text: (lines[i] || '').trim(),
+      bold: false,
+      italic: false,
+      underline: false,
+      dim: false,
+      inverse: false,
+      fg: '#ffffff',
+      bg: '#000000',
+    },
+  ]);
  return expected;
 };

 const createExpectedAnsiOutput = (text: string | string[]): AnsiOutput => {
  const lines = Array.isArray(text) ? text : text.split('\n');
-  const expected: AnsiOutput = Array.from(
-    { length: shellExecutionConfig.terminalHeight },
-    (_, i) => [
-      {
-        text: expect.stringMatching((lines[i] || '').trim()),
-        bold: false,
-        italic: false,
-        underline: false,
-        dim: false,
-        inverse: false,
-        fg: '',
-        bg: '',
-      },
-    ],
-  );
+  const len = (shellExecutionConfig.terminalHeight ?? 24) as number;
+  const expected: AnsiOutput = Array.from({ length: len }, (_, i) => [
+    {
+      text: expect.stringMatching((lines[i] || '').trim()),
+      bold: false,
+      italic: false,
+      underline: false,
+      dim: false,
+      inverse: false,
+      fg: '',
+      bg: '',
+    } as AnsiToken,
+  ]);
  return expected;
 };

@@ -273,6 +272,95 @@ describe('ShellExecutionService', () => {
      );
    });

+    it('should capture large output (10000 lines)', async () => {
+      const lineCount = 10000;
+      const lines = Array.from({ length: lineCount }, (_, i) => `line ${i}`);
+      const expectedOutput = lines.join('\n');
+
+      const { result } = await simulateExecution(
+        'large-output-command',
+        (pty) => {
+          // Send data in chunks to simulate realistic streaming
+          // Use \r\n to ensure the terminal moves the cursor to the start of the line
+          const chunkSize = 1000;
+          for (let i = 0; i < lineCount; i += chunkSize) {
+            const chunk = lines.slice(i, i + chunkSize).join('\r\n') + '\r\n';
+            pty.onData.mock.calls[0][0](chunk);
+          }
+          pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null });
+        },
+      );
+
+      expect(result.exitCode).toBe(0);
+      // The terminal buffer output includes trailing spaces for each line (up to terminal width).
+      // We trim each line to match our expected simple string.
+      const processedOutput = result.output
+        .split('\n')
+        .map((l) => l.trimEnd())
+        .join('\n')
+        .trim();
+      expect(processedOutput).toBe(expectedOutput);
+      expect(result.output.split('\n').length).toBeGreaterThanOrEqual(
+        lineCount,
+      );
+    });
+
+    it('should not add extra padding but preserve explicit trailing whitespace', async () => {
+      const { result } = await simulateExecution('cmd', (pty) => {
+        // "value" should not get terminal-width padding
+        // "value2    " should keep its spaces
+        pty.onData.mock.calls[0][0]('value\r\nvalue2    ');
+        pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null });
+      });
+
+      expect(result.output).toBe('value\nvalue2    ');
+    });
+
+    it('should truncate output exceeding the scrollback limit', async () => {
+      const scrollbackLimit = 100;
+      const totalLines = 150;
+      // Generate lines: "line 0", "line 1", ...
+      const lines = Array.from({ length: totalLines }, (_, i) => `line ${i}`);
+
+      const { result } = await simulateExecution(
+        'overflow-command',
+        (pty) => {
+          const chunk = lines.join('\r\n') + '\r\n';
+          pty.onData.mock.calls[0][0](chunk);
+          pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null });
+        },
+        { ...shellExecutionConfig, scrollback: scrollbackLimit },
+      );
+
+      expect(result.exitCode).toBe(0);
+
+      // The terminal should keep the *last* 'scrollbackLimit' lines + lines in the viewport.
+      // xterm.js scrollback is the number of lines *above* the viewport.
+      // So total lines retained = scrollback + rows.
+      // However, our `getFullBufferText` implementation iterates the *active* buffer.
+      // In headless xterm, the buffer length grows.
+      // Let's verify that we have fewer lines than totalLines.
+
+      const outputLines = result.output
+        .trim()
+        .split('\n')
+        .map((l) => l.trimEnd());
+
+      // We expect the *start* of the output to be truncated.
+      // The first retained line should be > "line 0".
+      // Specifically, if we sent 150 lines and have space for roughly 100 + viewport(24),
+      // we should miss the first ~26 lines.
+
+      // Check that we lost some lines from the beginning
+      expect(outputLines.length).toBeLessThan(totalLines);
+      expect(outputLines[0]).not.toBe('line 0');
+
+      // Check that we have the *last* lines
+      expect(outputLines[outputLines.length - 1]).toBe(
+        `line ${totalLines - 1}`,
+      );
+    });
+
    it('should call onPid with the process id', async () => {
      const abortController = new AbortController();
      const handle = await ShellExecutionService.execute(
@@ -24,6 +24,11 @@ const { Terminal } = pkg;
 const SIGKILL_TIMEOUT_MS = 200;
 const MAX_CHILD_PROCESS_BUFFER_SIZE = 16 * 1024 * 1024; // 16MB

+// We want to allow shell outputs that are close to the context window in size.
+// 300,000 lines is roughly equivalent to a large context window, ensuring
+// we capture significant output from long-running commands.
+export const SCROLLBACK_LIMIT = 300000;
+
 const BASH_SHOPT_OPTIONS = 'promptvars nullglob extglob nocaseglob dotglob';
 const BASH_SHOPT_GUARD = `shopt -u ${BASH_SHOPT_OPTIONS};`;

@@ -77,6 +82,7 @@ export interface ShellExecutionConfig {
  defaultBg?: string;
  // Used for testing
  disableDynamicLineTrimming?: boolean;
+  scrollback?: number;
 }

 /**
@@ -110,10 +116,16 @@ const getFullBufferText = (terminal: pkg.Terminal): string => {
  const lines: string[] = [];
  for (let i = 0; i < buffer.length; i++) {
    const line = buffer.getLine(i);
-    const lineContent = line ? line.translateToString() : '';
+    const lineContent = line ? line.translateToString(true) : '';
    lines.push(lineContent);
  }
-  return lines.join('\n').trimEnd();
+
+  // Remove trailing empty lines
+  while (lines.length > 0 && lines[lines.length - 1] === '') {
+    lines.pop();
+  }
+
+  return lines.join('\n');
 };

 /**
@@ -445,6 +457,7 @@ export class ShellExecutionService {
          allowProposedApi: true,
          cols,
          rows,
+          scrollback: shellExecutionConfig.scrollback ?? SCROLLBACK_LIMIT,
        });
        headlessTerminal.scrollToTop();