fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)

2026-04-02 01:11:24 -07:00 · 2026-04-01 07:00:16 +08:00
parent 94f9480a3a
commit 782bb4e4bd
5 changed files with 144 additions and 128 deletions
--- a/integration-tests/browser-policy.test.ts
+++ b/integration-tests/browser-policy.test.ts
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { execSync } from 'node:child_process';
 import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
+import { env } from 'node:process';
 import stripAnsi from 'strip-ansi';

+// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
+// See: https://github.com/google-gemini/gemini-cli/issues/24382
+const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
+
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);

@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
    await rig.cleanup();
  });

-  it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
-    rig.setup('browser-policy-skip-confirmation', {
-      fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
-      settings: {
-        agents: {
-          overrides: {
-            browser_agent: {
-              enabled: true,
+  it.skipIf(isDockerSandbox)(
+    'should skip confirmation when "Allow all server tools for this session" is chosen',
+    async () => {
+      rig.setup('browser-policy-skip-confirmation', {
+        fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
+        settings: {
+          agents: {
+            overrides: {
+              browser_agent: {
+                enabled: true,
+              },
+            },
+            browser: {
+              headless: true,
+              sessionMode: 'isolated',
+              allowedDomains: ['example.com'],
            },
          },
-          browser: {
-            headless: true,
-            sessionMode: 'isolated',
-            allowedDomains: ['example.com'],
-          },
        },
-      },
-    });
+      });

-    // Manually trust the folder to avoid the dialog and enable option 3
-    const geminiDir = join(rig.homeDir!, '.gemini');
-    mkdirSync(geminiDir, { recursive: true });
+      // Manually trust the folder to avoid the dialog and enable option 3
+      const geminiDir = join(rig.homeDir!, '.gemini');
+      mkdirSync(geminiDir, { recursive: true });

-    // Write to trustedFolders.json
-    const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
-    const trustedFolders = {
-      [rig.testDir!]: 'TRUST_FOLDER',
-    };
-    writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
+      // Write to trustedFolders.json
+      const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
+      const trustedFolders = {
+        [rig.testDir!]: 'TRUST_FOLDER',
+      };
+      writeFileSync(
+        trustedFoldersPath,
+        JSON.stringify(trustedFolders, null, 2),
+      );

-    // Force confirmation for browser agent.
-    // NOTE: We don't force confirm browser tools here because "Allow all server tools"
-    // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
-    // a rule in the user tier (4.x) like the one from this TOML.
-    // By removing the explicit mcp rule, the first MCP tool will still prompt
-    // due to default approvalMode = 'default', and then "Allow all" will correctly
-    // bypass subsequent tools.
-    const policyFile = join(rig.testDir!, 'force-confirm.toml');
-    writeFileSync(
-      policyFile,
-      `
+      // Force confirmation for browser agent.
+      // NOTE: We don't force confirm browser tools here because "Allow all server tools"
+      // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
+      // a rule in the user tier (4.x) like the one from this TOML.
+      // By removing the explicit mcp rule, the first MCP tool will still prompt
+      // due to default approvalMode = 'default', and then "Allow all" will correctly
+      // bypass subsequent tools.
+      const policyFile = join(rig.testDir!, 'force-confirm.toml');
+      writeFileSync(
+        policyFile,
+        `
 [[rule]]
 name = "Force confirm browser_agent"
 toolName = "browser_agent"
 decision = "ask_user"
 priority = 200
 `,
-    );
+      );

-    // Update settings.json in both project and home directories to point to the policy file
-    for (const baseDir of [rig.testDir!, rig.homeDir!]) {
-      const settingsPath = join(baseDir, '.gemini', 'settings.json');
-      if (existsSync(settingsPath)) {
-        const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
-        settings.policyPaths = [policyFile];
-        // Ensure folder trust is enabled
-        settings.security = settings.security || {};
-        settings.security.folderTrust = settings.security.folderTrust || {};
-        settings.security.folderTrust.enabled = true;
-        writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+      // Update settings.json in both project and home directories to point to the policy file
+      for (const baseDir of [rig.testDir!, rig.homeDir!]) {
+        const settingsPath = join(baseDir, '.gemini', 'settings.json');
+        if (existsSync(settingsPath)) {
+          const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
+          settings.policyPaths = [policyFile];
+          // Ensure folder trust is enabled
+          settings.security = settings.security || {};
+          settings.security.folderTrust = settings.security.folderTrust || {};
+          settings.security.folderTrust.enabled = true;
+          writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+        }
      }
-    }

-    const run = await rig.runInteractive({
-      approvalMode: 'default',
-      env: {
-        GEMINI_CLI_INTEGRATION_TEST: 'true',
-      },
-    });
+      const run = await rig.runInteractive({
+        approvalMode: 'default',
+        env: {
+          GEMINI_CLI_INTEGRATION_TEST: 'true',
+        },
+      });

-    await run.sendKeys(
-      'Open https://example.com and check if there is a heading\r',
-    );
-    await run.sendKeys('\r');
+      await run.sendKeys(
+        'Open https://example.com and check if there is a heading\r',
+      );
+      await run.sendKeys('\r');

-    // Handle confirmations.
-    // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('action required'),
-      60000,
-      1000,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 2000));
+      // Handle confirmations.
+      // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('action required'),
+        60000,
+        1000,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 2000));

-    // Handle privacy notice
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
-      5000,
-      100,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 5000));
+      // Handle privacy notice
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
+        5000,
+        100,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 5000));

-    // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
-    await poll(
-      () => {
-        const stripped = stripAnsi(run.output).toLowerCase();
-        return (
-          stripped.includes('new_page') &&
-          stripped.includes('allow all server tools for this session')
-        );
-      },
-      60000,
-      1000,
-    );
+      // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
+      await poll(
+        () => {
+          const stripped = stripAnsi(run.output).toLowerCase();
+          return (
+            stripped.includes('new_page') &&
+            stripped.includes('allow all server tools for this session')
+          );
+        },
+        60000,
+        1000,
+      );

-    // Select "Allow all server tools for this session" (option 3)
-    await run.sendKeys('3\r');
-    await new Promise((r) => setTimeout(r, 30000));
+      // Select "Allow all server tools for this session" (option 3)
+      await run.sendKeys('3\r');
+      await new Promise((r) => setTimeout(r, 30000));

-    const output = stripAnsi(run.output).toLowerCase();
+      const output = stripAnsi(run.output).toLowerCase();

-    expect(output).toContain('browser_agent');
-    expect(output).toContain('completed successfully');
-  });
+      expect(output).toContain('browser_agent');
+      expect(output).toContain('completed successfully');
+    },
+  );

  it('should show the visible warning when browser agent starts in existing session mode', async () => {
    rig.setup('browser-session-warning', {
--- a/integration-tests/file-system.test.ts
+++ b/integration-tests/file-system.test.ts
@@ -121,6 +121,7 @@ describe('file-system', () => {

    const result = await rig.run({
      args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
+      timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
    });

    const foundToolCall = await rig.waitForToolCall('write_file');