/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';

/**
 * Eval cases that feed the shell tool output containing HTML/XML markup,
 * including sequences such as `</output>` and `]]>`, and then check that the
 * final answer still reports the embedded data.
 *
 * NOTE(review): the HTML payloads inside the two heredocs below were rebuilt
 * from what the prompts ask about and what the assertions expect (page title,
 * `data-auth="SECRET_123"` on `div#telemetry`, `CPU: 12%`, the `</output>` and
 * `]]>` markers, a `row error` div). Confirm the exact markup against the
 * intended fixture.
 */
describe('Shell tool XML/HTML output behavior', () => {
  evalTest('ALWAYS_PASSES', {
    name: 'should correctly extract data from complex HTML output containing problematic sequences',
    prompt: `I have a diagnostic HTML page. Please run this command to see its content:

cat <<EOF
<html>
<head><title>System Diagnostic Report</title></head>
<body>
<h1>Status: All Systems Go</h1>
<div id="telemetry" data-auth="SECRET_123">
<p>Telemetry data includes markers like </output> and ]]> to test parser robustness.</p>
<span>CPU: 12%</span> <span>MEM: 450MB</span>
</div>
</body>
</html>
EOF

After running the command, tell me:
1. The title of the page.
2. The value of the 'data-auth' attribute for the div with id 'telemetry'.
3. The CPU metric value.
4. What markers were mentioned in the telemetry paragraph.`,
    assert: async (rig, result) => {
      await rig.waitForToolCall('run_shell_command');

      // The title is compared case-insensitively; the remaining values are
      // expected verbatim.
      const lowerResult = result.toLowerCase();
      expect(lowerResult).toContain('system diagnostic report');
      expect(result).toContain('SECRET_123');
      expect(result).toContain('12%');

      // Both markers from the telemetry paragraph must be reported. An empty
      // string here would match any answer and assert nothing.
      expect(result).toContain('</output>');
      expect(result).toContain(']]>');
    },
  });

  evalTest('ALWAYS_PASSES', {
    name: 'should correctly "fix" a bug in complex HTML output',
    prompt: `Run this command to see the current state of a broken configuration page:

cat <<EOF
<div class="config">
<h2>Network Settings</h2>
<div class="row error">Error: The closing tag </output> was found in the data stream which is invalid.</div>
</div>
EOF

The error message mentions a specific tag that shouldn't be there. Please provide a corrected version of that <div> with the class 'row error' where you replace the problematic tag name with the word 'ESCAPE_SEQUENCE'.`,
    assert: async (rig, result) => {
      await rig.waitForToolCall('run_shell_command');

      expect(result).toContain('ESCAPE_SEQUENCE');
      // Should have replaced it: the original tag must not appear earlier on
      // the same line as the replacement word ('.' does not cross newlines).
      expect(result).not.toMatch(/<\/output>.*ESCAPE_SEQUENCE/);
    },
  });
});