mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-14 16:10:59 -07:00
83 lines
2.7 KiB
TypeScript
83 lines
2.7 KiB
TypeScript
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import { describe, expect } from 'vitest';
|
|
import { evalTest } from './test-helper.js';
|
|
|
|
describe('Shell tool XML/HTML output behavior', () => {
  // Tool name passed to rig.waitForToolCall by both evals below.
  const SHELL_TOOL = 'run_shell_command';

  // Prompt for the extraction eval: asks for a heredoc `cat` of an HTML page
  // whose text contains the sequences `</output>` and `]]>`, followed by four
  // questions about that page.
  const diagnosticPagePrompt = `I have a diagnostic HTML page. Please run this command to see its content:
cat <<EOF
<!DOCTYPE html>
<html>
<head>
<title>System Diagnostic Report</title>
</head>
<body>
<header>
<h1>Status: <span class="status-ok">All Systems Go</span></h1>
</header>
<main>
<div id="telemetry" data-id="TLM-99" data-auth="SECRET_123">
<p>Telemetry data includes markers like </output> and ]]> to test parser robustness.</p>
<div class="metrics">
<span class="metric">CPU: 12%</span>
<span class="metric">MEM: 450MB</span>
</div>
</div>
</main>
<footer>
<p>Report generated by <a href="/internal/tools">Internal Admin</a></p>
</footer>
</body>
</html>
EOF

After running the command, tell me:
1. The title of the page.
2. The value of the 'data-auth' attribute for the div with id 'telemetry'.
3. The CPU metric value.
4. What markers were mentioned in the telemetry paragraph.`;

  // Prompt for the rewrite eval: asks for a heredoc `cat` of an HTML fragment
  // whose error row mentions `</output>`, then asks for that row to be
  // rewritten with the tag name replaced by 'ESCAPE_SEQUENCE'.
  const brokenConfigPrompt = `Run this command to see the current state of a broken configuration page:
cat <<EOF
<div class="config-panel">
<h3>Network Settings</h3>
<div class="row">
<label>IP Address:</label>
<input type="text" value="192.168.1.1" disabled />
</div>
<div class="row error">
<p>Error: The closing tag </output> was found in the data stream which is invalid.</p>
</div>
<div class="actions">
<button onclick="save()">Save</button>
</div>
</div>
EOF

The error message mentions a specific tag that shouldn't be there. Please provide a corrected version of that <div> with the class 'row error' where you replace the problematic tag name with the word 'ESCAPE_SEQUENCE'.`;

  // Substrings the first eval requires in `result` with their original casing,
  // checked in this order.
  const exactFragments = ['SECRET_123', '12%', '</output>', ']]>'];

  // `.` does not match line terminators, so this pattern only matches when
  // `</output>` appears before ESCAPE_SEQUENCE on the same line.
  const tagBeforeReplacement = /<\/output>.*ESCAPE_SEQUENCE/;

  evalTest('ALWAYS_PASSES', {
    name: 'should correctly extract data from complex HTML output containing problematic sequences',
    prompt: diagnosticPagePrompt,
    assert: async (rig, result) => {
      // NOTE(review): the value this call resolves to is discarded; if it
      // reports whether the tool call was observed, consider asserting on it.
      await rig.waitForToolCall(SHELL_TOOL);

      // The page title is matched case-insensitively; everything else is exact.
      expect(result.toLowerCase()).toContain('system diagnostic report');
      for (const fragment of exactFragments) {
        expect(result).toContain(fragment);
      }
    },
  });

  evalTest('ALWAYS_PASSES', {
    name: 'should correctly "fix" a bug in complex HTML output',
    prompt: brokenConfigPrompt,
    assert: async (rig, result) => {
      // NOTE(review): resolved value is discarded here as well — confirm that
      // is intended.
      await rig.waitForToolCall(SHELL_TOOL);

      expect(result).toContain('ESCAPE_SEQUENCE');
      // Should have replaced it
      expect(result).not.toMatch(tagBeforeReplacement);
    },
  });
});
|