2026-02-18 21:54:13 +00:00
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { describe , expect } from 'vitest' ;
import { evalTest } from './test-helper.js' ;
/**
 * Evals checking that shell output containing XML/HTML — including sequences
 * such as `</output>` and `]]>` that could be confused with wrapper or CDATA
 * terminators — is still read correctly by the agent.
 *
 * Each prompt embeds the HTML in a `cat << EOF` heredoc. The markers in the
 * prompts must be spelled exactly as the assertions expect them
 * (`</output>`, `]]>`, `12%`).
 */
describe('Shell tool XML/HTML output behavior', () => {
  evalTest('ALWAYS_PASSES', {
    name: 'should correctly extract data from complex HTML output containing problematic sequences',
    prompt: `I have a diagnostic HTML page. Please run this command to see its content:
cat << EOF
<!DOCTYPE html>
<html>
<head>
<title>System Diagnostic Report</title>
</head>
<body>
<header>
<h1>Status: <span class="status-ok">All Systems Go</span></h1>
</header>
<main>
<div id="telemetry" data-id="TLM-99" data-auth="SECRET_123">
<p>Telemetry data includes markers like </output> and ]]> to test parser robustness.</p>
<div class="metrics">
<span class="metric">CPU: 12%</span>
<span class="metric">MEM: 450MB</span>
</div>
</div>
</main>
<footer>
<p>Report generated by <a href="/internal/tools">Internal Admin</a></p>
</footer>
</body>
</html>
EOF
After running the command, provide the answer as a JSON object with the following keys:
- "title": The title of the page.
- "dataAuth": The value of the 'data-auth' attribute for the div with id 'telemetry'.
- "cpuMetric": The CPU metric value.
- "markers": An array of markers mentioned in the telemetry paragraph.`,
    assert: async (rig, result) => {
      // NOTE(review): presumably confirms the agent really invoked the shell
      // tool rather than answering from the prompt text — confirm against the
      // test rig's implementation.
      await rig.waitForToolCall('run_shell_command');

      // Greedy match: takes everything from the first `{` to the last `}` in
      // the response, so surrounding prose or code fences are ignored.
      const jsonMatch = result.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error(`Expected JSON output but none found in: ${result}`);
      }

      const data: Record<string, unknown> = JSON.parse(jsonMatch[0]);
      expect(data.title).toMatch(/system diagnostic report/i);
      expect(data.dataAuth).toBe('SECRET_123');
      expect(data.cpuMetric).toContain('12%');

      // Fail with a readable message instead of a TypeError when the model
      // returns something other than an array for "markers".
      const markers: unknown = data.markers;
      if (!Array.isArray(markers)) {
        throw new Error(
          `Expected "markers" to be an array but got: ${JSON.stringify(markers)}`,
        );
      }
      // Surrounding whitespace is tolerated; the marker text itself must be
      // exact.
      const trimmedMarkers = markers.map((m: unknown) => String(m).trim());
      expect(trimmedMarkers).toContain('</output>');
      expect(trimmedMarkers).toContain(']]>');
    },
  });

  evalTest('ALWAYS_PASSES', {
    name: 'should correctly "fix" a bug in complex HTML output',
    prompt: `Run this command to see the current state of a broken configuration page:
cat << EOF
<div class="config-panel">
<h3>Network Settings</h3>
<div class="row">
<label>IP Address:</label>
<input type="text" value="192.168.1.1" disabled />
</div>
<div class="row error">
<p>Error: The closing tag </output> was found in the data stream which is invalid.</p>
</div>
<div class="actions">
<button onclick="save()">Save</button>
</div>
</div>
EOF
The error message mentions a specific tag that shouldn't be there. Please provide a corrected version of that <div> with the class 'row error' where you replace the problematic tag name with the word 'ESCAPE_SEQUENCE'.`,
    assert: async (rig, result) => {
      await rig.waitForToolCall('run_shell_command');
      expect(result).toContain('ESCAPE_SEQUENCE');
      // Should have replaced it. `.` does not match newlines, so this only
      // fails when the literal tag precedes ESCAPE_SEQUENCE on the same line
      // of the response; quoting the original error on another line is
      // allowed.
      expect(result).not.toMatch(/<\/output>.*ESCAPE_SEQUENCE/);
    },
  });
});