System Diagnostic Report

tag. 4. Save the modified HTML to a file named 'final_dashboard.html'.`, assert: async (rig, result) => { await rig.waitForToolCall('run_shell_command'); // Create await rig.waitForToolCall(); // Read/Write const finalContent = fs.readFileSync( path.join(rig.testDir!, 'final_dashboard.html'), 'utf-8', ); expect(finalContent).toContain( 'Maintenance Mode', ); expect(finalContent).toContain( "

System update scheduled

", ); expect(finalContent).toMatch( /[\s\S]*

[\s\S]*<\/main>[\s\S]*<\/body>/i, ); expect(finalContent).toContain( 'console.log("Found in the stream");', ); }, }); }); describe('Subprocess XML tagging behavior', () => { evalTest('ALWAYS_PASSES', { name: 'should detect successful command execution with exit code 0', prompt: "Run 'echo Hello' and tell me if it succeeded. Only say 'Yes' or 'No'.", assert: async (rig, result) => { await rig.waitForToolCall('run_shell_command'); expect(result.toLowerCase()).toContain('yes'); const lastRequest = rig.readLastApiRequest(); expect(lastRequest?.attributes?.request_text).toContain( '0', ); }, }); evalTest('ALWAYS_PASSES', { name: 'should detect failed command execution with non-zero exit code', prompt: "Run 'ls non_existent_file_12345' and tell me if it failed. Only say 'Yes' or 'No'.", assert: async (rig, result) => { await rig.waitForToolCall('run_shell_command'); expect(result.toLowerCase()).toContain('yes'); const lastRequest = rig.readLastApiRequest(); expect(lastRequest?.attributes?.request_text).toMatch( /[1-9]\d*<\/exit_code>/, ); }, }); evalTest('ALWAYS_PASSES', { name: 'should correctly parse content from tag', prompt: "Run 'echo UNIQUE_STRING_99' and tell me what the output was.", assert: async (rig, result) => { await rig.waitForToolCall('run_shell_command'); expect(result).toContain('UNIQUE_STRING_99'); }, }); evalTest('ALWAYS_PASSES', { name: 'should correctly parse error messages from tag', prompt: "Try to execute the current directory './' as a command and tell me what the error message was.", assert: async (rig, result) => { await rig.waitForToolCall('run_shell_command'); expect(result.toLowerCase()).toMatch( /permission denied|is a directory/, ); const lastRequest = rig.readLastApiRequest(); expect(lastRequest?.attributes?.request_text).toContain(''); expect(lastRequest?.attributes?.request_text).toContain( '126', ); }, }); }); });

Status: All Systems Go

Network Settings

System Overview

Performance Metrics

Security Alerts