/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
describe('Subprocess XML tagging behavior', () => {
evalTest('ALWAYS_PASSES', {
  name: 'should detect successful command execution with exit code 0',
  prompt:
    "Run 'echo Hello' and tell me if it succeeded. Only say 'Yes' or 'No'.",
  /**
   * Checks that the model reported success and that the last API request
   * carried a zero exit code for the shell command.
   *
   * @param rig - Test rig; used here for `waitForToolCall` and
   *   `readLastApiRequest`.
   * @param {string} result - Final text answer produced for the prompt.
   */
  assert: async (rig, result) => {
    // Make sure the shell tool was actually invoked before judging the answer.
    await rig.waitForToolCall('run_shell_command');
    expect(result.toLowerCase()).toContain('yes');

    const lastRequest = rig.readLastApiRequest();
    // A bare '0' shows up in nearly any payload, so it proves nothing.
    // Anchor on the closing exit_code tag (same shape the non-zero test
    // matches); `\b` keeps codes such as 10 or 130 from passing as "0".
    expect(lastRequest?.attributes?.request_text).toMatch(
      /\b0<\/exit_code>/,
    );
  },
});
evalTest('ALWAYS_PASSES', {
  name: 'should detect failed command execution with non-zero exit code',
  prompt:
    "Run 'ls non_existent_file_12345' and tell me if it failed. Only say 'Yes' or 'No'.",
  /**
   * Confirms the model answered "yes" (the command failed) and that the
   * last API request text contains a non-zero exit code immediately before
   * a closing exit_code tag.
   *
   * @param rig - Test rig; used here for `waitForToolCall` and
   *   `readLastApiRequest`.
   * @param {string} result - Final text answer produced for the prompt.
   */
  assert: async (rig, result) => {
    await rig.waitForToolCall('run_shell_command');

    const answer = result.toLowerCase();
    expect(answer).toContain('yes');

    // Positive integer with no leading zero, directly before the closing tag.
    const nonZeroExitCode = /[1-9]\d*<\/exit_code>/;
    const apiRequest = rig.readLastApiRequest();
    const requestText = apiRequest?.attributes?.request_text;
    expect(requestText).toMatch(nonZeroExitCode);
  },
});
evalTest('ALWAYS_PASSES', {
name: 'should correctly parse content from