fix(evals): add typecheck coverage for evals, integration-tests, and memory-tests (#25480)

This commit is contained in:
Sandy Tao
2026-04-16 11:20:27 -07:00
committed by GitHub
parent f16f1cced3
commit fafe3e35d2
15 changed files with 503 additions and 198 deletions

View File

@@ -11,6 +11,8 @@ import path from 'node:path';
describe('Background Process Monitoring', () => {
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should naturally use read output tool to find token',
prompt:
"Run the script using 'bash generate_token.sh'. It will emit a token after a short delay and continue running. Find the token and tell me what it is.",
@@ -50,6 +52,8 @@ sleep 100
});
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should naturally use list tool to verify multiple processes',
prompt:
"Start three background processes that run 'sleep 100', 'sleep 200', and 'sleep 300' respectively. Verify that all three are currently running.",