Files
gemini-cli/evals/data/compression/scenario-working-set-amnesia.json
T
Christian Gunderman de1d5d3ca2 Compression eval.
2026-04-02 09:44:19 -07:00

563 lines
9.2 KiB
JSON

{
"scenarioId": "scenario-working-set-amnesia",
"description": "Tests if the agent remembers which file it was just modifying before compression occurred.",
"history": [
{
"role": "user",
"parts": [
{
"text": "Fix the typo in src/utils/math.ts. It should say \"addition\" instead of \"addtion\"."
}
]
},
{
"role": "model",
"parts": [
{
"functionCall": {
"name": "read_file",
"args": {
"file_path": "src/utils/math.ts"
}
}
}
]
},
{
"role": "user",
"parts": [
{
"functionResponse": {
"name": "read_file",
"response": {
"content": "// Math utilities\nexport const add = (a, b) => a + b; // This is for addtion"
}
}
}
]
},
{
"role": "model",
"parts": [
{
"functionCall": {
"name": "write_file",
"args": {
"file_path": "src/utils/math.ts",
"content": "// Math utilities\nexport const add = (a, b) => a + b; // This is for addition"
}
}
}
]
},
{
"role": "user",
"parts": [
{
"functionResponse": {
"name": "write_file",
"response": {
"success": true
}
}
}
]
},
{
"role": "model",
"parts": [
{
"text": "Fixed the typo. Now what?"
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 0..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 0 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 1..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 1 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 2..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 2 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 3..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 3 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 4..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 4 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 5..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 5 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 6..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 6 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 7..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 7 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 8..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 8 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 9..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 9 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 10..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 10 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 11..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 11 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 12..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 12 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 13..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 13 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 14..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 14 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 15..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 15 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 16..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 16 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 17..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 17 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 18..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 18 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 19..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 19 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 20..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 20 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 21..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 21 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 22..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 22 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 23..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 23 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 24..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 24 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 25..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 25 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 26..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 26 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 27..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 27 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 28..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 28 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Check unrelated component 29..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Component 29 is fine."
}
]
}
],
"questions": [
{
"id": "recall-modified-file",
"prompt": "Which file did we just modify and should now run tests for?",
"expectedSubstring": "src/utils/math.ts"
}
]
}