Files
gemini-cli/evals/data/compression/scenario-context-thrashing.json
T
Christian Gunderman de1d5d3ca2 Compression eval.
2026-04-02 09:44:19 -07:00

484 lines
8.0 KiB
JSON

{
"scenarioId": "scenario-context-thrashing",
"description": "Tests if the agent remembers the outcome of a failed tool call and the state of the file after compression. Targets \"Redundant Re-Reading\".",
"history": [
{
"role": "user",
"parts": [
{
"text": "Update the version in package.json to 1.0.5."
}
]
},
{
"role": "model",
"parts": [
{
"functionCall": {
"name": "read_file",
"args": {
"file_path": "package.json"
}
}
}
]
},
{
"role": "user",
"parts": [
{
"functionResponse": {
"name": "read_file",
"response": {
"content": "{\n \"name\": \"app\",\n \"version\": \"1.0.4\"\n}"
}
}
}
]
},
{
"role": "model",
"parts": [
{
"functionCall": {
"name": "replace",
"args": {
"file_path": "package.json",
"old_string": "\"version\": \"1.0.3\"",
"new_string": "\"version\": \"1.0.5\""
}
}
}
]
},
{
"role": "user",
"parts": [
{
"functionResponse": {
"name": "replace",
"response": {
"error": "Failed to edit, 0 occurrences found for old_string"
}
}
}
]
},
{
"role": "model",
"parts": [
{
"text": "That failed. I must have misremembered the version. Let me check the logs first."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 0"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 0 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 1"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 1 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 2"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 2 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 3"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 3 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 4"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 4 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 5"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 5 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 6"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 6 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 7"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 7 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 8"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 8 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 9"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 9 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 10"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 10 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 11"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 11 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 12"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 12 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 13"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 13 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 14"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 14 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 15"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 15 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 16"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 16 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 17"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 17 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 18"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 18 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 19"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 19 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 20"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 20 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 21"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 21 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 22"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 22 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 23"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 23 is fine."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Checking unrelated log part 24"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log part 24 is fine."
}
]
}
],
"questions": [
{
"id": "failed-edit-reason",
"prompt": "We tried to update package.json but it failed. Why did it fail, and what is the ACTUAL version currently in that file?",
"expectedSubstring": "1.0.4"
}
]
}