mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-23 02:02:31 -07:00
484 lines
8.0 KiB
JSON
484 lines
8.0 KiB
JSON
{
|
|
"scenarioId": "scenario-context-thrashing",
|
|
"description": "Tests if the agent remembers the outcome of a failed tool call and the state of the file after compression. Targets \"Redundant Re-Reading\".",
|
|
"history": [
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Update the version in package.json to 1.0.5."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"functionCall": {
|
|
"name": "read_file",
|
|
"args": {
|
|
"file_path": "package.json"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"functionResponse": {
|
|
"name": "read_file",
|
|
"response": {
|
|
"content": "{\n \"name\": \"app\",\n \"version\": \"1.0.4\"\n}"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"functionCall": {
|
|
"name": "replace",
|
|
"args": {
|
|
"file_path": "package.json",
|
|
"old_string": "\"version\": \"1.0.3\"",
|
|
"new_string": "\"version\": \"1.0.5\""
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"functionResponse": {
|
|
"name": "replace",
|
|
"response": {
|
|
"error": "Failed to edit, 0 occurrences found for old_string"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "That failed. I must have misremembered the version. Let me check the logs first."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 0"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 0 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 1"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 1 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 2"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 2 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 3"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 3 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 4"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 4 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 5"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 5 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 6"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 6 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 7"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 7 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 8"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 8 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 9"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 9 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 10"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 10 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 11"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 11 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 12"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 12 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 13"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 13 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 14"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 14 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 15"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 15 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 16"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 16 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 17"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 17 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 18"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 18 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 19"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 19 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 20"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 20 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 21"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 21 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 22"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 22 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 23"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 23 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Checking unrelated log part 24"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Log part 24 is fine."
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"questions": [
|
|
{
|
|
"id": "failed-edit-reason",
|
|
"prompt": "We tried to update package.json but it failed. Why did it fail, and what is the ACTUAL version currently in that file?",
|
|
"expectedSubstring": "1.0.4"
|
|
}
|
|
]
|
|
}
|