mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-11 20:07:00 -07:00
563 lines
9.2 KiB
JSON
563 lines
9.2 KiB
JSON
{
|
|
"scenarioId": "scenario-working-set-amnesia",
|
|
"description": "Tests if the agent remembers which file it was just modifying before compression occurred.",
|
|
"history": [
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Fix the typo in src/utils/math.ts. It should say \"addition\" instead of \"addtion\"."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"functionCall": {
|
|
"name": "read_file",
|
|
"args": {
|
|
"file_path": "src/utils/math.ts"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"functionResponse": {
|
|
"name": "read_file",
|
|
"response": {
|
|
"content": "// Math utilities\nexport const add = (a, b) => a + b; // This is for addtion"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"functionCall": {
|
|
"name": "write_file",
|
|
"args": {
|
|
"file_path": "src/utils/math.ts",
|
|
"content": "// Math utilities\nexport const add = (a, b) => a + b; // This is for addition"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"functionResponse": {
|
|
"name": "write_file",
|
|
"response": {
|
|
"success": true
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Fixed the typo. Now what?"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 0..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 0 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 1..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 1 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 2..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 2 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 3..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 3 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 4..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 4 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 5..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 5 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 6..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 6 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 7..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 7 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 8..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 8 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 9..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 9 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 10..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 10 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 11..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 11 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 12..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 12 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 13..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 13 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 14..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 14 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 15..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 15 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 16..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 16 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 17..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 17 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 18..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 18 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 19..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 19 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 20..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 20 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 21..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 21 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 22..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 22 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 23..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 23 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 24..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 24 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 25..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 25 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 26..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 26 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 27..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 27 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 28..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 28 is fine."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Check unrelated component 29..."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Component 29 is fine."
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"questions": [
|
|
{
|
|
"id": "recall-modified-file",
|
|
"prompt": "Which file did we just modify and should now run tests for?",
|
|
"expectedSubstring": "src/utils/math.ts"
|
|
}
|
|
]
|
|
}
|