mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-12 20:37:08 -07:00
536 lines
8.2 KiB
JSON
536 lines
8.2 KiB
JSON
{
|
|
"scenarioId": "scenario-blind-guess",
|
|
"description": "Tests if the agent attempts to \"guess\" code content from a summarized description rather than literal content. Targets the \"Shoot from the Hip\" error.",
|
|
"history": [
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Read src/auth/validator.ts."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"functionCall": {
|
|
"name": "read_file",
|
|
"args": {
|
|
"file_path": "src/auth/validator.ts"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"functionResponse": {
|
|
"name": "read_file",
|
|
"response": {
|
|
"content": "export const validate = (token: string) => {\n if (!token) throw new Error(\"Missing token\");\n return true;\n};"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "The validator throws an error if the token is missing."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 0"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 0 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 1"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 1 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 2"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 2 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 3"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 3 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 4"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 4 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 5"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 5 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 6"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 6 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 7"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 7 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 8"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 8 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 9"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 9 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 10"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 10 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 11"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 11 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 12"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 12 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 13"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 13 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 14"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 14 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 15"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 15 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 16"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 16 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 17"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 17 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 18"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 18 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 19"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 19 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 20"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 20 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 21"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 21 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 22"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 22 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 23"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 23 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 24"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 24 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 25"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 25 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 26"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 26 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 27"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 27 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 28"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 28 done."
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "user",
|
|
"parts": [
|
|
{
|
|
"text": "Unrelated task 29"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "model",
|
|
"parts": [
|
|
{
|
|
"text": "Task 29 done."
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"questions": [
|
|
{
|
|
"id": "exact-error-string",
|
|
"prompt": "I want to change the error message in src/auth/validator.ts to \"Token required\". What is the EXACT literal \"old_string\" I should use for the replace tool?",
|
|
"expectedSubstring": "throw new Error(\"Missing token\")"
|
|
}
|
|
]
|
|
}
|