Files
gemini-cli/evals/data/compression/scenario-blind-guess.json
T
Christian Gunderman de1d5d3ca2 Compression eval.
2026-04-02 09:44:19 -07:00

536 lines
8.2 KiB
JSON

{
"scenarioId": "scenario-blind-guess",
"description": "Tests if the agent attempts to \"guess\" code content from a summarized description rather than literal content. Targets the \"Shoot from the Hip\" error.",
"history": [
{
"role": "user",
"parts": [
{
"text": "Read src/auth/validator.ts."
}
]
},
{
"role": "model",
"parts": [
{
"functionCall": {
"name": "read_file",
"args": {
"file_path": "src/auth/validator.ts"
}
}
}
]
},
{
"role": "user",
"parts": [
{
"functionResponse": {
"name": "read_file",
"response": {
"content": "export const validate = (token: string) => {\n if (!token) throw new Error(\"Missing token\");\n return true;\n};"
}
}
}
]
},
{
"role": "model",
"parts": [
{
"text": "The validator throws an error if the token is missing."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 0"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 0 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 1"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 1 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 2"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 2 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 3"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 3 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 4"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 4 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 5"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 5 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 6"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 6 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 7"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 7 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 8"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 8 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 9"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 9 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 10"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 10 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 11"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 11 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 12"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 12 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 13"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 13 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 14"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 14 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 15"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 15 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 16"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 16 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 17"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 17 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 18"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 18 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 19"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 19 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 20"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 20 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 21"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 21 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 22"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 22 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 23"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 23 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 24"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 24 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 25"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 25 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 26"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 26 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 27"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 27 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 28"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 28 done."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Unrelated task 29"
}
]
},
{
"role": "model",
"parts": [
{
"text": "Task 29 done."
}
]
}
],
"questions": [
{
"id": "exact-error-string",
"prompt": "I want to change the error message in src/auth/validator.ts to \"Token required\". What is the EXACT literal \"old_string\" I should use for the replace tool?",
"expectedSubstring": "throw new Error(\"Missing token\")"
}
]
}