Files
gemini-cli/evals/data/compression/scenario-nested-logic.json
T
Christian Gunderman de1d5d3ca2 Compression eval.
2026-04-02 09:44:19 -07:00

510 lines
7.7 KiB
JSON

{
"scenarioId": "scenario-nested-logic",
"description": "Tests complex conditional plans buried in history.",
"history": [
{
"role": "user",
"parts": [
{
"text": "Our strategy: First try to fix via CSS. If that fails, try a JS polyfill. If both fail, we must revert to the 1.0 legacy-fallback-mode. Do not forget the fallback mode name."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Understood. Fallback is legacy-fallback-mode."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 0..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 0 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 1..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 1 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 2..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 2 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 3..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 3 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 4..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 4 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 5..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 5 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 6..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 6 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 7..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 7 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 8..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 8 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 9..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 9 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 10..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 10 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 11..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 11 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 12..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 12 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 13..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 13 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 14..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 14 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 15..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 15 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 16..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 16 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 17..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 17 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 18..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 18 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 19..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 19 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 20..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 20 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 21..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 21 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 22..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 22 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 23..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 23 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 24..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 24 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 25..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 25 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 26..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 26 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 27..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 27 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 28..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 28 failed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "CSS attempt 29..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "CSS try 29 failed."
}
]
}
],
"questions": [
{
"id": "fallback-mode",
"prompt": "If both CSS and JS polyfills fail, what is the exact name of the mode we revert to?",
"expectedSubstring": "legacy-fallback-mode"
}
]
}