Files
gemini-cli/evals/data/compression/scenario-multi-bug-tracking.json
Christian Gunderman de1d5d3ca2 Compression eval.
2026-04-02 09:44:19 -07:00

350 lines
5.4 KiB
JSON

{
"scenarioId": "scenario-multi-bug-tracking",
"description": "Tests tracking multiple distinct items. AgentHistoryProvider only has one \"Primary Goal\" bucket.",
"history": [
{
"role": "user",
"parts": [
{
"text": "We are tracking 3 bugs: BUG-A (Memory Leak), BUG-B (Race Condition), and BUG-C (UI Glitch). Focus on BUG-A first."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Tracking BUG-A, BUG-B, and BUG-C."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 0..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 0 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 1..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 1 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 2..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 2 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 3..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 3 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 4..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 4 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 5..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 5 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 6..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 6 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 7..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 7 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 8..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 8 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 9..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 9 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 10..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 10 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 11..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 11 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 12..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 12 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 13..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 13 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 14..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 14 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 15..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 15 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 16..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 16 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 17..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 17 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 18..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 18 processed."
}
]
},
{
"role": "user",
"parts": [
{
"text": "Analyzing BUG-A log 19..."
}
]
},
{
"role": "model",
"parts": [
{
"text": "Log 19 processed."
}
]
}
],
"questions": [
{
"id": "bug-list",
"prompt": "What are the three bug IDs we are tracking?",
"expectedSubstring": "BUG-B"
}
]
}