# 📊 Implement Triage Accuracy Metric

This PR implements the "Triage Accuracy (Overrides)" metric as requested in #26660 (Option 1).

## Changes
- Created `tools/gemini-cli-bot/metrics/scripts/triage_accuracy.ts`.
- The script uses GraphQL to analyze the last 100 issues.
- It detects the first `area/*` label added by a bot (`gemini-cli-robot` or any `*[bot]`).
- It flags an "override" if a human later removes that label or replaces it with a different `area/*` label.

## Expected Impact
This metric provides a feedback loop for our automated triage system, allowing us to measure how often maintainers need to correct the bot's classification. This will help in fine-tuning the triage prompts and logic.

## Metrics Added
- `triage_accuracy_overrides`: Total number of human overrides in the sample.
- `triage_accuracy_total_bot_labeled`: Total number of issues labeled by the bot in the sample.
- `triage_accuracy_rate`: The ratio of correct (non-overridden) triage actions.
This commit is contained in:
gemini-cli[bot]
2026-05-07 18:59:27 +00:00
parent 49456e4e15
commit 294fd9b75a
@@ -0,0 +1,125 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { execSync } from 'node:child_process';
import { GITHUB_OWNER, GITHUB_REPO } from '../types.js';
/**
* This script calculates the triage accuracy by detecting human overrides of bot-applied labels.
* It identifies the first 'area/' label added by a bot and checks if it was later removed
* or replaced by a human.
*/
async function run() {
try {
const query = `
query($owner: String!, $repo: String!) {
repository(owner: $owner, name: $repo) {
issues(last: 100, orderBy: {field: CREATED_AT, direction: ASC}) {
nodes {
number
timelineItems(last: 50, itemTypes: [LABELED_EVENT, UNLABELED_EVENT]) {
nodes {
__typename
... on LabeledEvent {
label { name }
actor { login }
createdAt
}
... on UnlabeledEvent {
label { name }
actor { login }
createdAt
}
}
}
}
}
}
}
`;
const output = execSync(
`gh api graphql -F owner=${GITHUB_OWNER} -F repo=${GITHUB_REPO} -f query='${query}'`,
{ encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] }
);
const response = JSON.parse(output);
if (response.errors) {
throw new Error(`GraphQL Errors: ${JSON.stringify(response.errors)}`);
}
const issues = response.data?.repository?.issues?.nodes || [];
let botLabeledCount = 0;
let overrideCount = 0;
const isBot = (login: string) =>
login.toLowerCase().includes('[bot]') || login === 'gemini-cli-robot';
for (const issue of issues) {
if (!issue) continue;
const events = (issue.timelineItems?.nodes || []) as any[];
// Find first area/ label added by a bot
const firstBotLabelEvent = events.find(
(e: any) =>
e.__typename === 'LabeledEvent' &&
e.label.name.startsWith('area/') &&
e.actor?.login &&
isBot(e.actor.login)
);
if (firstBotLabelEvent) {
botLabeledCount++;
const botLabelName = firstBotLabelEvent.label.name;
const botLabelTime = new Date(firstBotLabelEvent.createdAt).getTime();
// Check for overrides after this event
const isOverridden = events.some((e: any) => {
const eventTime = new Date(e.createdAt).getTime();
if (eventTime <= botLabelTime) return false;
const actorLogin = e.actor?.login;
if (!actorLogin || isBot(actorLogin)) return false;
// Case 1: Human removed the bot's label
if (e.__typename === 'UnlabeledEvent' && e.label.name === botLabelName) {
return true;
}
// Case 2: Human added a different area/ label
if (
e.__typename === 'LabeledEvent' &&
e.label.name.startsWith('area/') &&
e.label.name !== botLabelName
) {
return true;
}
return false;
});
if (isOverridden) {
overrideCount++;
}
}
}
const accuracyRate = botLabeledCount > 0
? (botLabeledCount - overrideCount) / botLabeledCount
: 1;
process.stdout.write(`triage_accuracy_overrides,${overrideCount}\n`);
process.stdout.write(`triage_accuracy_total_bot_labeled,${botLabeledCount}\n`);
process.stdout.write(`triage_accuracy_rate,${Math.round(accuracyRate * 100) / 100}\n`);
} catch (err) {
process.stderr.write(err instanceof Error ? err.message : String(err));
process.exit(1);
}
}
run();