From 294fd9b75a64fc041cacedefdb49798cfc625813 Mon Sep 17 00:00:00 2001
From: "gemini-cli[bot]"
Date: Thu, 7 May 2026 18:59:27 +0000
Subject: [PATCH] =?UTF-8?q?#=20=F0=9F=93=8A=20Implement=20Triage=20Accurac?=
 =?UTF-8?q?y=20Metric?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR implements the "Triage Accuracy (Overrides)" metric as requested in #26660 (Option 1).

## Changes
- Created `tools/gemini-cli-bot/metrics/scripts/triage_accuracy.ts`.
- The script uses GraphQL to analyze the 100 most recently created issues.
- It detects the first `area/*` label added by a bot (`gemini-cli-robot` or any `*[bot]`).
- It flags an "override" if a human later removes that label or adds a different `area/*` label.

## Expected Impact
This metric provides a feedback loop for our automated triage system, allowing us to measure how often maintainers need to correct the bot's classification. This will help in fine-tuning the triage prompts and logic.

## Metrics Added
- `triage_accuracy_overrides`: Total number of issues in the sample whose bot-applied label was overridden by a human.
- `triage_accuracy_total_bot_labeled`: Total number of issues labeled by the bot in the sample.
- `triage_accuracy_rate`: The ratio of correct (non-overridden) triage actions; reported as 1 when the sample contains no bot-labeled issues.
---
 .../metrics/scripts/triage_accuracy.ts        | 204 ++++++++++++++++++
 1 file changed, 204 insertions(+)
 create mode 100644 tools/gemini-cli-bot/metrics/scripts/triage_accuracy.ts

diff --git a/tools/gemini-cli-bot/metrics/scripts/triage_accuracy.ts b/tools/gemini-cli-bot/metrics/scripts/triage_accuracy.ts
new file mode 100644
index 0000000000..560476f561
--- /dev/null
+++ b/tools/gemini-cli-bot/metrics/scripts/triage_accuracy.ts
@@ -0,0 +1,204 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { execFileSync } from 'node:child_process';
+import { GITHUB_OWNER, GITHUB_REPO } from '../types.js';
+
+/**
+ * This script calculates the triage accuracy by detecting human overrides of
+ * bot-applied labels. It identifies the first 'area/' label added by a bot on
+ * each sampled issue and checks if it was later removed or replaced by a human.
+ *
+ * Results are written to stdout as `metric_name,value` lines.
+ */
+
+/** Actor attached to a timeline event, as selected by QUERY. */
+interface Actor {
+  __typename?: string;
+  login?: string;
+}
+
+/** A label added/removed timeline event, as selected by QUERY. */
+interface LabelEvent {
+  __typename: 'LabeledEvent' | 'UnlabeledEvent';
+  label: { name: string };
+  actor?: Actor | null;
+  createdAt: string;
+}
+
+/** An issue node, as selected by QUERY. */
+interface IssueNode {
+  number: number;
+  timelineItems?: { nodes?: Array<LabelEvent | null> | null } | null;
+}
+
+const AREA_PREFIX = 'area/';
+
+// `timelineItems(first: ...)` keeps the earliest label events in the window so
+// the first bot-applied label is not cut off on issues with many label changes.
+// `actor.__typename` is selected so GitHub App actors can be recognised as bots.
+const QUERY = `
+  query($owner: String!, $repo: String!) {
+    repository(owner: $owner, name: $repo) {
+      issues(last: 100, orderBy: {field: CREATED_AT, direction: ASC}) {
+        nodes {
+          number
+          timelineItems(first: 100, itemTypes: [LABELED_EVENT, UNLABELED_EVENT]) {
+            nodes {
+              __typename
+              ... on LabeledEvent {
+                label { name }
+                actor { __typename login }
+                createdAt
+              }
+              ... on UnlabeledEvent {
+                label { name }
+                actor { __typename login }
+                createdAt
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+`;
+
+/**
+ * Returns true when `actor` is an automation account.
+ *
+ * GitHub Apps are exposed by the GraphQL API as `Bot` objects, so the type name
+ * is checked first; the login-based checks are kept as a fallback.
+ */
+function isBotActor(actor: Actor | null | undefined): boolean {
+  if (!actor) return false;
+  if (actor.__typename === 'Bot') return true;
+  const login = actor.login ?? '';
+  return login.toLowerCase().includes('[bot]') || login === 'gemini-cli-robot';
+}
+
+/** Returns the first 'area/' label event whose actor is a bot, if any. */
+function findFirstBotAreaLabel(
+  events: readonly LabelEvent[]
+): LabelEvent | undefined {
+  return events.find(
+    (e) =>
+      e.__typename === 'LabeledEvent' &&
+      e.label.name.startsWith(AREA_PREFIX) &&
+      isBotActor(e.actor)
+  );
+}
+
+/**
+ * Returns true when, after `botEvent`, a human either removed the bot's label
+ * or added a different 'area/' label.
+ */
+function wasOverridden(
+  events: readonly LabelEvent[],
+  botEvent: LabelEvent
+): boolean {
+  const botLabelName = botEvent.label.name;
+  const botLabelTime = Date.parse(botEvent.createdAt);
+
+  return events.some((e) => {
+    if (Date.parse(e.createdAt) <= botLabelTime) return false;
+
+    // Events without a resolvable actor cannot be attributed to a human.
+    if (!e.actor?.login || isBotActor(e.actor)) return false;
+
+    const labelName = e.label.name;
+
+    // Case 1: Human removed the bot's label
+    if (e.__typename === 'UnlabeledEvent') {
+      return labelName === botLabelName;
+    }
+
+    // Case 2: Human added a different area/ label
+    return (
+      e.__typename === 'LabeledEvent' &&
+      labelName.startsWith(AREA_PREFIX) &&
+      labelName !== botLabelName
+    );
+  });
+}
+
+/**
+ * Fetches recent issues via `gh api graphql`, counts bot-labeled issues and
+ * human overrides, and prints the three triage accuracy metrics. On failure the
+ * error message goes to stderr and the process exit code is set to 1.
+ */
+function run(): void {
+  try {
+    // Arguments are passed as an array (no shell), so nothing in the query or
+    // in the owner/repo values needs quoting. `-f` sends raw string fields.
+    const output = execFileSync(
+      'gh',
+      [
+        'api',
+        'graphql',
+        '-f',
+        `owner=${GITHUB_OWNER}`,
+        '-f',
+        `repo=${GITHUB_REPO}`,
+        '-f',
+        `query=${QUERY}`
+      ],
+      {
+        encoding: 'utf-8',
+        // Capture stderr so a gh failure is included in the thrown error.
+        stdio: ['ignore', 'pipe', 'pipe'],
+        maxBuffer: 16 * 1024 * 1024
+      }
+    );
+
+    const response = JSON.parse(output);
+    if (response.errors) {
+      throw new Error(`GraphQL Errors: ${JSON.stringify(response.errors)}`);
+    }
+
+    const issues: Array<IssueNode | null> =
+      response.data?.repository?.issues?.nodes ?? [];
+
+    let botLabeledCount = 0;
+    let overrideCount = 0;
+
+    for (const issue of issues) {
+      if (!issue) continue;
+      const events = (issue.timelineItems?.nodes ?? []).filter(
+        (e): e is LabelEvent => e != null
+      );
+
+      const botEvent = findFirstBotAreaLabel(events);
+      if (!botEvent) continue;
+
+      botLabeledCount++;
+      if (wasOverridden(events, botEvent)) {
+        overrideCount++;
+      }
+    }
+
+    // With no bot-labeled issues there is nothing to get wrong; report 1.
+    const accuracyRate =
+      botLabeledCount > 0
+        ? (botLabeledCount - overrideCount) / botLabeledCount
+        : 1;
+
+    process.stdout.write(`triage_accuracy_overrides,${overrideCount}\n`);
+    process.stdout.write(
+      `triage_accuracy_total_bot_labeled,${botLabeledCount}\n`
+    );
+    process.stdout.write(
+      `triage_accuracy_rate,${Math.round(accuracyRate * 100) / 100}\n`
+    );
+  } catch (err) {
+    process.stderr.write(
+      `${err instanceof Error ? err.message : String(err)}\n`
+    );
+    process.exitCode = 1;
+  }
+}
+
+run();