diff --git a/.github/scripts/apply-issue-labels.cjs b/.github/scripts/apply-issue-labels.cjs
new file mode 100644
index 0000000000..03c11403fe
--- /dev/null
+++ b/.github/scripts/apply-issue-labels.cjs
@@ -0,0 +1,194 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// Status labels this script always adds/clears, regardless of model output.
+const TRIAGED_LABEL = 'status/bot-triaged';
+const NEEDS_TRIAGE_LABEL = 'status/need-triage';
+
+/**
+ * Extracts the triage result from the raw CLI output.
+ *
+ * Strategies, in order: the whole output as JSON; the contents of a
+ * ```json fenced block; the span from the first `[ { "issue_number"` to the
+ * last `} ]`; and finally that same start cut at the last `]`. The CLI may
+ * print log lines around the JSON, which is why the last two exist.
+ *
+ * @param {string} rawLabels Raw text captured from the triage step.
+ * @param {object} core Toolkit core; only `core.warning` is used here.
+ * @returns {unknown} The parsed JSON value (not yet validated as an array).
+ * @throws {Error} When no strategy yields parseable JSON.
+ */
+function parseLabelsOutput(rawLabels, core) {
+  try {
+    return JSON.parse(rawLabels);
+  } catch (jsonError) {
+    core.warning(
+      `Direct JSON parsing failed: ${jsonError.message}. Trying to extract from a markdown block.`,
+    );
+  }
+
+  const fenced = rawLabels.match(/```json\s*([\s\S]*?)\s*```/);
+  if (fenced && fenced[1]) {
+    try {
+      return JSON.parse(fenced[1].trim());
+    } catch (markdownError) {
+      throw new Error(
+        `Failed to parse JSON even after extracting from markdown block: ${markdownError.message}`,
+      );
+    }
+  }
+
+  const arrayMatch = rawLabels.match(/\[\s*\{\s*"issue_number"[\s\S]*\}\s*\]/);
+  if (!arrayMatch) {
+    throw new Error(
+      'Output is not valid JSON and does not contain extractable JSON.',
+    );
+  }
+  try {
+    return JSON.parse(arrayMatch[0]);
+  } catch (extractError) {
+    core.warning(
+      `Strict array match failed: ${extractError.message}. Attempting to clean leading noisy brackets.`,
+    );
+  }
+
+  // Last resort: keep the same start but cut at the final `]` of the output.
+  const tail = rawLabels.slice(arrayMatch.index);
+  try {
+    return JSON.parse(tail.slice(0, tail.lastIndexOf(']') + 1));
+  } catch (fallbackError) {
+    throw new Error(
+      `Found JSON-like content but failed to parse: ${fallbackError.message}`,
+    );
+  }
+}
+
+/**
+ * Applies the triage labels and comments produced by the Gemini step.
+ *
+ * Reads the model output from the LABELS_OUTPUT environment variable. For
+ * each entry it adds labels (always including status/bot-triaged), removes
+ * labels (always including status/need-triage) and posts the explanation
+ * and/or effort analysis as an issue comment.
+ *
+ * @param {object} args Objects injected by actions/github-script.
+ * @param {object} args.github Octokit client used for the REST calls.
+ * @param {object} args.context Run context; `context.repo` names the repo.
+ * @param {object} args.core Toolkit core used for logging and failures.
+ * @returns {Promise<void>}
+ */
+module.exports = async ({ github, context, core }) => {
+  const rawLabels = process.env.LABELS_OUTPUT;
+  core.info(`Raw labels JSON: ${rawLabels}`);
+  if (typeof rawLabels !== 'string' || rawLabels.trim() === '') {
+    // An unset variable used to surface as a TypeError from `.match()`.
+    core.setFailed('LABELS_OUTPUT is empty or not set.');
+    return;
+  }
+
+  let parsedLabels;
+  try {
+    parsedLabels = parseLabelsOutput(rawLabels, core);
+  } catch (parseError) {
+    core.setFailed(`${parseError.message}\nRaw output: ${rawLabels}`);
+    return;
+  }
+  core.info(`Parsed labels JSON: ${JSON.stringify(parsedLabels)}`);
+
+  if (!Array.isArray(parsedLabels)) {
+    core.setFailed(
+      `Expected a JSON array of triage entries.\nRaw output: ${rawLabels}`,
+    );
+    return;
+  }
+
+  const { owner, repo } = context.repo;
+
+  for (const entry of parsedLabels) {
+    const issueNumber =
+      entry !== null && typeof entry === 'object'
+        ? entry.issue_number
+        : undefined;
+    if (!issueNumber) {
+      core.info(
+        `Skipping entry with no issue number: ${JSON.stringify(entry)}`,
+      );
+      continue;
+    }
+
+    // Build new arrays rather than pushing onto the entry's own: the pushes
+    // made the "no labels" check below fail for present-but-empty lists.
+    const requestedAdds = Array.isArray(entry.labels_to_add)
+      ? entry.labels_to_add
+      : [];
+    const requestedRemovals = Array.isArray(entry.labels_to_remove)
+      ? entry.labels_to_remove
+      : [];
+    const labelsToAdd = [...new Set([...requestedAdds, TRIAGED_LABEL])];
+    const labelsToRemove = [
+      ...new Set([...requestedRemovals, NEEDS_TRIAGE_LABEL]),
+    ];
+
+    await github.rest.issues.addLabels({
+      owner,
+      repo,
+      issue_number: issueNumber,
+      labels: labelsToAdd,
+    });
+    const explanation = entry.explanation ? ` - ${entry.explanation}` : '';
+    core.info(
+      `Successfully added labels for #${issueNumber}: ${labelsToAdd.join(', ')}${explanation}`,
+    );
+
+    // Track what was really removed so the summary does not claim more.
+    const removedLabels = [];
+    for (const label of labelsToRemove) {
+      try {
+        await github.rest.issues.removeLabel({
+          owner,
+          repo,
+          issue_number: issueNumber,
+          name: label,
+        });
+        removedLabels.push(label);
+      } catch (e) {
+        // 404 means the label was not on the issue; nothing to report.
+        if (e.status !== 404) {
+          core.warning(
+            `Failed to remove label ${label} from #${issueNumber}: ${e.message}`,
+          );
+        }
+      }
+    }
+    if (removedLabels.length > 0) {
+      core.info(
+        `Successfully removed labels for #${issueNumber}: ${removedLabels.join(', ')}`,
+      );
+    }
+
+    const commentParts = [];
+    if (entry.explanation) {
+      commentParts.push(entry.explanation);
+    }
+    if (entry.effort_analysis) {
+      commentParts.push(`**Effort Analysis:**\n${entry.effort_analysis}`);
+    }
+    if (commentParts.length > 0) {
+      await github.rest.issues.createComment({
+        owner,
+        repo,
+        issue_number: issueNumber,
+        body: commentParts.join('\n\n'),
+      });
+    }
+
+    if (requestedAdds.length === 0 && requestedRemovals.length === 0) {
+      core.info(
+        `No labels to add or remove for #${issueNumber}, leaving as is`,
+      );
+    }
+  }
+};
diff --git a/.github/scripts/cleanup-triage-labels.cjs b/.github/scripts/cleanup-triage-labels.cjs new file mode 100644 index 0000000000..5b492bb423 --- /dev/null +++ b/.github/scripts/cleanup-triage-labels.cjs @@ -0,0 +1,50 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +const fs = require('node:fs'); + +module.exports = async ({ github, context, core }) => { + let issuesToCleanup = []; + try { + const fileContent = fs.readFileSync('issues_to_cleanup.json', 'utf8'); + issuesToCleanup = JSON.parse(fileContent); + } catch (error) { + if (error.code === 'ENOENT') { + core.info('No issues found to clean up.'); + return;
+ } + core.setFailed(`Failed to read issues_to_cleanup.json: ${error.message}`); + return; + } + + for (const issue of issuesToCleanup) { + try { + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + name: 'status/need-triage', + }); + core.info( + `Successfully removed status/need-triage from #${issue.number}`, + ); + } catch (error) { + if (error.status === 404) { + core.info( + `Label status/need-triage not found on #${issue.number}, skipping.`, + ); + } else { + core.warning( + `Failed to remove label from #${issue.number}: ${error.message}`, + ); + } + } + } + + core.info( + `Cleaned up status/need-triage from ${issuesToCleanup.length} issues.`, + ); +};
diff --git a/.github/scripts/sync-issue-types.cjs b/.github/scripts/sync-issue-types.cjs
new file mode 100644
index 0000000000..4e547d4a66
--- /dev/null
+++ b/.github/scripts/sync-issue-types.cjs
@@ -0,0 +1,152 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+const fs = require('node:fs');
+
+// GraphQL node IDs of this repository's issue types.
+const BUG_ISSUE_TYPE_ID = 'IT_kwDOCaSVvs4BR7vP';
+const FEATURE_ISSUE_TYPE_ID = 'IT_kwDOCaSVvs4BR7vQ';
+
+const OPEN_ISSUES_QUERY = `
+  query($owner: String!, $repo: String!) {
+    repository(owner: $owner, name: $repo) {
+      issues(first: 50, states: OPEN, orderBy: {field: UPDATED_AT, direction: DESC}) {
+        nodes {
+          id
+          number
+          title
+          body
+          issueType {
+            name
+          }
+          labels(first: 20) {
+            nodes {
+              name
+            }
+          }
+        }
+      }
+    }
+  }
+`;
+
+const SET_ISSUE_TYPE_MUTATION = `
+  mutation($issueId: ID!, $issueTypeId: ID!) {
+    updateIssue(input: {id: $issueId, issueTypeId: $issueTypeId}) {
+      issue {
+        id
+      }
+    }
+  }
+`;
+
+/**
+ * Maps an issue's kind/* labels to the matching issue type ID.
+ *
+ * @param {string[]} labelNames Names of the labels on the issue.
+ * @returns {string | null} Issue type ID, or null when no kind label maps.
+ */
+function issueTypeIdForLabels(labelNames) {
+  if (labelNames.includes('kind/bug')) {
+    return BUG_ISSUE_TYPE_ID;
+  }
+  if (
+    labelNames.includes('kind/feature') ||
+    labelNames.includes('kind/enhancement')
+  ) {
+    return FEATURE_ISSUE_TYPE_ID;
+  }
+  return null;
+}
+
+/**
+ * Sets the Issue Type of recently updated open issues from their kind/*
+ * labels and writes the issues that still need analysis to
+ * no_type_issues.json for the triage step.
+ *
+ * @param {object} args Objects injected by actions/github-script.
+ * @param {object} args.github Octokit client used for the GraphQL calls.
+ * @param {object} args.context Run context; `context.repo` names the repo.
+ * @param {object} args.core Toolkit core used for logging and failures.
+ * @returns {Promise<void>}
+ */
+module.exports = async ({ github, context, core }) => {
+  let issues;
+  try {
+    const result = await github.graphql(OPEN_ISSUES_QUERY, {
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+    });
+    issues = result.repository.issues.nodes;
+  } catch (error) {
+    core.setFailed(`Failed to sync issue types: ${error.message}`);
+    return;
+  }
+
+  const issuesNeedingAnalysis = [];
+  const failedIssues = [];
+  let syncedCount = 0;
+
+  for (const issue of issues) {
+    if (issue.issueType !== null) {
+      continue;
+    }
+    const labelNames = issue.labels.nodes.map((l) => l.name);
+    const issueTypeId = issueTypeIdForLabels(labelNames);
+
+    if (issueTypeId === null) {
+      // Only queue issues the bot has not handled yet and that really lack a
+      // kind label; otherwise e.g. kind/question issues, which map to no
+      // type, would be re-triaged (and re-commented) on every run.
+      const alreadyTriaged = labelNames.includes('status/bot-triaged');
+      const hasKindLabel = labelNames.some((name) => name.startsWith('kind/'));
+      if (!alreadyTriaged && !hasKindLabel) {
+        issuesNeedingAnalysis.push({
+          number: issue.number,
+          title: issue.title,
+          body: issue.body,
+        });
+      }
+      continue;
+    }
+
+    try {
+      await github.graphql(SET_ISSUE_TYPE_MUTATION, {
+        issueId: issue.id,
+        issueTypeId,
+      });
+      core.info(`Successfully synced Issue Type for #${issue.number}`);
+      syncedCount++;
+    } catch (error) {
+      // Keep going so one bad issue does not block the rest of the batch.
+      failedIssues.push(issue.number);
+      core.warning(
+        `Failed to sync Issue Type for #${issue.number}: ${error.message}`,
+      );
+    }
+  }
+
+  // Write issues needing analysis to a file so the AI can process them
+  try {
+    fs.writeFileSync(
+      'no_type_issues.json',
+      JSON.stringify(issuesNeedingAnalysis),
+    );
+  } catch (error) {
+    core.setFailed(`Failed to sync issue types: ${error.message}`);
+    return;
+  }
+  core.info(`Synced ${syncedCount} issues from labels.`);
+  core.info(
+    `Found ${issuesNeedingAnalysis.length} issues missing both type and kind label to be analyzed.`,
+  );
+
+  if (failedIssues.length > 0) {
+    core.setFailed(
+      `Failed to sync issue types for: ${failedIssues.map((n) => `#${n}`).join(', ')}`,
+    );
+  }
+};
diff --git a/.github/workflows/gemini-scheduled-issue-triage.yml b/.github/workflows/gemini-scheduled-issue-triage.yml index a91b0cc60e..6c8f10dcb7 100644 --- a/.github/workflows/gemini-scheduled-issue-triage.yml +++ b/.github/workflows/gemini-scheduled-issue-triage.yml @@ -23,7 +23,7 @@ permissions: jobs: triage-issues: - timeout-minutes: 10 + timeout-minutes: 60 if: |- ${{ github.repository == 'google-gemini/gemini-cli' }} runs-on: 'ubuntu-latest' @@ -51,6 +51,16 @@ jobs: echo "has_issues=true" >> "${GITHUB_OUTPUT}" echo "✅ Found issue #${{ github.event.issue.number }} from event to triage!
๐ŸŽฏ" + - name: 'Sync Issue Types' + if: |- + ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} + uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea' + with: + github-token: '${{ steps.generate_token.outputs.token }}' + script: |- + const syncIssueTypes = require('./.github/scripts/sync-issue-types.cjs'); + await syncIssueTypes({ github, context, core }); + - name: 'Find untriaged issues' if: |- ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} @@ -63,18 +73,22 @@ jobs: echo '๐Ÿ” Finding issues missing area labels...' gh issue list --repo "${GITHUB_REPOSITORY}" \ - --search 'is:open is:issue -label:status/bot-triaged -label:area/core -label:area/agent -label:area/enterprise -label:area/non-interactive -label:area/security -label:area/platform -label:area/extensions -label:area/documentation -label:area/unknown' --limit 100 --json number,title,body > no_area_issues.json + --search 'is:open is:issue -label:status/bot-triaged -label:area/core -label:area/agent -label:area/enterprise -label:area/non-interactive -label:area/security -label:area/platform -label:area/extensions -label:area/documentation -label:area/unknown' --limit 50 --json number,title,body > no_area_issues.json echo '๐Ÿ” Finding issues missing kind labels...' gh issue list --repo "${GITHUB_REPOSITORY}" \ - --search 'is:open is:issue -label:status/bot-triaged -label:kind/bug -label:kind/enhancement -label:kind/customer-issue -label:kind/question' --limit 100 --json number,title,body > no_kind_issues.json + --search 'is:open is:issue -label:status/bot-triaged -label:kind/bug -label:kind/enhancement -label:kind/customer-issue -label:kind/question' --limit 50 --json number,title,body > no_kind_issues.json echo '๐Ÿท๏ธ Finding issues missing priority labels...' 
gh issue list --repo "${GITHUB_REPOSITORY}" \ - --search 'is:open is:issue -label:status/bot-triaged -label:priority/p0 -label:priority/p1 -label:priority/p2 -label:priority/p3 -label:priority/unknown' --limit 100 --json number,title,body > no_priority_issues.json + --search 'is:open is:issue -label:status/bot-triaged -label:priority/p0 -label:priority/p1 -label:priority/p2 -label:priority/p3 -label:priority/unknown' --limit 50 --json number,title,body > no_priority_issues.json + + echo '๐Ÿ“ Finding issues missing effort labels...' + gh issue list --repo "${GITHUB_REPOSITORY}" \ + --search 'is:open is:issue -label:status/bot-triaged -label:effort/small -label:effort/medium -label:effort/large label:area/core,area/extensions,area/site,area/non-interactive' --limit 20 --json number,title,body > no_effort_issues.json echo '๐Ÿ”„ Merging and deduplicating issues...' - jq -c -s 'add | unique_by(.number)' no_area_issues.json no_kind_issues.json no_priority_issues.json > issues_to_triage.json + jq -c -s 'add | unique_by(.number)' no_area_issues.json no_kind_issues.json no_priority_issues.json no_effort_issues.json no_type_issues.json > issues_to_triage.json ISSUE_COUNT="$(jq 'length' issues_to_triage.json)" if [ "$ISSUE_COUNT" -gt 0 ]; then @@ -84,6 +98,22 @@ jobs: fi echo "โœ… Found ${ISSUE_COUNT} unique issues to triage! 
🎯" + - name: 'Create Gemini CLI Experiments Override' + if: |- + steps.get_issue_from_event.outputs.has_issues == 'true' || steps.find_issues.outputs.has_issues == 'true' + run: | + cat << 'EOF' > gemini_exp.json + { + "flags": [ + { + "flagId": 45750526, + "boolValue": false + } + ], + "experimentIds": [] + } + EOF + - name: 'Get Repository Labels' id: 'get_labels' uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea' @@ -109,6 +139,8 @@ jobs: REPOSITORY: '${{ github.repository }}' AVAILABLE_LABELS: '${{ steps.get_labels.outputs.available_labels }}' GEMINI_CLI_TRUST_WORKSPACE: 'true' + GEMINI_EXP: 'gemini_exp.json' + GEMINI_STRICT_TELEMETRY_LIMITS: 'true' with: gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}' gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}' @@ -122,6 +154,8 @@ jobs: "maxSessionTurns": 25, "coreTools": [ "run_shell_command(echo)", + "grep_search", + "glob", "read_file" ], "telemetry": { @@ -141,23 +175,26 @@ jobs: - 1. You are only able to use the echo and read_file commands. Review the available labels in the environment variable: "${AVAILABLE_LABELS}". + 1. You are only able to use the echo, read_file, grep_search and glob tools. Review the available labels in the environment variable: "${AVAILABLE_LABELS}". 2. Use the read_file tool to read the file "issues_to_triage.json" which contains the JSON array of issues to triage. 3. Review the issue title, body and any comments provided in the JSON file. - 4. Identify the most relevant labels from the existing labels, specifically focusing on area/*, kind/* and priority/*. + 4. Identify the most relevant labels from the existing labels, specifically focusing on area/*, kind/*, priority/*, and effort/*. 5. Label Policy: - If the issue already has a kind/ label, do not change it. - If the issue already has a priority/ label, do not change it. - If the issue already has an area/ label, do not change it. + - If the issue already has an effort/ label, do not change it. 
+ - If the issue is missing an effort/ label AND its area is area/core, area/extensions, area/site, or area/non-interactive, you must evaluate the architectural complexity to determine the effort level. You MUST NOT guess the root cause. You MUST actively use your codebase search tools (grep_search and glob) to search for keywords from the issue and explore the codebase. You must identify the specific files and components involved before deciding the effort. Do NOT evaluate or assign an effort/ label to issues in any other areas (such as area/agent). - If any of these are missing, select exactly ONE appropriate label for the missing category. 6. Identify other applicable labels based on the issue content, such as status/*, help wanted, good first issue, etc. 7. Give me a single short explanation about why you are selecting each label in the process. 8. Output a JSON array of objects, each containing the issue number - and the labels to add and remove, along with an explanation. For example: + and the labels to add and remove, along with an explanation. If you assigned an effort/ label, you MUST also include an effort_analysis field. This effort_analysis must be highly detailed, technical, and empirical. It MUST NOT contain vague guesses (e.g., avoid words like "likely points to" or "possibly"). You must explicitly cite the specific file paths and architectural mechanisms you discovered using your search tools, explain the root cause, and then explicitly state how that complexity maps to the chosen effort level guidelines. For example: ``` [ { "issue_number": 123, - "labels_to_add": ["area/core", "kind/bug", "priority/p2"], + "labels_to_add": ["area/core", "kind/bug", "priority/p2", "effort/small"], "labels_to_remove": ["status/need-triage"], - "explanation": "This issue is a UI bug that needs to be addressed with medium priority." 
+ "explanation": "This issue is a UI bug that needs to be addressed with medium priority.", + "effort_analysis": "The `vscode-ide-companion` extension indiscriminately tracks active text editors via `vscode.window.onDidChangeActiveTextEditor` in `open-files-manager.ts`. When a user opens `.vscode/settings.json`, its content is sent to the CLI's context. The fix is highly localized to the VS Code companion extension's event listener. It involves adding a simple conditional check to exclude specific configuration files from the active editor tracking logic, which is a trivial logic adjustment with a clear root cause." } ] ``` @@ -181,6 +218,23 @@ jobs: - Identify only one priority/ label. - Once you categorize the issue if it needs information bump down the priority by 1 eg.. a p0 would become a p1 a p1 would become a p2. P2 and P3 can stay as is in this scenario. + Categorization Guidelines (Effort): + effort/small (1 day or less): + - Trivial Logic & Config: Schema updates (Zod), feature flag toggles, adding missing fields to package.json or settings.json. + - UI/Aesthetic Adjustments: Fixing minor layout bugs in Ink components (e.g., adding flexShrink, correcting padding in a single Box), text color changes. + - Documentation & Strings: Typos, log message updates, CLI argument descriptions. + - Localized Bug Fixes: Single-file logic errors, straightforward promise rejections (e.g., wrapping a known failure in a try/catch), simple regex or string parsing fixes. + effort/medium (2-3 days): + - React/Ink State Management: Debugging useState/useEffect/useReducer bugs, component lifecycle issues (memory leaks in the UI), terminal redraw flickering, or state synchronization between the CLI's internal input buffer and the interactive React components. 
+ - Asynchronous Flow & Integration: Resolving complex Promise chains, ERR_STREAM_PREMATURE_CLOSE, debugging IDE companion extensions (VS Code, Android Studio) or resolving hanging HTTP requests/IPC between the CLI and external plugins, timeouts in non-interactive/ACP modes. + - Tooling & Output Parsers: Modifying how tools parse streaming stdout/stderr buffers, adding new built-in tools that don't require native bindings. + - Cross-Component Refactors: Changes that span across packages/cli and packages/core to pass new data models or telemetry state. + effort/large (3+ days): + - Platform-Specific Complexities (PTY/Signals): Any issue involving node-pty, child_process.spawn, OS-level shell behavior (Windows vs Linux vs macOS), pseudo-terminal exhaustion (ENXIO), raw mode terminal desyncs, or POSIX signal forwarding (SIGINT/SIGTERM). + - Core Architecture & Protocols: Refactoring the Scheduler, Agent-to-Agent (A2A) protocol implementation, low-level MCP (Model Context Protocol) transport mechanisms. + - Performance & Memory: Diagnosing massive disk/memory leaks, severe boot time regressions, high-throughput streaming optimizations (e.g., voice streaming pipelines). + Note: Any bug that is described as intermittent, flickering, difficult to reproduce, platform-specific, or requiring cross-environment setups (e.g., involving the VS Code IDE companion, GCA plugin, or Android Studio) MUST NOT be rated as effort/small because of the increased overhead of testing and reproducing. + Categorization Guidelines (Priority): P0 - Urgent Blocking Issues: - DO NOT APPLY THIS LABEL AUTOMATICALLY. Use status/manual-triage instead. @@ -229,74 +283,37 @@ jobs: with: github-token: '${{ steps.generate_token.outputs.token }}' script: |- - const rawLabels = process.env.LABELS_OUTPUT; - core.info(`Raw labels JSON: ${rawLabels}`); - let parsedLabels; - try { - // First, try to parse the raw output as JSON. 
- parsedLabels = JSON.parse(rawLabels); - } catch (jsonError) { - // If that fails, check for a markdown code block. - core.warning(`Direct JSON parsing failed: ${jsonError.message}. Trying to extract from a markdown block.`); - const jsonMatch = rawLabels.match(/```json\s*([\s\S]*?)\s*```/); - if (jsonMatch && jsonMatch[1]) { - try { - parsedLabels = JSON.parse(jsonMatch[1].trim()); - } catch (markdownError) { - core.setFailed(`Failed to parse JSON even after extracting from markdown block: ${markdownError.message}\nRaw output: ${rawLabels}`); - return; - } - } else { - // If no markdown block, try to find a raw JSON array in the output. - // The CLI may include debug/log lines (e.g. telemetry init, YOLO mode) - // before the actual JSON response. - const jsonArrayMatch = rawLabels.match(/(\[[\s\S]*\])/); - if (jsonArrayMatch) { - try { - parsedLabels = JSON.parse(jsonArrayMatch[0]); - } catch (extractError) { - core.setFailed(`Found JSON-like content but failed to parse: ${extractError.message}\nRaw output: ${rawLabels}`); - return; - } - } else { - core.setFailed(`Output is not valid JSON and does not contain extractable JSON.\nRaw output: ${rawLabels}`); - return; - } - } - } - core.info(`Parsed labels JSON: ${JSON.stringify(parsedLabels)}`); + const applyLabels = require('./.github/scripts/apply-issue-labels.cjs'); + await applyLabels({ github, context, core }); - for (const entry of parsedLabels) { - const issueNumber = entry.issue_number; - if (!issueNumber) { - core.info(`Skipping entry with no issue number: ${JSON.stringify(entry)}`); - continue; - } + - name: 'Sync Issue Types (Post-Analysis)' + if: |- + always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea' + with: + github-token: '${{ steps.generate_token.outputs.token }}' + script: |- + const syncIssueTypes = require('./.github/scripts/sync-issue-types.cjs'); + await syncIssueTypes({ github, 
context, core }); - const labelsToAdd = entry.labels_to_add || []; - labelsToAdd.push('status/bot-triaged'); + - name: 'Find Triaged Issues to Clean Up' + if: |- + always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + env: + GITHUB_TOKEN: '${{ steps.generate_token.outputs.token }}' + GITHUB_REPOSITORY: '${{ github.repository }}' + run: |- + set -euo pipefail + echo '๐Ÿงน Finding issues that have both bot-triaged and need-triage labels...' + gh issue list --repo "${GITHUB_REPOSITORY}" \ + --search 'is:open is:issue label:status/bot-triaged label:status/need-triage' --limit 50 --json number > issues_to_cleanup.json - if (labelsToAdd.length > 0) { - await github.rest.issues.addLabels({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issueNumber, - labels: labelsToAdd - }); - const explanation = entry.explanation ? ` - ${entry.explanation}` : ''; - core.info(`Successfully added labels for #${issueNumber}: ${labelsToAdd.join(', ')}${explanation}`); - } - - if (entry.explanation) { - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issueNumber, - body: entry.explanation, - }); - } - - if ((!entry.labels_to_add || entry.labels_to_add.length === 0) && (!entry.labels_to_remove || entry.labels_to_remove.length === 0)) { - core.info(`No labels to add or remove for #${issueNumber}, leaving as is`); - } - } + - name: 'Clean Up Triage Labels' + if: |- + always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea' + with: + github-token: '${{ steps.generate_token.outputs.token }}' + script: |- + const cleanupLabels = require('./.github/scripts/cleanup-triage-labels.cjs'); + await cleanupLabels({ github, context, core }); diff --git a/packages/core/src/telemetry/gcp-exporters.test.ts b/packages/core/src/telemetry/gcp-exporters.test.ts index 
6415104d53..fa35829e81 100644 --- a/packages/core/src/telemetry/gcp-exporters.test.ts +++ b/packages/core/src/telemetry/gcp-exporters.test.ts @@ -143,6 +143,75 @@ describe('GCP Exporters', () => { }); }); + it('should truncate payload strictly if GEMINI_STRICT_TELEMETRY_LIMITS is true', async () => { + vi.stubEnv('GEMINI_STRICT_TELEMETRY_LIMITS', 'true'); + + // Create an array of 60 strings, each 10,000 characters long. + // Even after the 2k strict truncation pass, the total size will be + // ~120k, which forces the final fallback structural strip. + const largeArray = Array(60).fill('a'.repeat(10000)); + + const mockLogRecords: ReadableLogRecord[] = [ + { + hrTime: [1234567890, 123456789], + hrTimeObserved: [1234567890, 123456789], + severityNumber: 9, + body: 'Test', + attributes: { + huge_data: largeArray, + }, + } as unknown as ReadableLogRecord, + ]; + + const callback = vi.fn(); + exporter.export(mockLogRecords, callback); + + await new Promise((resolve) => setTimeout(resolve, 0)); + + expect(mockLog.entry).toHaveBeenCalled(); + const entryCallArgs = mockLog.entry.mock.calls[0]; + const payload = entryCallArgs[1]; + + // Should have fallen back to structural strip due to strict limit + expect(payload).toHaveProperty( + '_warning', + 'Payload heavily truncated due to strict limits', + ); + expect(payload.data.length).toBeLessThanOrEqual(50050); // 50000 + '... 
(truncated)' + + vi.unstubAllEnvs(); + }); + + it('should completely bypass truncation if GEMINI_STRICT_TELEMETRY_LIMITS is false or unset', async () => { + const largeArray = Array(60).fill('a'.repeat(10000)); + + const mockLogRecords: ReadableLogRecord[] = [ + { + hrTime: [1234567890, 123456789], + hrTimeObserved: [1234567890, 123456789], + severityNumber: 9, + body: 'Test', + attributes: { + huge_data: largeArray, + }, + } as unknown as ReadableLogRecord, + ]; + + const callback = vi.fn(); + exporter.export(mockLogRecords, callback); + + await new Promise((resolve) => setTimeout(resolve, 0)); + + expect(mockLog.entry).toHaveBeenCalled(); + const entryCallArgs = mockLog.entry.mock.calls[0]; + const payload = entryCallArgs[1]; + + // Should NOT have fallen back to structural strip, array should be intact + expect(payload).not.toHaveProperty('_warning'); + expect(payload).toHaveProperty('huge_data'); + expect(payload.huge_data).toEqual(largeArray); + }); + it('should handle export failures', async () => { const mockLogRecords: ReadableLogRecord[] = [ {
diff --git a/packages/core/src/telemetry/gcp-exporters.ts b/packages/core/src/telemetry/gcp-exporters.ts
index 3bf1781b87..b4140fca20 100644
--- a/packages/core/src/telemetry/gcp-exporters.ts
+++ b/packages/core/src/telemetry/gcp-exporters.ts
@@ -17,6 +17,7 @@ import type {
   ReadableLogRecord,
   LogRecordExporter,
 } from '@opentelemetry/sdk-logs';
+import type { ResourceMetrics } from '@opentelemetry/sdk-metrics';
 
 /**
  * Google Cloud Trace exporter that extends the official trace exporter
@@ -42,6 +43,56 @@ export class GcpMetricExporter extends MetricExporter {
       prefix: 'custom.googleapis.com/gemini_cli',
     });
   }
+
+  override export(
+    metrics: ResourceMetrics,
+    resultCallback: (result: ExportResult) => void,
+  ): void {
+    super.export(metrics, (result: ExportResult) => {
+      if (result.code === ExportResultCode.FAILED && result.error) {
+        // Suppress errors related to writing too frequently, as they are
+        // expected when the CLI shuts down quickly after a periodic export.
+        const errorMessage = result.error.message || String(result.error);
+        if (
+          process.env['GEMINI_STRICT_TELEMETRY_LIMITS'] === 'true' &&
+          errorMessage.includes(
+            'written more frequently than the maximum sampling period',
+          )
+        ) {
+          resultCallback({ code: ExportResultCode.SUCCESS });
+          return;
+        }
+      }
+      resultCallback(result);
+    });
+  }
+}
+
+/**
+ * Deeply truncates strings in an object to prevent GCP log size limit errors.
+ *
+ * @param payload Value to copy; strings are cut, arrays and objects recursed.
+ * @param limit Maximum length kept for each individual string value.
+ * @returns A truncated copy; non-string primitives are returned unchanged.
+ */
+function truncateLogPayload(payload: unknown, limit = 200000): unknown {
+  if (typeof payload === 'string') {
+    return payload.length > limit
+      ? payload.substring(0, limit) + '... (truncated due to size)'
+      : payload;
+  }
+  if (Array.isArray(payload)) {
+    return payload.map((item) => truncateLogPayload(item, limit));
+  }
+  if (payload !== null && typeof payload === 'object') {
+    const truncatedObj: Record<string, unknown> = {};
+    for (const [key, value] of Object.entries(payload)) {
+      // Keys are also strings, but usually small. Truncate values.
+      truncatedObj[key] = truncateLogPayload(value, limit);
+    }
+    return truncatedObj;
+  }
+  return payload;
 }
 
 /**
@@ -63,6 +114,38 @@ export class GcpLogExporter implements LogRecordExporter {
   ): void {
     try {
       const entries = logs.map((log) => {
+        const rawPayload = {
+          ...log.attributes,
+          ...log.resource?.attributes,
+          message: log.body,
+        };
+
+        const isStrictTelemetry =
+          process.env['GEMINI_STRICT_TELEMETRY_LIMITS'] === 'true';
+
+        let finalPayload: unknown = rawPayload;
+
+        if (isStrictTelemetry) {
+          // Enforce a strict cap on the entire payload to avoid 256KB limit crashes in CI.
+          let safePayload = truncateLogPayload(rawPayload, 10000);
+          let payloadString = JSON.stringify(safePayload);
+
+          if (payloadString && payloadString.length > 100000) {
+            // If still too large, apply a stricter limit
+            safePayload = truncateLogPayload(rawPayload, 2000);
+            payloadString = JSON.stringify(safePayload);
+
+            if (payloadString && payloadString.length > 100000) {
+              // Fallback: strip structure and send a truncated raw string
+              safePayload = {
+                _warning: 'Payload heavily truncated due to strict limits',
+                data: payloadString.substring(0, 50000) + '... (truncated)',
+              };
+            }
+          }
+          finalPayload = safePayload;
+        }
+
         const entry = this.log.entry(
           {
             severity: this.mapSeverityToCloudLogging(log.severityNumber),
@@ -74,11 +157,8 @@ export class GcpLogExporter implements LogRecordExporter {
               },
             },
           },
-          {
-            ...log.attributes,
-            ...log.resource?.attributes,
-            message: log.body,
-          },
+          // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+          finalPayload as Record<string, unknown>,
         );
         return entry;
       });
diff --git a/packages/core/src/telemetry/metrics.test.ts b/packages/core/src/telemetry/metrics.test.ts index 0bca699b16..281c3a6d8a 100644 --- a/packages/core/src/telemetry/metrics.test.ts +++ b/packages/core/src/telemetry/metrics.test.ts @@ -4,7 +4,15 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest'; +import { + describe, + it, + expect, + vi, + beforeEach, + afterEach, + type Mock, +} from 'vitest'; import type { Counter, Meter, @@ -501,6 +509,10 @@ describe('Telemetry
Metrics', () => { 'routing.error_message': 'test-error', }); }); + + it('should truncate long reasoning and error_message when GEMINI_STRICT_TELEMETRY_LIMITS is true', () => { + vi.stubEnv('GEMINI_STRICT_TELEMETRY_LIMITS', 'true'); + initializeMetricsModule(mockConfig); + const longReason = 'a'.repeat(2000); + const longError = 'b'.repeat(2000); + const event = new ModelRoutingEvent( + 'gemini-pro', + 'Classifier', + 200, + longReason, + true, + longError, + ApprovalMode.DEFAULT, + ); + recordModelRoutingMetricsModule(mockConfig, event); + + expect(mockHistogramRecordFn).toHaveBeenCalledWith(200, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 'test@example.com', + 'routing.decision_model': 'gemini-pro', + 'routing.decision_source': 'Classifier', + 'routing.failed': true, + 'routing.reasoning': 'a'.repeat(1000) + '...', + 'routing.approval_mode': ApprovalMode.DEFAULT, + }); + + expect(mockCounterAddFn).toHaveBeenNthCalledWith(2, 1, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 'test@example.com', + 'routing.decision_model': 'gemini-pro', + 'routing.decision_source': 'Classifier', + 'routing.failed': true, + 'routing.reasoning': 'a'.repeat(1000) + '...', + 'routing.approval_mode': ApprovalMode.DEFAULT, + 'routing.error_message': 'b'.repeat(1000) + '...', + }); + }); + + it('should NOT truncate long reasoning and error_message when GEMINI_STRICT_TELEMETRY_LIMITS is false or unset', () => { + initializeMetricsModule(mockConfig); + const longReason = 'a'.repeat(2000); + const longError = 'b'.repeat(2000); + const event = new ModelRoutingEvent( + 'gemini-pro', + 'Classifier', + 200, + longReason, + true, + longError, + ApprovalMode.DEFAULT, + ); + recordModelRoutingMetricsModule(mockConfig, event); + + expect(mockHistogramRecordFn).toHaveBeenCalledWith(200, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 
'test@example.com', + 'routing.decision_model': 'gemini-pro', + 'routing.decision_source': 'Classifier', + 'routing.failed': true, + 'routing.reasoning': longReason, + 'routing.approval_mode': ApprovalMode.DEFAULT, + }); + + expect(mockCounterAddFn).toHaveBeenNthCalledWith(2, 1, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 'test@example.com', + 'routing.decision_model': 'gemini-pro', + 'routing.decision_source': 'Classifier', + 'routing.failed': true, + 'routing.reasoning': longReason, + 'routing.approval_mode': ApprovalMode.DEFAULT, + 'routing.error_message': longError, + }); + }); }); describe('recordAgentRunMetrics', () => { diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index 377479c1e4..e09699e798 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -1129,7 +1129,14 @@ export function recordModelRoutingMetrics( }; if (event.reasoning) { - attributes['routing.reasoning'] = event.reasoning; + // GCP metric labels have a maximum string size of 1024 characters. + // Apply strict truncation only in CI workflows to avoid masking data for normal users. + const isStrictTelemetry = + process.env['GEMINI_STRICT_TELEMETRY_LIMITS'] === 'true'; + attributes['routing.reasoning'] = + isStrictTelemetry && event.reasoning.length > 1000 + ? event.reasoning.substring(0, 1000) + '...' 
+ : event.reasoning; } if (event.enable_numerical_routing !== undefined) { attributes['routing.enable_numerical_routing'] = @@ -1142,9 +1149,16 @@ export function recordModelRoutingMetrics( modelRoutingLatencyHistogram.record(event.routing_latency_ms, attributes); if (event.failed) { + const isStrictTelemetry = + process.env['GEMINI_STRICT_TELEMETRY_LIMITS'] === 'true'; modelRoutingFailureCounter.add(1, { ...attributes, - 'routing.error_message': event.error_message, + 'routing.error_message': + isStrictTelemetry && + event.error_message && + event.error_message.length > 1000 + ? event.error_message.substring(0, 1000) + '...' + : event.error_message, }); } }