/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { evalTest } from '../test-helper.js';
import fs from 'node:fs/promises';
import path from 'node:path';
import yaml from 'js-yaml';

/** Minimal shape of a workflow step; only the fields this file reads. */
interface WorkflowStep {
  id?: string;
  with?: { prompt?: string; settings?: string };
}

/** Minimal shape of the workflow file; only the fields this file reads. */
interface Workflow {
  jobs?: Record<string, { steps?: WorkflowStep[] }>;
}

/**
 * Minimal shape of an entry returned by `rig.readToolLogs()`.
 * NOTE(review): `args` is assumed to be the serialized argument string, since
 * the assertions below search it with string methods — confirm against the rig.
 */
interface ToolLog {
  toolRequest: { name: string; args: string };
}

// Read the workflow file to extract the prompt and settings
const workflowPath = path.join(
  process.cwd(),
  '.github/workflows/gemini-automated-issue-dedup.yml',
);
const workflowContent = await fs.readFile(workflowPath, 'utf8');
const workflowData = yaml.load(workflowContent) as Workflow;
const geminiStep = workflowData.jobs?.['find-duplicates']?.steps?.find(
  (step) => step.id === 'gemini_issue_deduplication',
);

const extractedPrompt = geminiStep?.with?.prompt;
const ORIGINAL_SETTINGS = JSON.parse(geminiStep?.with?.settings || '{}');

if (!extractedPrompt) {
  throw new Error('Could not extract prompt from de-duplication workflow.');
}
const DEDUP_PROMPT_TEMPLATE: string = extractedPrompt;

const mockMcpPath = path.join(process.cwd(), 'evals/mocks/dedup_mcp.ts');

/**
 * Builds the prompt for one issue by substituting the GitHub Actions
 * expressions in the workflow's prompt template.
 *
 * @param issueNumber Issue number substituted for `github.event.issue.number`.
 * @returns The prompt text with the repository and issue number filled in.
 */
const createPrompt = (issueNumber: number) => {
  // The prompt uses ${{ github.event.issue.number }} but also references ${ISSUE_NUMBER} (env)
  return DEDUP_PROMPT_TEMPLATE.replace(
    /\$\{\{ github\.repository \}\}/g,
    'google-gemini/gemini-cli',
  ).replace(
    /\$\{\{ github\.event\.issue\.number \}\}/g,
    issueNumber.toString(),
  );
};

const tsxPath = path.join(process.cwd(), 'node_modules', '.bin', 'tsx');

// Workflow settings with the MCP server swapped for the local mock.
const DEDUP_SETTINGS = {
  ...ORIGINAL_SETTINGS,
  mcpServers: {
    issue_deduplication: {
      command: tsxPath,
      args: [mockMcpPath],
    },
  },
};

if (DEDUP_SETTINGS.telemetry) {
  delete DEDUP_SETTINGS.telemetry;
}

/**
 * Matches `DUPLICATE_ISSUES_CSV=<list>` inside a shell tool call's arguments.
 *
 * The value may be wrapped in a quote (plain, or backslash-escaped as it
 * appears inside a JSON-serialized command) and/or square brackets. Group 1 is
 * the comma-separated run of issue numbers, or undefined when the list is
 * empty. Without the optional quote, a quoted value would parse as "no issues"
 * and every negative assertion would pass vacuously.
 */
const DUPLICATE_ISSUES_CSV_PATTERN =
  /DUPLICATE_ISSUES_CSV=(?:\\?["'])?\[?\s*(\d+(?:\s*,\s*\d+)*)?/;

/**
 * Returns the first logged call to the named tool, if any.
 *
 * @param toolLogs Tool log entries from the test rig.
 * @param name Tool name to look for.
 */
function findToolCall(
  toolLogs: readonly ToolLog[],
  name: string,
): ToolLog | undefined {
  return toolLogs.find((log) => log.toolRequest.name === name);
}

/**
 * Extracts the issue numbers the agent wrote to `DUPLICATE_ISSUES_CSV`.
 *
 * Looks at the first `run_shell_command` call whose arguments mention
 * `DUPLICATE_ISSUES_CSV`.
 *
 * @param toolLogs Tool log entries from the test rig.
 * @returns `undefined` when no such shell call exists; otherwise the issue
 *   numbers as strings (an empty array when the value is empty or unparsable).
 */
function extractDuplicateIssues(
  toolLogs: readonly ToolLog[],
): string[] | undefined {
  const envCall = toolLogs.find(
    (log) =>
      log.toolRequest.name === 'run_shell_command' &&
      log.toolRequest.args.includes('DUPLICATE_ISSUES_CSV'),
  );
  if (!envCall) {
    return undefined;
  }
  const csv = DUPLICATE_ISSUES_CSV_PATTERN.exec(envCall.toolRequest.args)?.[1];
  return csv ? csv.split(',').map((entry) => entry.trim()) : [];
}

describe('dedup_agent', () => {
  evalTest('USUALLY_PASSES', {
    name: 'should identify duplicate issues',
    prompt: ['--output-format', 'json', '--prompt', createPrompt(101)],
    env: {
      ISSUE_NUMBER: '101',
      GITHUB_ENV: 'github_env',
    },
    params: {
      settings: DEDUP_SETTINGS,
    },
    files: {
      github_env: '',
      // Mock gh binary
      'bin/gh': `#!/usr/bin/env node
const args = process.argv.slice(2).join(' ');
if (args.includes('issue view')) {
  const issueNum = args.match(/view (\\d+)/)?.[1];
  if (issueNum === '101') {
    console.log(JSON.stringify({
      number: 101,
      title: 'CLI crashes on start',
      body: 'It segfaults immediately.',
      comments: []
    }));
  } else if (issueNum === '201') {
    console.log(JSON.stringify({
      number: 201,
      title: 'Segfault on launch',
      body: 'The app crashes right away.',
      comments: []
    }));
  } else if (issueNum === '202') {
    console.log(JSON.stringify({
      number: 202,
      title: 'Unrelated bug',
      body: 'Themes are not working.',
      comments: []
    }));
  }
}
`,
    },
    assert: async (rig: any, result) => {
      // Verify JSON output stats
      const output = JSON.parse(result);
      expect(output.stats).toBeDefined();
      expect(output.stats.tools.byName['duplicates']).toBeDefined();
      expect(output.stats.tools.byName['run_shell_command']).toBeDefined();

      // Verify detailed tool usage via telemetry
      const toolLogs: ToolLog[] = rig.readToolLogs();
      expect(findToolCall(toolLogs, 'duplicates')).toBeDefined();

      // The current prompt uses echo to set GITHUB_ENV
      // We check the tool call for the echo command
      const issues = extractDuplicateIssues(toolLogs);
      expect(issues).toBeDefined();
      expect(issues).toContain('201');
      expect(issues).not.toContain('202');
    },
  });

  evalTest('USUALLY_PASSES', {
    name: 'should respect "not a duplicate" comments',
    prompt: ['--output-format', 'json', '--prompt', createPrompt(101)],
    env: {
      ISSUE_NUMBER: '101',
      GITHUB_ENV: 'github_env',
    },
    params: {
      settings: DEDUP_SETTINGS,
    },
    files: {
      github_env: '',
      'bin/gh': `#!/usr/bin/env node
const args = process.argv.slice(2).join(' ');
if (args.includes('issue view')) {
  const issueNum = args.match(/view (\\d+)/)?.[1];
  if (issueNum === '101') {
    console.log(JSON.stringify({
      number: 101,
      title: 'CLI crashes on start',
      body: 'It segfaults immediately.',
      comments: [{ body: 'Note: This is NOT a duplicate of #201, different root cause.' }]
    }));
  } else if (issueNum === '201') {
    console.log(JSON.stringify({
      number: 201,
      title: 'Segfault on launch',
      body: 'The app crashes right away.',
      comments: []
    }));
  } else {
    console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
  }
}
`,
    },
    assert: async (rig: any, result) => {
      // Verify JSON output stats
      const output = JSON.parse(result);
      expect(output.stats).toBeDefined();

      const toolLogs: ToolLog[] = rig.readToolLogs();
      expect(findToolCall(toolLogs, 'duplicates')).toBeDefined();

      // It might not call echo if no duplicates are found, or it might echo an empty list.
      // We'll check if it does call echo, that 201 is NOT in it.
      const issues = extractDuplicateIssues(toolLogs);
      if (issues) {
        expect(issues).not.toContain('201');
      }
    },
  });

  evalTest('USUALLY_PASSES', {
    name: 'should differentiate false positives with high similarity',
    prompt: ['--output-format', 'json', '--prompt', createPrompt(301)],
    env: {
      ISSUE_NUMBER: '301',
      GITHUB_ENV: 'github_env',
    },
    params: {
      settings: DEDUP_SETTINGS,
    },
    files: {
      github_env: '',
      'bin/gh': `#!/usr/bin/env node
const args = process.argv.slice(2).join(' ');
if (args.includes('issue view')) {
  const issueNum = args.match(/view (\\d+)/)?.[1];
  if (issueNum === '301') {
    console.log(JSON.stringify({
      number: 301,
      title: 'App crashes when I click Save',
      body: 'I click the save button and it crashes.',
      comments: []
    }));
  } else if (issueNum === '302') {
    console.log(JSON.stringify({
      number: 302,
      title: 'App crashes when I click Load',
      body: 'I click the load button and it crashes. This seems related to the loader component.',
      comments: []
    }));
  } else {
    console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
  }
}
`,
    },
    assert: async (rig: any, result) => {
      // Verify JSON output stats
      const output = JSON.parse(result);
      expect(output.stats).toBeDefined();

      const toolLogs: ToolLog[] = rig.readToolLogs();
      expect(findToolCall(toolLogs, 'duplicates')).toBeDefined();

      const issues = extractDuplicateIssues(toolLogs);
      if (issues) {
        // Should NOT contain 302 because it's a different feature (Save vs Load) despite crash
        expect(issues).not.toContain('302');
      }
    },
  });

  evalTest('USUALLY_PASSES', {
    name: 'should reject matches with low similarity',
    prompt: ['--output-format', 'json', '--prompt', createPrompt(401)],
    env: {
      ISSUE_NUMBER: '401',
      GITHUB_ENV: 'github_env',
    },
    params: {
      settings: DEDUP_SETTINGS,
    },
    files: {
      github_env: '',
      'bin/gh': `#!/usr/bin/env node
const args = process.argv.slice(2).join(' ');
if (args.includes('issue view')) {
  const issueNum = args.match(/view (\\d+)/)?.[1];
  if (issueNum === '401') {
    console.log(JSON.stringify({
      number: 401,
      title: 'Feature request: Dark mode',
      body: 'Please add dark mode.',
      comments: []
    }));
  } else if (issueNum === '402') {
    console.log(JSON.stringify({
      number: 402,
      title: 'Feature request: Light mode',
      body: 'Please add light mode.',
      comments: []
    }));
  } else {
    console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
  }
}
`,
    },
    assert: async (rig: any, result) => {
      // Verify JSON output stats
      const output = JSON.parse(result);
      expect(output.stats).toBeDefined();

      const toolLogs: ToolLog[] = rig.readToolLogs();
      expect(findToolCall(toolLogs, 'duplicates')).toBeDefined();

      const issues = extractDuplicateIssues(toolLogs);
      if (issues) {
        expect(issues).not.toContain('402');
        expect(issues.length).toBe(0);
      }
    },
  });

  evalTest('USUALLY_PASSES', {
    name: 'should identify multiple duplicates',
    prompt: ['--output-format', 'json', '--prompt', createPrompt(501)],
    env: {
      ISSUE_NUMBER: '501',
      GITHUB_ENV: 'github_env',
    },
    params: {
      settings: DEDUP_SETTINGS,
    },
    files: {
      github_env: '',
      'bin/gh': `#!/usr/bin/env node
const args = process.argv.slice(2).join(' ');
if (args.includes('issue view')) {
  const issueNum = args.match(/view (\\d+)/)?.[1];
  if (issueNum === '501') {
    console.log(JSON.stringify({
      number: 501,
      title: 'Crash on login',
      body: 'The app crashes when I try to log in.',
      comments: []
    }));
  } else if (issueNum === '502') {
    console.log(JSON.stringify({
      number: 502,
      title: 'Crash on sign in',
      body: 'Crashes during sign in process.',
      comments: []
    }));
  } else if (issueNum === '503') {
    console.log(JSON.stringify({
      number: 503,
      title: 'Crashes on login page',
      body: 'I get a crash immediately on the login page.',
      comments: []
    }));
  } else {
    console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
  }
}
`,
    },
    assert: async (rig: any, result) => {
      // Verify JSON output stats
      const output = JSON.parse(result);
      expect(output.stats).toBeDefined();

      const toolLogs: ToolLog[] = rig.readToolLogs();
      expect(findToolCall(toolLogs, 'duplicates')).toBeDefined();

      const issues = extractDuplicateIssues(toolLogs);
      expect(issues).toBeDefined();
      expect(issues).toContain('502');
      expect(issues).toContain('503');
    },
  });
});