chore: remove old eval files moved to workflows/

2026-06-12 20:37:08 -07:00 · 2026-02-03 21:03:24 -05:00
parent ff4e816a70
commit 36ce66933e
4 changed files with 0 additions and 1180 deletions
@@ -1,264 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, expect } from 'vitest';
-import { evalTest } from './test-helper.js';
-import fs from 'node:fs/promises';
-import path from 'node:path';
-import yaml from 'js-yaml';
-
-// Load the scheduled-triage workflow; the path is resolved from process.cwd().
-const workflowPath = path.join(
-  process.cwd(),
-  '.github/workflows/gemini-scheduled-issue-triage.yml',
-);
-const workflowContent = await fs.readFile(workflowPath, 'utf8');
-
-// Parsed YAML, typed with the fields this eval cares about (incl. `with.settings`).
-const workflowData = yaml.load(workflowContent) as {
-  jobs?: {
-    'triage-issues'?: {
-      steps?: {
-        id?: string;
-        with?: { prompt?: string; script?: string; settings?: string };
-        env?: { AVAILABLE_LABELS?: string };
-      }[];
-    };
-  };
-};
-
-// Locate the two workflow steps this eval depends on, addressed by step id.
-const triageSteps = workflowData.jobs?.['triage-issues']?.steps;
-const geminiStep = triageSteps?.find(
-  ({ id }) => id === 'gemini_issue_analysis',
-);
-const labelsStep = triageSteps?.find(({ id }) => id === 'get_labels');
-
-// Prompt, CLI settings (a JSON string; absent/empty means `{}`), label script.
-const BATCH_TRIAGE_PROMPT_TEMPLATE = geminiStep?.with?.prompt;
-const ORIGINAL_SETTINGS = JSON.parse(geminiStep?.with?.settings || '{}');
-const LABELS_SCRIPT = labelsStep?.with?.script;
-
-if (!BATCH_TRIAGE_PROMPT_TEMPLATE) {
-  throw new Error(
-    'Could not extract prompt from workflow file. Check for `jobs.triage-issues.steps[id=gemini_issue_analysis].with.prompt` in the YAML file.',
-  );
-}
-
-// Labels substituted for `${AVAILABLE_LABELS}` in the prompt.
-//
-// Per the original author's note, the scheduled workflow's get_labels script
-// collects every repository label at run time, so nothing can be parsed from
-// its text. A representative hard-coded set is used instead, and only when
-// the workflow defines that script.
-//
-// NOTE(review): cases in this file expect `status/need-retesting` and
-// `status/need-information`, which are not in this list — confirm intended.
-let availableLabels = '';
-if (LABELS_SCRIPT) {
-  availableLabels =
-    'area/agent, area/core, area/enterprise, area/extensions, area/non-interactive, area/platform, area/security, area/unknown, kind/bug, kind/feature, kind/question, priority/p0, priority/p1, priority/p2, priority/p3';
-}
-
-// Fills every `${AVAILABLE_LABELS}` placeholder in the workflow prompt.
-// split/join replaces all occurrences, not just the first one.
-const createPrompt = () =>
-  BATCH_TRIAGE_PROMPT_TEMPLATE.split('${AVAILABLE_LABELS}').join(
-    availableLabels,
-  );
-
-// Settings handed to the CLI under test: a shallow copy of the workflow's
-// settings with a truthy `telemetry` entry removed.
-const BATCH_TRIAGE_SETTINGS = { ...ORIGINAL_SETTINGS };
-if (BATCH_TRIAGE_SETTINGS.telemetry) {
-  Reflect.deleteProperty(BATCH_TRIAGE_SETTINGS, 'telemetry');
-}
-
-/**
- * Replaces the characters `<`, `>`, `&`, `'` and `"` in `str` with their HTML
- * entities; every other character is returned unchanged. Used to quote raw
- * model output inside error messages.
- */
-const escapeHtml = (str: string) => {
-  const entities: Record<string, string> = {
-    '<': '&lt;',
-    '>': '&gt;',
-    '&': '&amp;',
-    "'": '&apos;',
-    '"': '&quot;',
-  };
-  // The regex only matches keys of `entities`; `''` is the unreachable
-  // fallback kept from the switch-based version.
-  return str.replace(/[<>&'"]/g, (ch) => entities[ch] ?? '');
-};
-
-// Builds an assertion that the batch answer gives issue `issueNumber` the label
-// `expectedLabel`. The answer is a JSON array (optionally ```json-fenced) in the
-// `response` field of the CLI's JSON `result`; malformed output throws an Error.
-const assertHasIssueLabel = (issueNumber: number, expectedLabel: string) => {
-  return async (rig: any, result: string) => {
-    const output = JSON.parse(result);
-    expect(output.stats).toBeDefined();
-    const responseText: unknown = output.response;
-    if (typeof responseText !== 'string') {
-      throw new Error(`Output has no string "response": ${result}`);
-    }
-
-    // Prefer a fenced block; otherwise take the outermost [...] span.
-    let jsonString = responseText.match(/```json\s*([\s\S]*?)\s*```/)?.[1];
-    if (!jsonString) {
-      const firstBracket = responseText.indexOf('[');
-      const lastBracket = responseText.lastIndexOf(']');
-      if (firstBracket === -1 || lastBracket < firstBracket) {
-        throw new Error(
-          `Could not find a JSON array in the response: "${escapeHtml(responseText)}"`,
-        );
-      }
-      jsonString = responseText.substring(firstBracket, lastBracket + 1);
-    }
-
-    let data: unknown;
-    try {
-      data = JSON.parse(jsonString);
-    } catch (e) {
-      const reason = e instanceof Error ? e.message : String(e);
-      throw new Error(
-        `Failed to parse JSON. Error: ${reason}. Response: "${escapeHtml(responseText)}"`,
-      );
-    }
-    if (!Array.isArray(data)) {
-      throw new Error(`Expected a JSON array, got: ${JSON.stringify(data)}`);
-    }
-
-    const issue = data.find((i) => i?.issue_number === issueNumber);
-    if (!issue) {
-      throw new Error(
-        `Issue #${issueNumber} not found in output: ${JSON.stringify(data)}`,
-      );
-    }
-    expect(issue.labels_to_add).toContain(expectedLabel);
-  };
-};
-
-describe('batch_triage_agent', () => {
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/core for local test failures in batch',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    env: {
-      AVAILABLE_LABELS: availableLabels,
-      ISSUES_TO_TRIAGE: JSON.stringify([
-        {
-          number: 101,
-          title: 'Local tests failing',
-          body: 'I am running npm test locally and it fails with an error.',
-        },
-      ]),
-    },
-    params: { settings: BATCH_TRIAGE_SETTINGS },
-    assert: assertHasIssueLabel(101, 'area/core'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/platform for CI failures in batch',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    env: {
-      AVAILABLE_LABELS: availableLabels,
-      ISSUES_TO_TRIAGE: JSON.stringify([
-        {
-          number: 102,
-          title: 'CI pipeline failed',
-          body: 'The GitHub Action for tests failed on the main branch.',
-        },
-      ]),
-    },
-    params: { settings: BATCH_TRIAGE_SETTINGS },
-    assert: assertHasIssueLabel(102, 'area/platform'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should handle mixed batch correctly',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    env: {
-      AVAILABLE_LABELS: availableLabels,
-      ISSUES_TO_TRIAGE: JSON.stringify([
-        {
-          number: 103,
-          title: 'Cannot install on MacOS',
-          body: 'Install fails with permission error.',
-        },
-        {
-          number: 104,
-          title: 'Click to win',
-          body: 'Spam body',
-        },
-      ]),
-    },
-    params: { settings: BATCH_TRIAGE_SETTINGS },
-    assert: async (rig: any, result) => {
-      // Assert issue 103 has area/core
-      await assertHasIssueLabel(103, 'area/core')(rig, result);
-      // Assert issue 104 has area/unknown
-      await assertHasIssueLabel(104, 'area/unknown')(rig, result);
-    },
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should handle issues needing retesting (old version)',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    env: {
-      AVAILABLE_LABELS: availableLabels,
-      ISSUES_TO_TRIAGE: JSON.stringify([
-        {
-          number: 105,
-          title: 'Crash on version 0.1.0',
-          body: 'I am using /about and it says 0.1.0. The app crashes when I run it.',
-        },
-      ]),
-    },
-    params: { settings: BATCH_TRIAGE_SETTINGS },
-    assert: assertHasIssueLabel(105, 'status/need-retesting'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should handle issues needing more information',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    env: {
-      AVAILABLE_LABELS: availableLabels,
-      ISSUES_TO_TRIAGE: JSON.stringify([
-        {
-          number: 106,
-          title: 'It does not work',
-          body: 'Something is broken.',
-        },
-      ]),
-    },
-    params: { settings: BATCH_TRIAGE_SETTINGS },
-    assert: assertHasIssueLabel(106, 'status/need-information'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should handle large batch of diverse issues',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    env: {
-      AVAILABLE_LABELS: availableLabels,
-      ISSUES_TO_TRIAGE: JSON.stringify([
-        { number: 107, title: 'Bug A', body: 'Local test failure' },
-        { number: 108, title: 'Bug B', body: 'CI failure' },
-        { number: 109, title: 'Bug C', body: 'Security leak' },
-        { number: 110, title: 'Bug D', body: 'Spam' },
-        { number: 111, title: 'Bug E', body: 'Old version 0.0.1' },
-      ]),
-    },
-    params: { settings: BATCH_TRIAGE_SETTINGS },
-    assert: async (rig: any, result) => {
-      await assertHasIssueLabel(107, 'area/core')(rig, result);
-      await assertHasIssueLabel(108, 'area/platform')(rig, result);
-      await assertHasIssueLabel(109, 'area/security')(rig, result);
-      await assertHasIssueLabel(110, 'area/unknown')(rig, result);
-      await assertHasIssueLabel(111, 'status/need-retesting')(rig, result);
-    },
-  });
-});
@@ -1,422 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, expect } from 'vitest';
-import { evalTest } from './test-helper.js';
-import fs from 'node:fs/promises';
-import path from 'node:path';
-import yaml from 'js-yaml';
-
-// Load the automated de-duplication workflow, resolved from process.cwd().
-const workflowPath = path.join(
-  process.cwd(),
-  '.github/workflows/gemini-automated-issue-dedup.yml',
-);
-const workflowContent = await fs.readFile(workflowPath, 'utf8');
-const workflowData = yaml.load(workflowContent) as any;
-
-// The Gemini step carries both the prompt and the CLI settings (a JSON string).
-const dedupSteps = workflowData.jobs?.['find-duplicates']?.steps;
-const geminiStep = dedupSteps?.find(
-  (candidate: any) => candidate.id === 'gemini_issue_deduplication',
-);
-const DEDUP_PROMPT_TEMPLATE = geminiStep?.with?.prompt;
-const ORIGINAL_SETTINGS = JSON.parse(geminiStep?.with?.settings || '{}');
-if (!DEDUP_PROMPT_TEMPLATE) {
-  throw new Error('Could not extract prompt from de-duplication workflow.');
-}
-
-const mockMcpPath = path.join(process.cwd(), 'evals/mocks/dedup_mcp.ts');
-
-const createPrompt = (issueNumber: number) => {
-  // `${ISSUE_NUMBER}` is left untouched here; the tests pass it via env.
-  const repoExpr = /\${{ github\.repository }}/g;
-  const issueExpr = /\${{ github\.event\.issue\.number }}/g;
-  return DEDUP_PROMPT_TEMPLATE.replace(repoExpr, 'google-gemini/gemini-cli')
-    .replace(issueExpr, String(issueNumber));
-};
-
-// Settings for the eval run: the workflow's settings with `mcpServers`
-// overridden to run only the local mock of `issue_deduplication` (started via
-// `npx tsx`) and a truthy `telemetry` entry removed.
-const mockDedupServer = { command: 'npx', args: ['tsx', mockMcpPath] };
-
-const DEDUP_SETTINGS = {
-  ...ORIGINAL_SETTINGS,
-  mcpServers: { issue_deduplication: mockDedupServer },
-};
-if (DEDUP_SETTINGS.telemetry) {
-  Reflect.deleteProperty(DEDUP_SETTINGS, 'telemetry');
-}
-
-describe('dedup_agent', () => {
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify duplicate issues',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt(101)],
-    env: {
-      ISSUE_NUMBER: '101',
-      GITHUB_ENV: 'github_env',
-    },
-    params: {
-      settings: DEDUP_SETTINGS,
-    },
-    files: {
-      github_env: '',
-      // Mock gh binary
-      'bin/gh': `#!/usr/bin/env node
-const args = process.argv.slice(2).join(' ');
-if (args.includes('issue view')) {
-    const issueNum = args.match(/view (\\d+)/)?.[1];
-    if (issueNum === '101') {
-        console.log(JSON.stringify({
-            number: 101,
-            title: 'CLI crashes on start',
-            body: 'It segfaults immediately.',
-            comments: []
-        }));
-    } else if (issueNum === '201') {
-        console.log(JSON.stringify({
-            number: 201,
-            title: 'Segfault on launch',
-            body: 'The app crashes right away.',
-            comments: []
-        }));
-    } else if (issueNum === '202') {
-        console.log(JSON.stringify({
-            number: 202,
-            title: 'Unrelated bug',
-            body: 'Themes are not working.',
-            comments: []
-        }));
-    }
-}
-`,
-    },
-    assert: async (rig: any, result) => {
-      // Verify JSON output stats
-      const output = JSON.parse(result);
-      expect(output.stats).toBeDefined();
-      expect(output.stats.tools.byName['duplicates']).toBeDefined();
-      expect(output.stats.tools.byName['run_shell_command']).toBeDefined();
-
-      // Verify detailed tool usage via telemetry
-      const toolLogs = rig.readToolLogs();
-      const duplicatesCall = toolLogs.find(
-        (l: any) => l.toolRequest.name === 'duplicates',
-      );
-      expect(duplicatesCall).toBeDefined();
-
-      // The current prompt uses echo to set GITHUB_ENV
-      // We check the tool call for the echo command
-      const shellCalls = toolLogs.filter(
-        (l: any) => l.toolRequest.name === 'run_shell_command',
-      );
-      const envCall = shellCalls.find((call: any) =>
-        call.toolRequest.args.includes('DUPLICATE_ISSUES_CSV'),
-      );
-
-      expect(envCall).toBeDefined();
-      // Check the command content
-      const match = envCall.toolRequest.args.match(
-        /DUPLICATE_ISSUES_CSV=\[?([\d, ]*)\]?/,
-      );
-      expect(match).not.toBeNull();
-      const issues = match![1]
-        .split(',')
-        .map((s: string) => s.trim())
-        .filter((s: string) => s);
-      expect(issues).toContain('201');
-      expect(issues).not.toContain('202');
-    },
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should respect "not a duplicate" comments',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt(101)],
-    env: {
-      ISSUE_NUMBER: '101',
-      GITHUB_ENV: 'github_env',
-    },
-    params: {
-      settings: DEDUP_SETTINGS,
-    },
-    files: {
-      github_env: '',
-      'bin/gh': `#!/usr/bin/env node
-const args = process.argv.slice(2).join(' ');
-if (args.includes('issue view')) {
-    const issueNum = args.match(/view (\\d+)/)?.[1];
-    if (issueNum === '101') {
-        console.log(JSON.stringify({
-            number: 101,
-            title: 'CLI crashes on start',
-            body: 'It segfaults immediately.',
-            comments: [{ body: 'Note: This is NOT a duplicate of #201, different root cause.' }]
-        }));
-    } else if (issueNum === '201') {
-        console.log(JSON.stringify({
-            number: 201,
-            title: 'Segfault on launch',
-            body: 'The app crashes right away.',
-            comments: []
-        }));
-    } else {
-        console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
-    }
-}
-`,
-    },
-    assert: async (rig: any, result) => {
-      // Verify JSON output stats
-      const output = JSON.parse(result);
-      expect(output.stats).toBeDefined();
-
-      const toolLogs = rig.readToolLogs();
-      const duplicatesCall = toolLogs.find(
-        (l: any) => l.toolRequest.name === 'duplicates',
-      );
-      expect(duplicatesCall).toBeDefined();
-
-      const shellCalls = toolLogs.filter(
-        (l: any) => l.toolRequest.name === 'run_shell_command',
-      );
-      // It might not call echo if no duplicates are found, or it might echo an empty list.
-      // We'll check if it does call echo, that 201 is NOT in it.
-      const envCall = shellCalls.find((call: any) =>
-        call.toolRequest.args.includes('DUPLICATE_ISSUES_CSV'),
-      );
-
-      if (envCall) {
-        const match = envCall.toolRequest.args.match(
-          /DUPLICATE_ISSUES_CSV=\[?([\d, ]*)\]?/,
-        );
-        const issues = match
-          ? match[1]
-              .split(',')
-              .map((s: string) => s.trim())
-              .filter((s: string) => s)
-          : [];
-        expect(issues).not.toContain('201');
-      }
-    },
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should differentiate false positives with high similarity',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt(301)],
-    env: {
-      ISSUE_NUMBER: '301',
-      GITHUB_ENV: 'github_env',
-    },
-    params: {
-      settings: DEDUP_SETTINGS,
-    },
-    files: {
-      github_env: '',
-      'bin/gh': `#!/usr/bin/env node
-const args = process.argv.slice(2).join(' ');
-if (args.includes('issue view')) {
-    const issueNum = args.match(/view (\\d+)/)?.[1];
-    if (issueNum === '301') {
-        console.log(JSON.stringify({
-            number: 301,
-            title: 'App crashes when I click Save',
-            body: 'I click the save button and it crashes.',
-            comments: []
-        }));
-    } else if (issueNum === '302') {
-        console.log(JSON.stringify({
-            number: 302,
-            title: 'App crashes when I click Load',
-            body: 'I click the load button and it crashes. This seems related to the loader component.',
-            comments: []
-        }));
-    } else {
-        console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
-    }
-}
-`,
-    },
-    assert: async (rig: any, result) => {
-      // Verify JSON output stats
-      const output = JSON.parse(result);
-      expect(output.stats).toBeDefined();
-
-      const toolLogs = rig.readToolLogs();
-      const duplicatesCall = toolLogs.find(
-        (l: any) => l.toolRequest.name === 'duplicates',
-      );
-      expect(duplicatesCall).toBeDefined();
-
-      const shellCalls = toolLogs.filter(
-        (l: any) => l.toolRequest.name === 'run_shell_command',
-      );
-      const envCall = shellCalls.find((call: any) =>
-        call.toolRequest.args.includes('DUPLICATE_ISSUES_CSV'),
-      );
-
-      if (envCall) {
-        const match = envCall.toolRequest.args.match(
-          /DUPLICATE_ISSUES_CSV=\[?([\d, ]*)\]?/,
-        );
-        const issues = match
-          ? match[1]
-              .split(',')
-              .map((s: string) => s.trim())
-              .filter((s: string) => s)
-          : [];
-        // Should NOT contain 302 because it's a different feature (Save vs Load) despite crash
-        expect(issues).not.toContain('302');
-      }
-    },
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should reject matches with low similarity',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt(401)],
-    env: {
-      ISSUE_NUMBER: '401',
-      GITHUB_ENV: 'github_env',
-    },
-    params: {
-      settings: DEDUP_SETTINGS,
-    },
-    files: {
-      github_env: '',
-      'bin/gh': `#!/usr/bin/env node
-const args = process.argv.slice(2).join(' ');
-if (args.includes('issue view')) {
-    const issueNum = args.match(/view (\\d+)/)?.[1];
-    if (issueNum === '401') {
-        console.log(JSON.stringify({
-            number: 401,
-            title: 'Feature request: Dark mode',
-            body: 'Please add dark mode.',
-            comments: []
-        }));
-    } else if (issueNum === '402') {
-        console.log(JSON.stringify({
-            number: 402,
-            title: 'Feature request: Light mode',
-            body: 'Please add light mode.',
-            comments: []
-        }));
-    } else {
-        console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
-    }
-}
-`,
-    },
-    assert: async (rig: any, result) => {
-      // Verify JSON output stats
-      const output = JSON.parse(result);
-      expect(output.stats).toBeDefined();
-
-      const toolLogs = rig.readToolLogs();
-      const duplicatesCall = toolLogs.find(
-        (l: any) => l.toolRequest.name === 'duplicates',
-      );
-      expect(duplicatesCall).toBeDefined();
-
-      const shellCalls = toolLogs.filter(
-        (l: any) => l.toolRequest.name === 'run_shell_command',
-      );
-      const envCall = shellCalls.find((call: any) =>
-        call.toolRequest.args.includes('DUPLICATE_ISSUES_CSV'),
-      );
-
-      if (envCall) {
-        const match = envCall.toolRequest.args.match(
-          /DUPLICATE_ISSUES_CSV=\[?([\d, ]*)\]?/,
-        );
-        const issues = match
-          ? match[1]
-              .split(',')
-              .map((s: string) => s.trim())
-              .filter((s: string) => s)
-          : [];
-        expect(issues).not.toContain('402');
-        expect(issues.length).toBe(0);
-      }
-    },
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify multiple duplicates',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt(501)],
-    env: {
-      ISSUE_NUMBER: '501',
-      GITHUB_ENV: 'github_env',
-    },
-    params: {
-      settings: DEDUP_SETTINGS,
-    },
-    files: {
-      github_env: '',
-      'bin/gh': `#!/usr/bin/env node
-const args = process.argv.slice(2).join(' ');
-if (args.includes('issue view')) {
-    const issueNum = args.match(/view (\\d+)/)?.[1];
-    if (issueNum === '501') {
-        console.log(JSON.stringify({
-            number: 501,
-            title: 'Crash on login',
-            body: 'The app crashes when I try to log in.',
-            comments: []
-        }));
-    } else if (issueNum === '502') {
-        console.log(JSON.stringify({
-            number: 502,
-            title: 'Crash on sign in',
-            body: 'Crashes during sign in process.',
-            comments: []
-        }));
-    } else if (issueNum === '503') {
-        console.log(JSON.stringify({
-            number: 503,
-            title: 'Crashes on login page',
-            body: 'I get a crash immediately on the login page.',
-            comments: []
-        }));
-    } else {
-        console.log(JSON.stringify({ number: parseInt(issueNum), title: '', body: '', comments: [] }));
-    }
-}
-`,
-    },
-    assert: async (rig: any, result) => {
-      // Verify JSON output stats
-      const output = JSON.parse(result);
-      expect(output.stats).toBeDefined();
-
-      const toolLogs = rig.readToolLogs();
-      const duplicatesCall = toolLogs.find(
-        (l: any) => l.toolRequest.name === 'duplicates',
-      );
-      expect(duplicatesCall).toBeDefined();
-
-      const shellCalls = toolLogs.filter(
-        (l: any) => l.toolRequest.name === 'run_shell_command',
-      );
-      const envCall = shellCalls.find((call: any) =>
-        call.toolRequest.args.includes('DUPLICATE_ISSUES_CSV'),
-      );
-
-      expect(envCall).toBeDefined();
-      const match = envCall.toolRequest.args.match(
-        /DUPLICATE_ISSUES_CSV=\[?([\d, ]*)\]?/,
-      );
-      const issues = match
-        ? match[1]
-            .split(',')
-            .map((s: string) => s.trim())
-            .filter((s: string) => s)
-        : [];
-      expect(issues).toContain('502');
-      expect(issues).toContain('503');
-    },
-  });
-});
@@ -1,81 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, expect } from 'vitest';
-import { evalTest } from './test-helper.js';
-import fs from 'node:fs/promises';
-import path from 'node:path';
-import yaml from 'js-yaml';
-
-// Load the scheduled de-duplication workflow, resolved from process.cwd().
-const workflowPath = path.join(
-  process.cwd(),
-  '.github/workflows/gemini-scheduled-issue-dedup.yml',
-);
-const workflowContent = await fs.readFile(workflowPath, 'utf8');
-const workflowData = yaml.load(workflowContent) as any;
-
-// The refresh step carries both the prompt and the CLI settings (a JSON string).
-const refreshSteps = workflowData.jobs?.['refresh-embeddings']?.steps;
-const geminiStep = refreshSteps?.find(
-  (candidate: any) => candidate.id === 'gemini_refresh_embeddings',
-);
-const REFRESH_PROMPT_TEMPLATE = geminiStep?.with?.prompt;
-const ORIGINAL_SETTINGS = JSON.parse(geminiStep?.with?.settings || '{}');
-if (!REFRESH_PROMPT_TEMPLATE) {
-  throw new Error('Could not extract prompt from dedup refresh workflow.');
-}
-
-const mockMcpPath = path.join(process.cwd(), 'evals/mocks/dedup_mcp.ts');
-
-// Substitutes the repository slug for each `${{ github.repository }}`
-// expression in the refresh prompt.
-const createPrompt = () => {
-  const repoExpr = /\${{ github\.repository }}/g;
-  return REFRESH_PROMPT_TEMPLATE.replace(repoExpr, 'google-gemini/gemini-cli');
-};
-
-// Settings for the eval run: the workflow's settings with `mcpServers`
-// overridden to run only the local mock of `issue_deduplication` (started via
-// `npx tsx`) and a truthy `telemetry` entry removed.
-const mockRefreshServer = { command: 'npx', args: ['tsx', mockMcpPath] };
-
-const REFRESH_SETTINGS = {
-  ...ORIGINAL_SETTINGS,
-  mcpServers: { issue_deduplication: mockRefreshServer },
-};
-if (REFRESH_SETTINGS.telemetry) {
-  Reflect.deleteProperty(REFRESH_SETTINGS, 'telemetry');
-}
-
-// Eval suite for the refresh-embeddings workflow prompt.
-describe('dedup_refresh_agent', () => {
-  evalTest('USUALLY_PASSES', {
-    name: 'should call refresh tool',
-    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
-    approvalMode: 'yolo',
-    params: {
-      settings: REFRESH_SETTINGS,
-    },
-    assert: async (rig: any, result) => {
-      // `result` is the CLI's JSON output; its stats must show exactly one
-      // successful call to the `refresh` tool.
-      const { stats } = JSON.parse(result);
-      expect(stats).toBeDefined();
-
-      const refreshStats = stats.tools.byName.refresh;
-      expect(refreshStats).toBeDefined();
-      expect(refreshStats.count).toBe(1);
-      expect(refreshStats.success).toBe(1);
-
-      // The tool-call log read from the rig must record the call as well.
-      const refreshCall = rig
-        .readToolLogs()
-        .find((entry: any) => entry.toolRequest.name === 'refresh');
-      expect(refreshCall).toBeDefined();
-    },
-  });
-});
@@ -1,413 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, expect } from 'vitest';
-import { evalTest } from './test-helper.js';
-import fs from 'node:fs/promises';
-import path from 'node:path';
-import yaml from 'js-yaml';
-
-// Read the workflow file to extract the prompt
-const workflowPath = path.join(
-  process.cwd(),
-  '.github/workflows/gemini-automated-issue-triage.yml',
-);
-const workflowContent = await fs.readFile(workflowPath, 'utf8');
-
-// Use a YAML parser for robustness
-const workflowData = yaml.load(workflowContent) as {
-  jobs?: {
-    'triage-issue'?: {
-      steps?: {
-        id?: string;
-        with?: { prompt?: string; script?: string };
-      }[];
-    };
-  };
-};
-
-// Find the analysis and label steps of the `triage-issue` job by step id.
-const triageJobSteps = workflowData.jobs?.['triage-issue']?.steps;
-const triageStep = triageJobSteps?.find(
-  ({ id }) => id === 'gemini_issue_analysis',
-);
-const labelsStep = triageJobSteps?.find(({ id }) => id === 'get_labels');
-
-// Prompt text and the inline label script, either of which may be undefined.
-const TRIAGE_PROMPT_TEMPLATE = triageStep?.with?.prompt;
-const LABELS_SCRIPT = labelsStep?.with?.script;
-
-if (!TRIAGE_PROMPT_TEMPLATE) {
-  throw new Error(
-    'Could not extract prompt from workflow file. Check for `jobs.triage-issue.steps[id=gemini_issue_analysis].with.prompt` in the YAML file.',
-  );
-}
-
-// Recover the `allowedLabels` array literal from the get_labels script and
-// normalise it to an "a, b, c" list; the suite refuses to run without one.
-const labelArrayBody = LABELS_SCRIPT?.match(
-  /const allowedLabels = \[([\s\S]+?)\];/,
-)?.[1];
-let availableLabels = '';
-if (labelArrayBody) {
-  const labelNames: string[] = [];
-  for (const piece of labelArrayBody.replace(/['"\n\r]/g, '').split(',')) {
-    const name = piece.trim();
-    if (name.length > 0) labelNames.push(name);
-  }
-  availableLabels = labelNames.join(', ');
-}
-
-if (!availableLabels) {
-  throw new Error(
-    'Could not extract available labels from workflow file. Check for `jobs.triage-issue.steps[id=get_labels].with.script` containing `const allowedLabels = [...]`.',
-  );
-}
-
-// Fills the workflow's `${{ env.* }}` placeholders with concrete test values.
-// Replacer callbacks are used so `$&`, `$1`, `$$` … inside an issue title or
-// body are inserted literally instead of being expanded by `String.replace`.
-const createPrompt = (title: string, body: string) =>
-  TRIAGE_PROMPT_TEMPLATE.replace('${{ env.ISSUE_TITLE }}', () => title)
-    .replace('${{ env.ISSUE_BODY }}', () => body)
-    .replace('${{ env.AVAILABLE_LABELS }}', () => availableLabels);
-
-const TRIAGE_SETTINGS = {};
-
-/**
- * HTML-escapes `str` so it can be quoted inside an error message: `&`, `<`,
- * `>`, `'` and `"` become named entities, everything else is left as is.
- *
- * @param str - Text to escape.
- * @returns The escaped text.
- */
-const escapeHtml = (str: string) => {
-  // `&` goes first so the `&` of entities inserted afterwards is not
-  // escaped a second time.
-  return str
-    .replace(/&/g, '&amp;')
-    .replace(/</g, '&lt;')
-    .replace(/>/g, '&gt;')
-    .replace(/'/g, '&apos;')
-    .replace(/"/g, '&quot;');
-};
-
-// Builds an assertion that the triage answer lists `expectedLabel` in its
-// `labels_to_set` array. The answer is a JSON object (optionally ```json-fenced)
-// in the `response` field of the CLI's JSON `result`.
-const assertHasLabel = (expectedLabel: string) => {
-  return async (rig: any, result: string) => {
-    const output = JSON.parse(result);
-    expect(output.stats).toBeDefined();
-    const responseText: unknown = output.response;
-    if (typeof responseText !== 'string') {
-      throw new Error(`Output has no string "response": ${result}`);
-    }
-    // Prefer a fenced block; otherwise take the outermost {...} span.
-    let jsonString = responseText.match(/```json\s*([\s\S]*?)\s*```/)?.[1];
-    if (!jsonString) {
-      const firstBrace = responseText.indexOf('{');
-      const lastBrace = responseText.lastIndexOf('}');
-      if (firstBrace === -1 || lastBrace < firstBrace) {
-        throw new Error(
-          `Could not find a JSON object in the response: "${escapeHtml(responseText)}"`,
-        );
-      }
-      jsonString = responseText.substring(firstBrace, lastBrace + 1);
-    }
-
-    let data: { labels_to_set?: string[] };
-    try {
-      data = JSON.parse(jsonString);
-    } catch (e) {
-      const reason = e instanceof Error ? e.message : String(e);
-      throw new Error(
-        `Failed to parse JSON. Error: ${reason}. Response: "${escapeHtml(responseText)}"`,
-      );
-    }
-    expect(data).toHaveProperty('labels_to_set');
-    expect(Array.isArray(data.labels_to_set)).toBe(true);
-    expect(data.labels_to_set).toContain(expectedLabel);
-  };
-};
-
-describe('triage_agent', () => {
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/core for windows installation issues',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'CLI failed to install on Windows',
-        'I tried running npm install but it failed with an error on Windows 11.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/core'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/platform for CI/CD failures',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'Tests are failing in the CI/CD pipeline',
-        'The github action is failing with a 500 error.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/platform'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/platform for quota issues',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'Resource Exhausted 429',
-        'I am getting a 429 error when running the CLI.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/platform'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/core for local build failures',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'Local build failing',
-        'I cannot build the project locally. npm run build fails.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/core'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/platform for sandbox issues',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'Sandbox connection failed',
-        'I cannot connect to the docker sandbox environment.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/platform'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/core for local test failures',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'Local tests failing',
-        'I am running npm test locally and it fails.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/core'),
-  });
-
-  evalTest('USUALLY_PASSES', {
-    name: 'should identify area/agent for questions about tools',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt(
-        'Bug with web search?',
-        'I am trying to use web search but I do not know the syntax. Is it @web or /web?',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/agent'),
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a request for a python extension is expected to receive the area/extensions label.
-    name: 'should identify area/extensions for feature requests',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Please add a python extension',
-        'I want to write python scripts as an extension.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/extensions'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: off-topic advertising text is expected to receive the area/unknown label.
-    name: 'should identify area/unknown for off-topic spam',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt('Buy cheap rolex', 'Click here for discount.'), // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/unknown'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a segfault report worded as a question is expected to receive the area/core label.
-    name: 'should identify area/core for crash reports phrased as questions',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Why does it segfault?',
-        'Why does the CLI segfault immediately when I run it on Ubuntu?',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/core'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a request for a built-in diff tool is expected to receive the area/agent label.
-    name: 'should identify area/agent for feature requests for built-in tools',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Can we have a diff tool?',
-        'Is it possible to add a built-in tool to show diffs before editing?',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/agent'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a question about where to enter an enterprise license key is expected to receive the area/enterprise label.
-    name: 'should identify area/enterprise for license questions',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'License key issue',
-        'Where do I enter my enterprise license key? I cannot find the setting.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/enterprise'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a report with no actionable detail is expected to receive the area/unknown label.
-    name: 'should identify area/unknown for extremely vague reports',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt('It does not work', 'I tried to use it and it failed.'), // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/unknown'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a prompt-injection vulnerability report is expected to receive the area/security label.
-    name: 'should identify area/security for prompt injection reports',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Prompt injection vulnerability',
-        'I found a way to make the agent ignore instructions by saying "Ignore all previous instructions".', // The body itself embeds an injection-style phrase as quoted text.
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/security'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a crash when running with --headless is expected to receive the area/non-interactive label.
-    name: 'should identify area/non-interactive for headless crashes',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Headless mode segfault',
-        'When I run with --headless, the CLI crashes immediately.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/non-interactive'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: praise mixed with an @web tool error report is expected to receive the area/agent label.
-    name: 'should identify area/agent for mixed feedback and tool bugs',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Great tool but web search fails',
-        'I love using Gemini CLI, it is amazing! However, the @web tool gives me an error every time I search for "react".',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/agent'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a report of a lagging, unresponsive interface is expected to receive the area/core label.
-    name: 'should identify area/core for UI performance issues',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'UI is very slow',
-        'The new interface is lagging and unresponsive when I scroll.',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/core'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: a user who posted an API key in an issue comment is expected to receive the area/security label.
-    name: 'should identify area/security for accidental secret leakage',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt( // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-        'Leaked API key in logs',
-        'I accidentally posted my API key in a previous issue comment. Can you delete it?',
-      ),
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/security'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-
-  evalTest('USUALLY_PASSES', { // Eval case: keyboard-mash text with no meaning is expected to receive the area/unknown label.
-    name: 'should identify area/unknown for nonsensical input',
-    prompt: [
-      '--output-format',
-      'json',
-      '--prompt',
-      createPrompt('asdfasdf', 'qwerqwer zxcvbnm'), // NOTE(review): arguments read as (issue title, issue body); createPrompt is defined outside this chunk — confirm.
-    ],
-    params: { settings: TRIAGE_SETTINGS },
-    assert: assertHasLabel('area/unknown'), // presumably verifies the result contains this label; helper is not visible here — verify.
-  });
-});