// Mirror of https://github.com/google-gemini/gemini-cli.git (synced 2026-03-13 23:51:16 -07:00); 84 lines, 2.4 KiB, TypeScript.
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import { describe, expect } from 'vitest';
|
|
import { evalTest } from '../test-helper.js';
|
|
import fs from 'node:fs/promises';
|
|
import path from 'node:path';
|
|
import yaml from 'js-yaml';
|
|
import { WORKFLOW_TARGET_MODELS } from './constants.js';
|
|
|
|
// Load the scheduled issue-dedup workflow definition (resolved against the
// current working directory) so the prompt and settings used below are taken
// from the workflow file itself.
const dedupWorkflowFile = path.join(
  process.cwd(),
  '.github/workflows/gemini-scheduled-issue-dedup.yml',
);
const parsedWorkflow = yaml.load(
  await fs.readFile(dedupWorkflowFile, 'utf8'),
) as any;

// Pick the step with id `gemini_refresh_embeddings` out of the
// `refresh-embeddings` job; every lookup is optional-chained so a missing
// job or step yields `undefined` rather than throwing here.
const refreshJobSteps = parsedWorkflow.jobs?.['refresh-embeddings']?.steps;
const geminiStep = refreshJobSteps?.find(
  (candidate: any) => candidate.id === 'gemini_refresh_embeddings',
);
const stepInputs = geminiStep?.with;

// `with.prompt` is used as the prompt template; `with.settings` is parsed as
// JSON and falls back to an empty object when it is absent or empty.
const REFRESH_PROMPT_TEMPLATE = stepInputs?.prompt;
const ORIGINAL_SETTINGS = JSON.parse(stepInputs?.settings || '{}');

// Without a prompt there is nothing to evaluate, so fail at module load.
if (!REFRESH_PROMPT_TEMPLATE) {
  throw new Error('Could not extract prompt from dedup refresh workflow.');
}

// Location of evals/mocks/dedup_mcp.ts under the current working directory.
const mockMcpPath = path.join(process.cwd(), 'evals/mocks/dedup_mcp.ts');
|
|
|
|
/**
 * Builds the prompt passed to the CLI.
 *
 * Every literal `${{ github.repository }}` placeholder in
 * REFRESH_PROMPT_TEMPLATE is replaced with `google-gemini/gemini-cli`.
 *
 * @returns The prompt template with the repository placeholder filled in.
 */
const createPrompt = () => {
  // Braces are escaped explicitly: the unescaped form only parses through the
  // legacy (Annex B) regex grammar and is a SyntaxError under the `u` flag.
  const repositoryPlaceholder = /\$\{\{ github\.repository \}\}/g;
  return REFRESH_PROMPT_TEMPLATE.replace(
    repositoryPlaceholder,
    'google-gemini/gemini-cli',
  );
};
|
|
|
|
// Server entry registered under `mcpServers.issue_deduplication`: runs the
// script at `mockMcpPath` through `npx tsx`.
const mockDedupServer = {
  command: 'npx',
  args: ['tsx', mockMcpPath],
};

// Settings for the eval: everything from ORIGINAL_SETTINGS, with `mcpServers`
// replaced wholesale by the single server entry above.
const REFRESH_SETTINGS = {
  ...ORIGINAL_SETTINGS,
  mcpServers: { issue_deduplication: mockDedupServer },
};

// Drop a truthy `telemetry` entry carried over from ORIGINAL_SETTINGS.
if (REFRESH_SETTINGS.telemetry) {
  delete REFRESH_SETTINGS.telemetry;
}
|
|
|
|
// Eval: run the prompt built by createPrompt() with REFRESH_SETTINGS and
// check that the `refresh` tool was called exactly once and succeeded.
describe('dedup_refresh_agent', () => {
  evalTest('USUALLY_PASSES', {
    name: 'should call refresh tool',
    prompt: ['--output-format', 'json', '--prompt', createPrompt()],
    approvalMode: 'yolo',
    params: { settings: REFRESH_SETTINGS },
    targetModels: WORKFLOW_TARGET_MODELS,
    assert: async (rig: any, result) => {
      // `result` is parsed as the JSON output requested via --output-format.
      const { stats } = JSON.parse(result);
      expect(stats).toBeDefined();

      // Per-tool statistics: exactly one `refresh` call, and it succeeded.
      const refreshStats = stats.tools.byName.refresh;
      expect(refreshStats).toBeDefined();
      expect(refreshStats.count).toBe(1);
      expect(refreshStats.success).toBe(1);

      // The stats cover the high-level goal; the tool logs are still checked
      // so the call's arguments can be inspected in depth if needed.
      const loggedCalls = rig.readToolLogs();
      const refreshCall = loggedCalls.find(
        (entry: any) => entry.toolRequest.name === 'refresh',
      );
      expect(refreshCall).toBeDefined();
    },
  });
});
|