mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-20 00:32:31 -07:00
267 lines
8.8 KiB
TypeScript
267 lines
8.8 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe, it, expect } from 'vitest';
|
|
import { IrMapper } from './mapper.js';
|
|
import { ContextTokenCalculator } from '../utils/contextTokenCalculator.js';
|
|
import type { Content } from '@google/genai';
|
|
import type { UserPrompt, ToolExecution, AgentThought } from './types.js';
|
|
|
|
describe('IrMapper', () => {
|
|
it('should correctly map a complex conversation into Episodes and back', () => {
  // Scenario: one user prompt, a model turn issuing two read_file calls, the
  // user turn carrying both responses, a second model turn issuing a shell
  // call, its response, and a closing text-only model turn.
  const rawHistory: Content[] = [
    { role: 'user', parts: [{ text: 'Can you read file A and B?' }] },
    {
      role: 'model',
      parts: [
        { text: 'Let me check those files.' },
        {
          functionCall: {
            id: 'call_1',
            name: 'read_file',
            args: { filepath: 'A.txt' },
          },
        },
        {
          functionCall: {
            id: 'call_2',
            name: 'read_file',
            args: { filepath: 'B.txt' },
          },
        },
      ],
    },
    {
      role: 'user',
      parts: [
        {
          functionResponse: {
            id: 'call_1',
            name: 'read_file',
            response: { output: 'Contents of A' },
          },
        },
        {
          functionResponse: {
            id: 'call_2',
            name: 'read_file',
            response: { output: 'Contents of B' },
          },
        },
      ],
    },
    {
      role: 'model',
      parts: [
        { text: 'Thanks. Now I will compile.' },
        {
          functionCall: {
            id: 'call_3',
            name: 'shell',
            args: { cmd: 'make' },
          },
        },
      ],
    },
    {
      role: 'user',
      parts: [
        {
          functionResponse: {
            id: 'call_3',
            name: 'shell',
            response: { output: 'success' },
          },
        },
      ],
    },
    { role: 'model', parts: [{ text: 'Everything is done!' }] },
  ];

  const tokenCalculator = new ContextTokenCalculator(4);
  const episodes = IrMapper.toIr(rawHistory, tokenCalculator);

  // The whole exchange hangs off a single user prompt, so one episode.
  expect(episodes).toHaveLength(1);
  const ep = episodes[0];

  expect(ep.trigger.type).toBe('USER_PROMPT');
  expect(
    ((ep.trigger as UserPrompt).semanticParts[0] as { text: string }).text,
  ).toBe('Can you read file A and B?');

  // Steps should be: Thought, ToolExecution(A), ToolExecution(B), Thought, ToolExecution(make)
  expect(ep.steps).toHaveLength(5);
  expect(ep.steps.map((step) => step.type)).toEqual([
    'AGENT_THOUGHT',
    'TOOL_EXECUTION',
    'TOOL_EXECUTION',
    'AGENT_THOUGHT',
    'TOOL_EXECUTION',
  ]);

  // The casts below are safe only because the type tags were just verified.
  const openingThought = ep.steps[0] as AgentThought;
  const readA = ep.steps[1] as ToolExecution;
  const readB = ep.steps[2] as ToolExecution;
  const compileThought = ep.steps[3] as AgentThought;
  const build = ep.steps[4] as ToolExecution;

  expect(openingThought.text).toBe('Let me check those files.');

  expect(readA.toolName).toBe('read_file');
  expect(readA.intent).toEqual({ filepath: 'A.txt' });
  expect(readA.observation).toEqual({ output: 'Contents of A' });

  // The second call of the grouped turn must be paired with ITS response,
  // not dropped and not given the first call's output.
  expect(readB.toolName).toBe('read_file');
  expect(readB.intent).toEqual({ filepath: 'B.txt' });
  expect(readB.observation).toEqual({ output: 'Contents of B' });

  expect(compileThought.text).toBe('Thanks. Now I will compile.');

  expect(build.toolName).toBe('shell');
  expect(build.intent).toEqual({ cmd: 'make' });
  expect(build.observation).toEqual({ output: 'success' });

  // The trailing text-only model turn closes the episode.
  expect(ep.yield?.type).toBe('AGENT_YIELD');
  expect(ep.yield?.text).toBe('Everything is done!');

  // Test Re-serialization
  const reconstituted = IrMapper.fromIr(episodes);

  // Compare basic structure (the reconstituted version might have slightly
  // different grouping of calls/responses based on flush logic, but it is
  // semantically equivalent).
  expect(reconstituted[0]).toEqual(rawHistory[0]);

  // Per the original author's note, tool IDs may be reassigned on the way
  // back (string IDs are discarded in favor of deterministic hash IDs), so
  // only the ID-free leading text part of the model turn is compared exactly.
  // Optional chaining makes a missing `parts` surface as a failed expectation
  // rather than a TypeError.
  expect(reconstituted[1]?.parts?.[0]).toEqual(rawHistory[1]?.parts?.[0]);

  // The exact structural equivalence isn't guaranteed because Gemini allows
  // mixing text and calls in one Content block, but the flat representation
  // is semantically identical.
});
|
|
|
|
it('should correctly handle multi-tool-calls grouped within a single turn without dropping observations', () => {
  // Each turn is named so the conversation shape reads top-down.
  const userAsk: Content = {
    role: 'user',
    parts: [{ text: 'Examine both of these tools please.' }],
  };
  const modelCalls: Content = {
    role: 'model',
    parts: [
      { text: 'I will call them concurrently.' },
      { functionCall: { id: 'c1', name: 'tool_one', args: { p: 1 } } },
      { functionCall: { id: 'c2', name: 'tool_two', args: { p: 2 } } },
    ],
  };
  // Gemini forces the user turn to contain ALL function responses for that model turn
  const toolResults: Content = {
    role: 'user',
    parts: [
      { functionResponse: { id: 'c1', name: 'tool_one', response: { r: 1 } } },
      { functionResponse: { id: 'c2', name: 'tool_two', response: { r: 2 } } },
    ],
  };
  const modelWrapUp: Content = {
    role: 'model',
    parts: [{ text: 'Both complete.' }],
  };
  const conversation: Content[] = [
    userAsk,
    modelCalls,
    toolResults,
    modelWrapUp,
  ];

  const calculator = new ContextTokenCalculator(4);
  const mapped = IrMapper.toIr(conversation, calculator);

  // All four turns belong to a single episode.
  expect(mapped).toHaveLength(1);
  const [episode] = mapped;

  expect(episode.trigger.type).toBe('USER_PROMPT');

  // Expected step layout:
  //   0: AgentThought ("I will call them concurrently")
  //   1: ToolExecution(tool_one)
  //   2: ToolExecution(tool_two)
  expect(episode.steps).toHaveLength(3);
  const [thoughtStep, firstToolStep, secondToolStep] = episode.steps;

  expect(thoughtStep.type).toBe('AGENT_THOUGHT');
  const thought = thoughtStep as AgentThought;
  expect(thought.text).toBe('I will call them concurrently.');

  expect(firstToolStep.type).toBe('TOOL_EXECUTION');
  const firstTool = firstToolStep as ToolExecution;
  expect(firstTool.toolName).toBe('tool_one');
  expect(firstTool.intent).toEqual({ p: 1 });
  expect(firstTool.observation).toEqual({ r: 1 });

  expect(secondToolStep.type).toBe('TOOL_EXECUTION');
  const secondTool = secondToolStep as ToolExecution;
  expect(secondTool.toolName).toBe('tool_two');
  expect(secondTool.intent).toEqual({ p: 2 });
  expect(secondTool.observation).toEqual({ r: 2 });

  // The closing model turn is surfaced as the episode's yield.
  const closing = episode.yield;
  expect(closing).toBeDefined();
  expect(closing?.type).toBe('AGENT_YIELD');
  expect(closing?.text).toBe('Both complete.');

  // Round-trip: serializing back must not lose either of the grouped calls.
  const roundTripped = IrMapper.fromIr(mapped);

  // Same number of turns as the input conversation.
  expect(roundTripped).toHaveLength(4);
  const [, modelTurn, responseTurn] = roundTripped;

  // Model turn: text + call1 + call2, in the original order.
  expect(modelTurn.role).toBe('model');
  expect(modelTurn.parts).toHaveLength(3);
  expect(modelTurn.parts?.[1]?.functionCall?.name).toBe('tool_one');
  expect(modelTurn.parts?.[2]?.functionCall?.name).toBe('tool_two');

  // User turn: response1 + response2, in the original order.
  expect(responseTurn.role).toBe('user');
  expect(responseTurn.parts).toHaveLength(2);
  expect(responseTurn.parts?.[0]?.functionResponse?.name).toBe('tool_one');
  expect(responseTurn.parts?.[1]?.functionResponse?.name).toBe('tool_two');
});
|
|
|
|
it('should guarantee WeakMap ID stability across continuous mapping', () => {
  // Step 1: map a minimal two-turn conversation.
  const conversation: Content[] = [
    { role: 'user', parts: [{ text: 'Hello' }] },
    { role: 'model', parts: [{ text: 'Hi there' }] },
  ];

  const calculator = new ContextTokenCalculator(4);
  const firstPass = IrMapper.toIr(conversation, calculator);
  expect(firstPass).toHaveLength(1);

  // Remember the IDs assigned to the first episode and its trigger.
  const {
    id: originalEpisodeId,
    trigger: { id: originalTriggerId },
  } = firstPass[0];

  // Step 2: grow the SAME array in place (simulating a continuing
  // conversation) so the earlier Content objects keep their identity.
  conversation.push(
    { role: 'user', parts: [{ text: 'How are you?' }] },
    { role: 'model', parts: [{ text: 'I am an AI.' }] },
  );

  const secondPass = IrMapper.toIr(conversation, calculator);
  expect(secondPass).toHaveLength(2);
  const [carriedOver, appended] = secondPass;

  // Step 3: the first episode must come back with exactly the same IDs,
  // because the Content objects it was built from are the same references.
  // Per the original author's note, this is what shows the WeakMap pinned
  // those references.
  expect(carriedOver.id).toBe(originalEpisodeId);
  expect(carriedOver.trigger.id).toBe(originalTriggerId);

  // The newly appended episode must not reuse the first episode's ID.
  expect(appended.id).not.toBe(originalEpisodeId);
});
|
|
});
|