mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-11 03:46:49 -07:00
fix(cli): filter internal session context from history during resumption (#27391)
This commit is contained in:
committed by
GitHub
parent
3cc7e5b096
commit
630ecc21b9
@@ -32,227 +32,256 @@ describe('Context Management Fidelity E2E', () => {
|
||||
|
||||
afterEach(async () => await rig.cleanup());
|
||||
|
||||
it('should reproduce the exact context working buffer on resume', async () => {
|
||||
// Mock responses to trigger GC (summarization)
|
||||
const snapshotResponse: FakeResponse = {
|
||||
method: 'generateContent',
|
||||
response: {
|
||||
candidates: [
|
||||
{
|
||||
content: {
|
||||
parts: [
|
||||
{
|
||||
text: JSON.stringify({
|
||||
new_facts: ['GC Triggered.'],
|
||||
new_constraints: [],
|
||||
new_tasks: [],
|
||||
resolved_task_ids: [],
|
||||
obsolete_fact_indices: [],
|
||||
obsolete_constraint_indices: [],
|
||||
chronological_summary: 'Snapshot created.',
|
||||
}),
|
||||
},
|
||||
],
|
||||
role: 'model',
|
||||
},
|
||||
finishReason: FinishReason.STOP,
|
||||
index: 0,
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
|
||||
const countTokensResponse: FakeResponse = {
|
||||
method: 'countTokens',
|
||||
response: { totalTokens: 1000 },
|
||||
};
|
||||
|
||||
const streamResponse = (text: string): FakeResponse => ({
|
||||
method: 'generateContentStream',
|
||||
response: [
|
||||
{
|
||||
it(
|
||||
'should reproduce the exact context working buffer on resume',
|
||||
{ timeout: 300000 },
|
||||
async () => {
|
||||
// Mock responses to trigger GC (summarization)
|
||||
const snapshotResponse: FakeResponse = {
|
||||
method: 'generateContent',
|
||||
response: {
|
||||
candidates: [
|
||||
{
|
||||
content: { parts: [{ text }], role: 'model' },
|
||||
content: {
|
||||
parts: [
|
||||
{
|
||||
text: JSON.stringify({
|
||||
new_facts: ['GC Triggered.'],
|
||||
new_constraints: [],
|
||||
new_tasks: [],
|
||||
resolved_task_ids: [],
|
||||
obsolete_fact_indices: [],
|
||||
obsolete_constraint_indices: [],
|
||||
chronological_summary: 'Snapshot created.',
|
||||
}),
|
||||
},
|
||||
],
|
||||
role: 'model',
|
||||
},
|
||||
finishReason: FinishReason.STOP,
|
||||
index: 0,
|
||||
},
|
||||
],
|
||||
},
|
||||
] as unknown as GenerateContentResponse[],
|
||||
});
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
|
||||
const setupResponses = (fileName: string, mocks: FakeResponse[]) => {
|
||||
const filePath = path.join(rig.testDir!, fileName);
|
||||
const countTokensResponse: FakeResponse = {
|
||||
method: 'countTokens',
|
||||
response: { totalTokens: 1000 },
|
||||
};
|
||||
|
||||
const streamResponse = (text: string): FakeResponse => ({
|
||||
method: 'generateContentStream',
|
||||
response: [
|
||||
{
|
||||
candidates: [
|
||||
{
|
||||
content: { parts: [{ text }], role: 'model' },
|
||||
finishReason: FinishReason.STOP,
|
||||
index: 0,
|
||||
},
|
||||
],
|
||||
},
|
||||
] as unknown as GenerateContentResponse[],
|
||||
});
|
||||
|
||||
const setupResponses = (fileName: string, mocks: FakeResponse[]) => {
|
||||
const filePath = path.join(rig.testDir!, fileName);
|
||||
fs.writeFileSync(
|
||||
filePath,
|
||||
mocks.map((m) => JSON.stringify(m)).join('\n'),
|
||||
);
|
||||
return filePath;
|
||||
};
|
||||
|
||||
await rig.setup('context-fidelity', {
|
||||
settings: {
|
||||
experimental: {
|
||||
stressTestProfile: true, // Lowers thresholds to trigger GC easily
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const traceDir = path.join(rig.testDir!, 'traces');
|
||||
fs.mkdirSync(traceDir, { recursive: true });
|
||||
const traceLog = path.join(traceDir, 'trace.log');
|
||||
|
||||
// Ignore trace and response files to keep environment context clean and stable
|
||||
fs.writeFileSync(
|
||||
filePath,
|
||||
mocks.map((m) => JSON.stringify(m)).join('\n'),
|
||||
path.join(rig.testDir!, '.geminiignore'),
|
||||
'traces/\nresp*.json\ndebug.log\n',
|
||||
);
|
||||
return filePath;
|
||||
};
|
||||
|
||||
await rig.setup('context-fidelity', {
|
||||
settings: {
|
||||
experimental: {
|
||||
stressTestProfile: true, // Lowers thresholds to trigger GC easily
|
||||
},
|
||||
},
|
||||
});
|
||||
const commonEnv = {
|
||||
GEMINI_API_KEY: 'mock-key',
|
||||
GEMINI_CONTEXT_TRACE_DIR: traceDir,
|
||||
GEMINI_CONTEXT_TRACE_ENABLED: 'true',
|
||||
GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'),
|
||||
};
|
||||
|
||||
const traceDir = path.join(rig.testDir!, 'traces');
|
||||
fs.mkdirSync(traceDir, { recursive: true });
|
||||
const traceLog = path.join(traceDir, 'trace.log');
|
||||
const runMocks: FakeResponse[] = [
|
||||
streamResponse('Ack 1'),
|
||||
streamResponse('Ack 2'),
|
||||
streamResponse('Ack 3'),
|
||||
streamResponse('Ack 4'),
|
||||
streamResponse('Ack 5'),
|
||||
streamResponse('Ack 6'),
|
||||
streamResponse('Ack 7'),
|
||||
streamResponse('Ack 8'),
|
||||
streamResponse('Ack 9'),
|
||||
streamResponse('Ack 10'),
|
||||
streamResponse('Ack 11'),
|
||||
streamResponse('Ack 12'),
|
||||
];
|
||||
for (let i = 0; i < 50; i++) {
|
||||
runMocks.push(snapshotResponse);
|
||||
runMocks.push(countTokensResponse);
|
||||
}
|
||||
|
||||
const commonEnv = {
|
||||
GEMINI_API_KEY: 'mock-key',
|
||||
GEMINI_CONTEXT_TRACE_DIR: traceDir,
|
||||
GEMINI_CONTEXT_TRACE_ENABLED: 'true',
|
||||
GEMINI_DEBUG_LOG_FILE: path.join(rig.testDir!, 'debug.log'),
|
||||
};
|
||||
// Turns 1-10: Build up history
|
||||
for (let i = 1; i <= 10; i++) {
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
i === 1 ? '' : '--resume',
|
||||
i === 1 ? '' : 'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses(`resp_init_${i}.json`, runMocks),
|
||||
].filter(Boolean),
|
||||
stdin: `Turn ${i}: ` + generateRandomString(900),
|
||||
env: commonEnv,
|
||||
});
|
||||
}
|
||||
|
||||
const runMocks: FakeResponse[] = [
|
||||
streamResponse('Ack 1'),
|
||||
streamResponse('Ack 2'),
|
||||
streamResponse('Ack 3'),
|
||||
streamResponse('Ack 4'),
|
||||
streamResponse('Ack 5'),
|
||||
streamResponse('Ack 6'),
|
||||
streamResponse('Ack 7'),
|
||||
streamResponse('Ack 8'),
|
||||
streamResponse('Ack 9'),
|
||||
streamResponse('Ack 10'),
|
||||
streamResponse('Ack 11'),
|
||||
streamResponse('Ack 12'),
|
||||
];
|
||||
for (let i = 0; i < 50; i++) {
|
||||
runMocks.push(snapshotResponse);
|
||||
runMocks.push(countTokensResponse);
|
||||
}
|
||||
|
||||
// Turns 1-10: Build up history
|
||||
for (let i = 1; i <= 10; i++) {
|
||||
// Turn 11: Penultimate turn
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
i === 1 ? '' : '--resume',
|
||||
i === 1 ? '' : 'latest',
|
||||
'--resume',
|
||||
'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses(`resp_init_${i}.json`, runMocks),
|
||||
].filter(Boolean),
|
||||
stdin: `Turn ${i}: ` + generateRandomString(900),
|
||||
setupResponses('resp2.json', runMocks),
|
||||
],
|
||||
stdin: 'Turn 11: ' + generateRandomString(900),
|
||||
env: commonEnv,
|
||||
});
|
||||
}
|
||||
|
||||
// Turn 11: Penultimate turn
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
'--resume',
|
||||
'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses('resp2.json', runMocks),
|
||||
],
|
||||
stdin: 'Turn 11: ' + generateRandomString(900),
|
||||
env: commonEnv,
|
||||
});
|
||||
// Turn 12: Breach threshold and force GC
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
'--resume',
|
||||
'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses('resp3.json', runMocks),
|
||||
],
|
||||
stdin: 'Turn 12: ' + generateRandomString(900),
|
||||
env: commonEnv,
|
||||
});
|
||||
|
||||
// Turn 12: Breach threshold and force GC
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
'--resume',
|
||||
'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses('resp3.json', runMocks),
|
||||
],
|
||||
stdin: 'Turn 12: ' + generateRandomString(900),
|
||||
env: commonEnv,
|
||||
});
|
||||
// Extract the rendered context asset from the log
|
||||
const getRenderedContext = (logContent: string): HistoryTurn[] | null => {
|
||||
const lines = logContent.split('\n');
|
||||
const renderLines = lines.filter(
|
||||
(l) =>
|
||||
l.includes('[Render] Render Sanitized Context for LLM') ||
|
||||
l.includes('[Render] Render Context for LLM'),
|
||||
);
|
||||
if (renderLines.length === 0) return null;
|
||||
|
||||
// Extract the rendered context asset from the log
|
||||
const getRenderedContext = (logContent: string): HistoryTurn[] | null => {
|
||||
const lines = logContent.split('\n');
|
||||
const renderLines = lines.filter(
|
||||
(l) =>
|
||||
l.includes('[Render] Render Sanitized Context for LLM') ||
|
||||
l.includes('[Render] Render Context for LLM'),
|
||||
const lastRender = renderLines[renderLines.length - 1];
|
||||
const detailsMatch = lastRender.match(/\| Details: (.*)$/);
|
||||
if (!detailsMatch) return null;
|
||||
|
||||
const details = JSON.parse(detailsMatch[1]);
|
||||
const assetInfo =
|
||||
details.renderedContextSanitized || details.renderedContext;
|
||||
if (assetInfo && assetInfo.$asset) {
|
||||
const assetPath = path.join(traceDir, 'assets', assetInfo.$asset);
|
||||
return JSON.parse(fs.readFileSync(assetPath, 'utf-8'));
|
||||
}
|
||||
return assetInfo;
|
||||
};
|
||||
|
||||
const log1 = fs.readFileSync(traceLog, 'utf-8');
|
||||
const contextBeforeExit = getRenderedContext(log1);
|
||||
expect(contextBeforeExit).toBeDefined();
|
||||
console.log(
|
||||
'Context Before Exit (First 2 turns):',
|
||||
JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2),
|
||||
);
|
||||
if (renderLines.length === 0) return null;
|
||||
|
||||
const lastRender = renderLines[renderLines.length - 1];
|
||||
const detailsMatch = lastRender.match(/\| Details: (.*)$/);
|
||||
if (!detailsMatch) return null;
|
||||
// Turn 13 (after the 12 history-building turns): Resume and run a small command
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
'--resume',
|
||||
'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses('resp4.json', runMocks),
|
||||
'continue',
|
||||
],
|
||||
env: commonEnv,
|
||||
});
|
||||
|
||||
const details = JSON.parse(detailsMatch[1]);
|
||||
const assetInfo =
|
||||
details.renderedContextSanitized || details.renderedContext;
|
||||
if (assetInfo && assetInfo.$asset) {
|
||||
const assetPath = path.join(traceDir, 'assets', assetInfo.$asset);
|
||||
return JSON.parse(fs.readFileSync(assetPath, 'utf-8'));
|
||||
const log2 = fs.readFileSync(traceLog, 'utf-8');
|
||||
const contextAfterResume = getRenderedContext(log2);
|
||||
expect(contextAfterResume).toBeDefined();
|
||||
console.log(
|
||||
'Context After Resume (First 2 turns):',
|
||||
JSON.stringify(contextAfterResume!.slice(0, 2), null, 2),
|
||||
);
|
||||
|
||||
expect(contextAfterResume!.length).toBeGreaterThanOrEqual(
|
||||
contextBeforeExit!.length,
|
||||
);
|
||||
|
||||
// The environment context is intentionally refreshed on resume to reflect
|
||||
// the current state of the workspace (e.g. new files, current date).
|
||||
// We allow its content to differ but ensure it's still an environment context.
|
||||
const isEnvContext = (turn: HistoryTurn) =>
|
||||
turn.content.parts?.some((p) => p.text?.includes('<session_context>'));
|
||||
|
||||
for (let i = 0; i < contextBeforeExit!.length; i++) {
|
||||
expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id);
|
||||
|
||||
const turnBefore = contextBeforeExit![i];
|
||||
const turnAfter = contextAfterResume![i];
|
||||
|
||||
if (isEnvContext(turnBefore)) {
|
||||
expect(isEnvContext(turnAfter)).toBe(true);
|
||||
continue;
|
||||
}
|
||||
|
||||
expect(turnAfter.content).toEqual(turnBefore.content);
|
||||
}
|
||||
return assetInfo;
|
||||
};
|
||||
|
||||
const log1 = fs.readFileSync(traceLog, 'utf-8');
|
||||
const contextBeforeExit = getRenderedContext(log1);
|
||||
expect(contextBeforeExit).toBeDefined();
|
||||
console.log(
|
||||
'Context Before Exit (First 2 turns):',
|
||||
JSON.stringify(contextBeforeExit!.slice(0, 2), null, 2),
|
||||
);
|
||||
|
||||
// Turn 13 (after the 12 history-building turns): Resume and run a small command
|
||||
await rig.run({
|
||||
args: [
|
||||
'--debug',
|
||||
'--resume',
|
||||
'latest',
|
||||
'--fake-responses-non-strict',
|
||||
setupResponses('resp4.json', runMocks),
|
||||
'continue',
|
||||
],
|
||||
env: commonEnv,
|
||||
});
|
||||
|
||||
const log2 = fs.readFileSync(traceLog, 'utf-8');
|
||||
const contextAfterResume = getRenderedContext(log2);
|
||||
expect(contextAfterResume).toBeDefined();
|
||||
console.log(
|
||||
'Context After Resume (First 2 turns):',
|
||||
JSON.stringify(contextAfterResume!.slice(0, 2), null, 2),
|
||||
);
|
||||
|
||||
expect(contextAfterResume!.length).toBeGreaterThanOrEqual(
|
||||
contextBeforeExit!.length,
|
||||
);
|
||||
|
||||
for (let i = 0; i < contextBeforeExit!.length; i++) {
|
||||
expect(contextAfterResume![i].id).toBe(contextBeforeExit![i].id);
|
||||
expect(contextAfterResume![i].content).toEqual(
|
||||
contextBeforeExit![i].content,
|
||||
// Most importantly, synthetic IDs (like summaries) must be stable.
|
||||
const syntheticTurns = contextBeforeExit!.filter(
|
||||
(t: HistoryTurn) =>
|
||||
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
|
||||
(t.id && t.id.length === 32),
|
||||
);
|
||||
}
|
||||
expect(syntheticTurns.length).toBeGreaterThan(0);
|
||||
|
||||
// Most importantly, synthetic IDs (like summaries) must be stable.
|
||||
const syntheticTurns = contextBeforeExit!.filter(
|
||||
(t: HistoryTurn) =>
|
||||
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
|
||||
(t.id && t.id.length === 32),
|
||||
);
|
||||
expect(syntheticTurns.length).toBeGreaterThan(0);
|
||||
const syntheticTurnsAfter = contextAfterResume!.filter(
|
||||
(t: HistoryTurn) =>
|
||||
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
|
||||
(t.id && t.id.length === 32),
|
||||
);
|
||||
expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual(
|
||||
syntheticTurns.length,
|
||||
);
|
||||
|
||||
const syntheticTurnsAfter = contextAfterResume!.filter(
|
||||
(t: HistoryTurn) =>
|
||||
t.content.parts?.some((p) => p.text?.includes('active_tasks')) ||
|
||||
(t.id && t.id.length === 32),
|
||||
);
|
||||
expect(syntheticTurnsAfter.length).toBeGreaterThanOrEqual(
|
||||
syntheticTurns.length,
|
||||
);
|
||||
|
||||
// Check if the first synthetic turn is identical
|
||||
expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id);
|
||||
expect(syntheticTurnsAfter[0].content).toEqual(syntheticTurns[0].content);
|
||||
});
|
||||
// Check if the first synthetic turn is identical (with relaxation for environment context)
|
||||
expect(syntheticTurnsAfter[0].id).toBe(syntheticTurns[0].id);
|
||||
if (isEnvContext(syntheticTurns[0])) {
|
||||
expect(isEnvContext(syntheticTurnsAfter[0])).toBe(true);
|
||||
} else {
|
||||
expect(syntheticTurnsAfter[0].content).toEqual(
|
||||
syntheticTurns[0].content,
|
||||
);
|
||||
}
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
@@ -1111,6 +1111,28 @@ describe('convertSessionToHistoryFormats', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('should filter out <session_context> from UI history', () => {
|
||||
const messages: MessageRecord[] = [
|
||||
{
|
||||
id: '1',
|
||||
timestamp: new Date().toISOString(),
|
||||
type: 'user',
|
||||
content:
|
||||
'<session_context>\nThis is the Gemini CLI\n</session_context>',
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
timestamp: new Date().toISOString(),
|
||||
type: 'user',
|
||||
content: 'Real message',
|
||||
},
|
||||
];
|
||||
|
||||
const result = convertSessionToHistoryFormats(messages);
|
||||
expect(result.uiHistory).toHaveLength(1);
|
||||
expect(result.uiHistory[0].text).toBe('Real message');
|
||||
});
|
||||
|
||||
it('should handle missing tool descriptions and displayNames', () => {
|
||||
const messages: MessageRecord[] = [
|
||||
{
|
||||
|
||||
@@ -606,7 +606,16 @@ export function convertSessionToHistoryFormats(
|
||||
const contentString = partListUnionToString(msg.content);
|
||||
const uiText = displayContentString || contentString;
|
||||
|
||||
if (uiText.trim()) {
|
||||
// Skip internal context messages in the UI history
|
||||
const trimmedText = uiText.trim();
|
||||
if (
|
||||
trimmedText.startsWith('<session_context>') ||
|
||||
trimmedText.startsWith('<hook_context>')
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trimmedText) {
|
||||
let messageType: MessageType;
|
||||
switch (msg.type) {
|
||||
case 'user':
|
||||
|
||||
@@ -105,6 +105,35 @@ describe('convertSessionToClientHistory', () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it('should ignore <session_context> and <hook_context>', () => {
|
||||
const messages: ConversationRecord['messages'] = [
|
||||
{
|
||||
id: '1',
|
||||
type: 'user',
|
||||
timestamp: '2024-01-01T10:00:00Z',
|
||||
content: '<session_context>\nOld context\n</session_context>',
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
type: 'user',
|
||||
timestamp: '2024-01-01T10:01:00Z',
|
||||
content: '<hook_context>\nOld hook context\n</hook_context>',
|
||||
},
|
||||
{
|
||||
id: '3',
|
||||
type: 'user',
|
||||
timestamp: '2024-01-01T10:02:00Z',
|
||||
content: 'Actual query',
|
||||
},
|
||||
];
|
||||
|
||||
const history = convertSessionToClientHistory(messages);
|
||||
|
||||
expect(history.map((h) => h.content)).toEqual([
|
||||
{ role: 'user', parts: [{ text: 'Actual query' }] },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should correctly map tool calls and their responses', () => {
|
||||
const messages: ConversationRecord['messages'] = [
|
||||
{
|
||||
|
||||
@@ -109,9 +109,12 @@ export function convertSessionToClientHistory(
|
||||
|
||||
if (msg.type === 'user') {
|
||||
const contentString = partListUnionToString(msg.content);
|
||||
const trimmedContent = contentString.trim();
|
||||
if (
|
||||
contentString.trim().startsWith('/') ||
|
||||
contentString.trim().startsWith('?')
|
||||
trimmedContent.startsWith('/') ||
|
||||
trimmedContent.startsWith('?') ||
|
||||
trimmedContent.startsWith('<session_context>') ||
|
||||
trimmedContent.startsWith('<hook_context>')
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user