feat(ui): shorten context overflow message when remaining context is >=75% of limit (#10812)

This commit is contained in:
Sandy Tao
2025-10-09 10:22:26 -07:00
committed by GitHub
parent 70610c740e
commit b60c8858af
2 changed files with 127 additions and 50 deletions

View File

@@ -31,6 +31,7 @@ import {
GeminiEventType as ServerGeminiEventType,
ToolErrorType,
ToolConfirmationOutcome,
tokenLimit,
} from '@google/gemini-cli-core';
import type { Part, PartListUnion } from '@google/genai';
import type { UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -77,6 +78,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
GeminiClient: MockedGeminiClientClass,
UserPromptEvent: MockedUserPromptEvent,
parseAndFormatApiError: mockParseAndFormatApiError,
tokenLimit: vi.fn().mockReturnValue(100), // Mock tokenLimit
};
});
@@ -1854,57 +1856,119 @@ describe('useGeminiStream', () => {
});
});
it('should add info message for ContextWindowWillOverflow event', async () => {
// Setup mock to return a stream with ContextWindowWillOverflow event
mockSendMessageStream.mockReturnValue(
(async function* () {
yield {
type: ServerGeminiEventType.ContextWindowWillOverflow,
value: {
estimatedRequestTokenCount: 100,
remainingTokenCount: 50,
},
};
})(),
);
const { result } = renderHook(() =>
useGeminiStream(
new MockedGeminiClientClass(mockConfig),
[],
mockAddItem,
mockConfig,
mockLoadedSettings,
mockOnDebugMessage,
mockHandleSlashCommand,
false,
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
() => {},
() => {},
() => {},
80,
24,
),
);
// Submit a query
await act(async () => {
await result.current.submitQuery('Test overflow');
describe('ContextWindowWillOverflow event', () => {
// Every test in this suite assumes a model context window of 100 tokens,
// so the 75% suggestion threshold falls at 75 remaining tokens.
beforeEach(() => {
vi.mocked(tokenLimit).mockReturnValue(100);
});
// Check that the info message was added
await waitFor(() => {
expect(mockAddItem).toHaveBeenCalledWith(
{
type: 'info',
text: `Sending this message (100 tokens) might exceed the remaining context window limit (50 tokens). Please try reducing the size of your message or use the \`/compress\` command to compress the chat history.`,
},
expect.any(Number),
it('should add message without suggestion when remaining tokens are > 75% of limit', async () => {
  // Arrange: emit a single ContextWindowWillOverflow event from the stream.
  // tokenLimit is mocked to 100 (see beforeEach), and 80 remaining tokens is
  // above the 75% threshold, so the /compress suggestion must NOT be appended.
  const overflowStream = (async function* () {
    yield {
      type: ServerGeminiEventType.ContextWindowWillOverflow,
      value: {
        estimatedRequestTokenCount: 20,
        remainingTokenCount: 80,
      },
    };
  })();
  mockSendMessageStream.mockReturnValue(overflowStream);
  const { result } = renderHook(() =>
    useGeminiStream(
      new MockedGeminiClientClass(mockConfig),
      [],
      mockAddItem,
      mockConfig,
      mockLoadedSettings,
      mockOnDebugMessage,
      mockHandleSlashCommand,
      false,
      () => 'vscode' as EditorType,
      () => {},
      () => Promise.resolve(),
      false,
      () => {},
      () => {},
      () => {},
      () => {},
      80,
      24,
    ),
  );
  // Act: submitting any query consumes the mocked stream above.
  await act(async () => {
    await result.current.submitQuery('Test overflow');
  });
  // Assert: only the short form of the info message was added.
  await waitFor(() => {
    expect(mockAddItem).toHaveBeenCalledWith(
      {
        type: 'info',
        text: `Sending this message (20 tokens) might exceed the remaining context window limit (80 tokens).`,
      },
      expect.any(Number),
    );
  });
});
it('should add message with suggestion when remaining tokens are < 75% of limit', async () => {
  // Arrange: emit a single ContextWindowWillOverflow event from the stream.
  // tokenLimit is mocked to 100 (see beforeEach), and 70 remaining tokens is
  // below the 75% threshold, so the /compress suggestion must be appended.
  const overflowStream = (async function* () {
    yield {
      type: ServerGeminiEventType.ContextWindowWillOverflow,
      value: {
        estimatedRequestTokenCount: 30,
        remainingTokenCount: 70,
      },
    };
  })();
  mockSendMessageStream.mockReturnValue(overflowStream);
  const { result } = renderHook(() =>
    useGeminiStream(
      new MockedGeminiClientClass(mockConfig),
      [],
      mockAddItem,
      mockConfig,
      mockLoadedSettings,
      mockOnDebugMessage,
      mockHandleSlashCommand,
      false,
      () => 'vscode' as EditorType,
      () => {},
      () => Promise.resolve(),
      false,
      () => {},
      () => {},
      () => {},
      () => {},
      80,
      24,
    ),
  );
  // Act: submitting any query consumes the mocked stream above.
  await act(async () => {
    await result.current.submitQuery('Test overflow');
  });
  // Assert: the info message includes the full suggestion text.
  await waitFor(() => {
    expect(mockAddItem).toHaveBeenCalledWith(
      {
        type: 'info',
        text: `Sending this message (30 tokens) might exceed the remaining context window limit (70 tokens). Please try reducing the size of your message or use the \`/compress\` command to compress the chat history.`,
      },
      expect.any(Number),
    );
  });
});
});

View File

@@ -34,6 +34,7 @@ import {
ToolConfirmationOutcome,
promptIdContext,
WRITE_FILE_TOOL_NAME,
tokenLimit,
} from '@google/gemini-cli-core';
import { type Part, type PartListUnion, FinishReason } from '@google/genai';
import type {
@@ -642,15 +643,27 @@ export const useGeminiStream = (
(estimatedRequestTokenCount: number, remainingTokenCount: number) => {
onCancelSubmit();
const limit = tokenLimit(config.getModel());
const isLessThan75Percent =
limit > 0 && remainingTokenCount < limit * 0.75;
let text = `Sending this message (${estimatedRequestTokenCount} tokens) might exceed the remaining context window limit (${remainingTokenCount} tokens).`;
if (isLessThan75Percent) {
text +=
' Please try reducing the size of your message or use the `/compress` command to compress the chat history.';
}
addItem(
{
type: 'info',
text: `Sending this message (${estimatedRequestTokenCount} tokens) might exceed the remaining context window limit (${remainingTokenCount} tokens). Please try reducing the size of your message or use the \`/compress\` command to compress the chat history.`,
text,
},
Date.now(),
);
},
[addItem, onCancelSubmit],
[addItem, onCancelSubmit, config],
);
const handleLoopDetectionConfirmation = useCallback(