mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 21:03:05 -07:00
Merge branch 'main' into feat/browser-allowed-domain
This commit is contained in:
+1
-1
@@ -111,7 +111,7 @@
|
|||||||
"badge": "🔬",
|
"badge": "🔬",
|
||||||
"slug": "docs/cli/notifications"
|
"slug": "docs/cli/notifications"
|
||||||
},
|
},
|
||||||
{ "label": "Plan mode", "badge": "🔬", "slug": "docs/cli/plan-mode" },
|
{ "label": "Plan mode", "slug": "docs/cli/plan-mode" },
|
||||||
{
|
{
|
||||||
"label": "Subagents",
|
"label": "Subagents",
|
||||||
"badge": "🔬",
|
"badge": "🔬",
|
||||||
|
|||||||
@@ -0,0 +1,116 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, expect } from 'vitest';
|
||||||
|
import {
|
||||||
|
TRACKER_CREATE_TASK_TOOL_NAME,
|
||||||
|
TRACKER_UPDATE_TASK_TOOL_NAME,
|
||||||
|
} from '@google/gemini-cli-core';
|
||||||
|
import { evalTest, assertModelHasOutput } from './test-helper.js';
|
||||||
|
import fs from 'node:fs';
|
||||||
|
import path from 'node:path';
|
||||||
|
|
||||||
|
const FILES = {
|
||||||
|
'package.json': JSON.stringify({
|
||||||
|
name: 'test-project',
|
||||||
|
version: '1.0.0',
|
||||||
|
scripts: { test: 'echo "All tests passed!"' },
|
||||||
|
}),
|
||||||
|
'src/login.js':
|
||||||
|
'function login(username, password) {\n if (!username) throw new Error("Missing username");\n // BUG: missing password check\n return true;\n}',
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
describe('tracker_mode', () => {
|
||||||
|
evalTest('USUALLY_PASSES', {
|
||||||
|
name: 'should manage tasks in the tracker when explicitly requested during a bug fix',
|
||||||
|
params: {
|
||||||
|
settings: { experimental: { taskTracker: true } },
|
||||||
|
},
|
||||||
|
files: FILES,
|
||||||
|
prompt:
|
||||||
|
'We have a bug in src/login.js: the password check is missing. First, create a task in the tracker to fix it. Then fix the bug, and mark the task as closed.',
|
||||||
|
assert: async (rig, result) => {
|
||||||
|
const wasCreateCalled = await rig.waitForToolCall(
|
||||||
|
TRACKER_CREATE_TASK_TOOL_NAME,
|
||||||
|
);
|
||||||
|
expect(
|
||||||
|
wasCreateCalled,
|
||||||
|
'Expected tracker_create_task tool to be called',
|
||||||
|
).toBe(true);
|
||||||
|
|
||||||
|
const toolLogs = rig.readToolLogs();
|
||||||
|
const createCall = toolLogs.find(
|
||||||
|
(log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME,
|
||||||
|
);
|
||||||
|
expect(createCall).toBeDefined();
|
||||||
|
const args = JSON.parse(createCall!.toolRequest.args);
|
||||||
|
expect(
|
||||||
|
(args.title?.toLowerCase() ?? '') +
|
||||||
|
(args.description?.toLowerCase() ?? ''),
|
||||||
|
).toContain('login');
|
||||||
|
|
||||||
|
const wasUpdateCalled = await rig.waitForToolCall(
|
||||||
|
TRACKER_UPDATE_TASK_TOOL_NAME,
|
||||||
|
);
|
||||||
|
expect(
|
||||||
|
wasUpdateCalled,
|
||||||
|
'Expected tracker_update_task tool to be called',
|
||||||
|
).toBe(true);
|
||||||
|
|
||||||
|
const updateCall = toolLogs.find(
|
||||||
|
(log) => log.toolRequest.name === TRACKER_UPDATE_TASK_TOOL_NAME,
|
||||||
|
);
|
||||||
|
expect(updateCall).toBeDefined();
|
||||||
|
const updateArgs = JSON.parse(updateCall!.toolRequest.args);
|
||||||
|
expect(updateArgs.status).toBe('closed');
|
||||||
|
|
||||||
|
const loginContent = fs.readFileSync(
|
||||||
|
path.join(rig.testDir!, 'src/login.js'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
expect(loginContent).not.toContain('// BUG: missing password check');
|
||||||
|
|
||||||
|
assertModelHasOutput(result);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
evalTest('USUALLY_PASSES', {
|
||||||
|
name: 'should implicitly create tasks when asked to build a feature plan',
|
||||||
|
params: {
|
||||||
|
settings: { experimental: { taskTracker: true } },
|
||||||
|
},
|
||||||
|
files: FILES,
|
||||||
|
prompt:
|
||||||
|
'I need to build a complex new feature for user authentication in our project. Create a detailed implementation plan and organize the work into bite-sized chunks. Do not actually implement the code yet, just plan it.',
|
||||||
|
assert: async (rig, result) => {
|
||||||
|
// The model should proactively use tracker_create_task to organize the work
|
||||||
|
const wasToolCalled = await rig.waitForToolCall(
|
||||||
|
TRACKER_CREATE_TASK_TOOL_NAME,
|
||||||
|
);
|
||||||
|
expect(
|
||||||
|
wasToolCalled,
|
||||||
|
'Expected tracker_create_task to be called implicitly to organize plan',
|
||||||
|
).toBe(true);
|
||||||
|
|
||||||
|
const toolLogs = rig.readToolLogs();
|
||||||
|
const createCalls = toolLogs.filter(
|
||||||
|
(log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME,
|
||||||
|
);
|
||||||
|
|
||||||
|
// We expect it to create at least one task for authentication, likely more.
|
||||||
|
expect(createCalls.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Verify it didn't write any code since we asked it to just plan
|
||||||
|
const loginContent = fs.readFileSync(
|
||||||
|
path.join(rig.testDir!, 'src/login.js'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
expect(loginContent).toContain('// BUG: missing password check');
|
||||||
|
|
||||||
|
assertModelHasOutput(result);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -3510,6 +3510,116 @@ describe('useGeminiStream', () => {
|
|||||||
expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
|
expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Race Condition Prevention', () => {
|
||||||
|
it('should reject concurrent submitQuery when already responding', async () => {
|
||||||
|
// Stream that stays open (simulates "still responding")
|
||||||
|
mockSendMessageStream.mockReturnValue(
|
||||||
|
(async function* () {
|
||||||
|
yield {
|
||||||
|
type: ServerGeminiEventType.Content,
|
||||||
|
value: 'First response',
|
||||||
|
};
|
||||||
|
// Keep the stream open
|
||||||
|
await new Promise(() => {});
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
const { result } = renderTestHook();
|
||||||
|
|
||||||
|
// Start first query without awaiting (fire-and-forget, like existing tests)
|
||||||
|
await act(async () => {
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-floating-promises
|
||||||
|
result.current.submitQuery('first query');
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait for the stream to start responding
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(result.current.streamingState).toBe(StreamingState.Responding);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Try a second query while first is still responding
|
||||||
|
await act(async () => {
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-floating-promises
|
||||||
|
result.current.submitQuery('second query');
|
||||||
|
});
|
||||||
|
|
||||||
|
// Should have only called sendMessageStream once (second was rejected)
|
||||||
|
expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow continuation queries via loop detection retry', async () => {
|
||||||
|
const mockLoopDetectionService = {
|
||||||
|
disableForSession: vi.fn(),
|
||||||
|
};
|
||||||
|
const mockClient = {
|
||||||
|
...new MockedGeminiClientClass(mockConfig),
|
||||||
|
getLoopDetectionService: () => mockLoopDetectionService,
|
||||||
|
};
|
||||||
|
mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockClient);
|
||||||
|
|
||||||
|
// First call triggers loop detection
|
||||||
|
mockSendMessageStream.mockReturnValueOnce(
|
||||||
|
(async function* () {
|
||||||
|
yield {
|
||||||
|
type: ServerGeminiEventType.LoopDetected,
|
||||||
|
};
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Retry call succeeds
|
||||||
|
mockSendMessageStream.mockReturnValueOnce(
|
||||||
|
(async function* () {
|
||||||
|
yield {
|
||||||
|
type: ServerGeminiEventType.Content,
|
||||||
|
value: 'Retry success',
|
||||||
|
};
|
||||||
|
yield {
|
||||||
|
type: ServerGeminiEventType.Finished,
|
||||||
|
value: { reason: 'STOP' },
|
||||||
|
};
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
const { result } = renderTestHook();
|
||||||
|
|
||||||
|
await act(async () => {
|
||||||
|
await result.current.submitQuery('test query');
|
||||||
|
});
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(
|
||||||
|
result.current.loopDetectionConfirmationRequest,
|
||||||
|
).not.toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
// User selects "disable" which triggers a continuation query
|
||||||
|
await act(async () => {
|
||||||
|
result.current.loopDetectionConfirmationRequest?.onComplete({
|
||||||
|
userSelection: 'disable',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify disableForSession was called
|
||||||
|
expect(
|
||||||
|
mockLoopDetectionService.disableForSession,
|
||||||
|
).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
|
// Continuation query should have gone through (2 total calls)
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
|
||||||
|
expect(mockSendMessageStream).toHaveBeenNthCalledWith(
|
||||||
|
2,
|
||||||
|
'test query',
|
||||||
|
expect.any(AbortSignal),
|
||||||
|
expect.any(String),
|
||||||
|
undefined,
|
||||||
|
false,
|
||||||
|
'test query',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('Agent Execution Events', () => {
|
describe('Agent Execution Events', () => {
|
||||||
|
|||||||
@@ -216,7 +216,15 @@ export const useGeminiStream = (
|
|||||||
const previousApprovalModeRef = useRef<ApprovalMode>(
|
const previousApprovalModeRef = useRef<ApprovalMode>(
|
||||||
config.getApprovalMode(),
|
config.getApprovalMode(),
|
||||||
);
|
);
|
||||||
const [isResponding, setIsResponding] = useState<boolean>(false);
|
const [isResponding, setIsRespondingState] = useState<boolean>(false);
|
||||||
|
const isRespondingRef = useRef<boolean>(false);
|
||||||
|
const setIsResponding = useCallback(
|
||||||
|
(value: boolean) => {
|
||||||
|
setIsRespondingState(value);
|
||||||
|
isRespondingRef.current = value;
|
||||||
|
},
|
||||||
|
[setIsRespondingState],
|
||||||
|
);
|
||||||
const [thought, thoughtRef, setThought] =
|
const [thought, thoughtRef, setThought] =
|
||||||
useStateAndRef<ThoughtSummary | null>(null);
|
useStateAndRef<ThoughtSummary | null>(null);
|
||||||
const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
|
const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
|
||||||
@@ -320,11 +328,14 @@ export const useGeminiStream = (
|
|||||||
return (executingShellTool as TrackedExecutingToolCall | undefined)?.pid;
|
return (executingShellTool as TrackedExecutingToolCall | undefined)?.pid;
|
||||||
}, [toolCalls]);
|
}, [toolCalls]);
|
||||||
|
|
||||||
const onExec = useCallback(async (done: Promise<void>) => {
|
const onExec = useCallback(
|
||||||
|
async (done: Promise<void>) => {
|
||||||
setIsResponding(true);
|
setIsResponding(true);
|
||||||
await done;
|
await done;
|
||||||
setIsResponding(false);
|
setIsResponding(false);
|
||||||
}, []);
|
},
|
||||||
|
[setIsResponding],
|
||||||
|
);
|
||||||
|
|
||||||
const {
|
const {
|
||||||
handleShellCommand,
|
handleShellCommand,
|
||||||
@@ -538,7 +549,7 @@ export const useGeminiStream = (
|
|||||||
setIsResponding(false);
|
setIsResponding(false);
|
||||||
}
|
}
|
||||||
prevActiveShellPtyIdRef.current = activeShellPtyId;
|
prevActiveShellPtyIdRef.current = activeShellPtyId;
|
||||||
}, [activeShellPtyId, addItem]);
|
}, [activeShellPtyId, addItem, setIsResponding]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (
|
if (
|
||||||
@@ -700,6 +711,7 @@ export const useGeminiStream = (
|
|||||||
cancelAllToolCalls,
|
cancelAllToolCalls,
|
||||||
toolCalls,
|
toolCalls,
|
||||||
activeShellPtyId,
|
activeShellPtyId,
|
||||||
|
setIsResponding,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
useKeypress(
|
useKeypress(
|
||||||
@@ -952,7 +964,13 @@ export const useGeminiStream = (
|
|||||||
setIsResponding(false);
|
setIsResponding(false);
|
||||||
setThought(null); // Reset thought when user cancels
|
setThought(null); // Reset thought when user cancels
|
||||||
},
|
},
|
||||||
[addItem, pendingHistoryItemRef, setPendingHistoryItem, setThought],
|
[
|
||||||
|
addItem,
|
||||||
|
pendingHistoryItemRef,
|
||||||
|
setPendingHistoryItem,
|
||||||
|
setThought,
|
||||||
|
setIsResponding,
|
||||||
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
const handleErrorEvent = useCallback(
|
const handleErrorEvent = useCallback(
|
||||||
@@ -1358,14 +1376,15 @@ export const useGeminiStream = (
|
|||||||
async ({ metadata: spanMetadata }) => {
|
async ({ metadata: spanMetadata }) => {
|
||||||
spanMetadata.input = query;
|
spanMetadata.input = query;
|
||||||
|
|
||||||
const queryId = `${Date.now()}-${Math.random()}`;
|
|
||||||
activeQueryIdRef.current = queryId;
|
|
||||||
if (
|
if (
|
||||||
(streamingState === StreamingState.Responding ||
|
(isRespondingRef.current ||
|
||||||
|
streamingState === StreamingState.Responding ||
|
||||||
streamingState === StreamingState.WaitingForConfirmation) &&
|
streamingState === StreamingState.WaitingForConfirmation) &&
|
||||||
!options?.isContinuation
|
!options?.isContinuation
|
||||||
)
|
)
|
||||||
return;
|
return;
|
||||||
|
const queryId = `${Date.now()}-${Math.random()}`;
|
||||||
|
activeQueryIdRef.current = queryId;
|
||||||
|
|
||||||
const userMessageTimestamp = Date.now();
|
const userMessageTimestamp = Date.now();
|
||||||
|
|
||||||
@@ -1452,7 +1471,7 @@ export const useGeminiStream = (
|
|||||||
loopDetectedRef.current = false;
|
loopDetectedRef.current = false;
|
||||||
// Show the confirmation dialog to choose whether to disable loop detection
|
// Show the confirmation dialog to choose whether to disable loop detection
|
||||||
setLoopDetectionConfirmationRequest({
|
setLoopDetectionConfirmationRequest({
|
||||||
onComplete: (result: {
|
onComplete: async (result: {
|
||||||
userSelection: 'disable' | 'keep';
|
userSelection: 'disable' | 'keep';
|
||||||
}) => {
|
}) => {
|
||||||
setLoopDetectionConfirmationRequest(null);
|
setLoopDetectionConfirmationRequest(null);
|
||||||
@@ -1468,8 +1487,7 @@ export const useGeminiStream = (
|
|||||||
});
|
});
|
||||||
|
|
||||||
if (lastQueryRef.current && lastPromptIdRef.current) {
|
if (lastQueryRef.current && lastPromptIdRef.current) {
|
||||||
// eslint-disable-next-line @typescript-eslint/no-floating-promises
|
await submitQuery(
|
||||||
submitQuery(
|
|
||||||
lastQueryRef.current,
|
lastQueryRef.current,
|
||||||
{ isContinuation: true },
|
{ isContinuation: true },
|
||||||
lastPromptIdRef.current,
|
lastPromptIdRef.current,
|
||||||
@@ -1537,6 +1555,7 @@ export const useGeminiStream = (
|
|||||||
maybeAddSuppressedToolErrorNote,
|
maybeAddSuppressedToolErrorNote,
|
||||||
maybeAddLowVerbosityFailureNote,
|
maybeAddLowVerbosityFailureNote,
|
||||||
settings.merged.billing?.overageStrategy,
|
settings.merged.billing?.overageStrategy,
|
||||||
|
setIsResponding,
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -1803,6 +1822,7 @@ export const useGeminiStream = (
|
|||||||
isLowErrorVerbosity,
|
isLowErrorVerbosity,
|
||||||
maybeAddSuppressedToolErrorNote,
|
maybeAddSuppressedToolErrorNote,
|
||||||
maybeAddLowVerbosityFailureNote,
|
maybeAddLowVerbosityFailureNote,
|
||||||
|
setIsResponding,
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user