Merge branch 'main' into feat/browser-allowed-domain

This commit is contained in:
cynthialong0-0
2026-03-10 12:17:22 -07:00
committed by GitHub
4 changed files with 261 additions and 15 deletions

View File

@@ -111,7 +111,7 @@
"badge": "🔬",
"slug": "docs/cli/notifications"
},
{ "label": "Plan mode", "badge": "🔬", "slug": "docs/cli/plan-mode" },
{ "label": "Plan mode", "slug": "docs/cli/plan-mode" },
{
"label": "Subagents",
"badge": "🔬",

116
evals/tracker.eval.ts Normal file
View File

@@ -0,0 +1,116 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import {
TRACKER_CREATE_TASK_TOOL_NAME,
TRACKER_UPDATE_TASK_TOOL_NAME,
} from '@google/gemini-cli-core';
import { evalTest, assertModelHasOutput } from './test-helper.js';
import fs from 'node:fs';
import path from 'node:path';
const FILES = {
'package.json': JSON.stringify({
name: 'test-project',
version: '1.0.0',
scripts: { test: 'echo "All tests passed!"' },
}),
'src/login.js':
'function login(username, password) {\n if (!username) throw new Error("Missing username");\n // BUG: missing password check\n return true;\n}',
} as const;
describe('tracker_mode', () => {
evalTest('USUALLY_PASSES', {
name: 'should manage tasks in the tracker when explicitly requested during a bug fix',
params: {
settings: { experimental: { taskTracker: true } },
},
files: FILES,
prompt:
'We have a bug in src/login.js: the password check is missing. First, create a task in the tracker to fix it. Then fix the bug, and mark the task as closed.',
assert: async (rig, result) => {
const wasCreateCalled = await rig.waitForToolCall(
TRACKER_CREATE_TASK_TOOL_NAME,
);
expect(
wasCreateCalled,
'Expected tracker_create_task tool to be called',
).toBe(true);
const toolLogs = rig.readToolLogs();
const createCall = toolLogs.find(
(log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME,
);
expect(createCall).toBeDefined();
const args = JSON.parse(createCall!.toolRequest.args);
expect(
(args.title?.toLowerCase() ?? '') +
(args.description?.toLowerCase() ?? ''),
).toContain('login');
const wasUpdateCalled = await rig.waitForToolCall(
TRACKER_UPDATE_TASK_TOOL_NAME,
);
expect(
wasUpdateCalled,
'Expected tracker_update_task tool to be called',
).toBe(true);
const updateCall = toolLogs.find(
(log) => log.toolRequest.name === TRACKER_UPDATE_TASK_TOOL_NAME,
);
expect(updateCall).toBeDefined();
const updateArgs = JSON.parse(updateCall!.toolRequest.args);
expect(updateArgs.status).toBe('closed');
const loginContent = fs.readFileSync(
path.join(rig.testDir!, 'src/login.js'),
'utf-8',
);
expect(loginContent).not.toContain('// BUG: missing password check');
assertModelHasOutput(result);
},
});
evalTest('USUALLY_PASSES', {
name: 'should implicitly create tasks when asked to build a feature plan',
params: {
settings: { experimental: { taskTracker: true } },
},
files: FILES,
prompt:
'I need to build a complex new feature for user authentication in our project. Create a detailed implementation plan and organize the work into bite-sized chunks. Do not actually implement the code yet, just plan it.',
assert: async (rig, result) => {
// The model should proactively use tracker_create_task to organize the work
const wasToolCalled = await rig.waitForToolCall(
TRACKER_CREATE_TASK_TOOL_NAME,
);
expect(
wasToolCalled,
'Expected tracker_create_task to be called implicitly to organize plan',
).toBe(true);
const toolLogs = rig.readToolLogs();
const createCalls = toolLogs.filter(
(log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME,
);
// We expect it to create at least one task for authentication, likely more.
expect(createCalls.length).toBeGreaterThan(0);
// Verify it didn't write any code since we asked it to just plan
const loginContent = fs.readFileSync(
path.join(rig.testDir!, 'src/login.js'),
'utf-8',
);
expect(loginContent).toContain('// BUG: missing password check');
assertModelHasOutput(result);
},
});
});

View File

@@ -3510,6 +3510,116 @@ describe('useGeminiStream', () => {
expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
});
});
describe('Race Condition Prevention', () => {
it('should reject concurrent submitQuery when already responding', async () => {
// Stream that stays open (simulates "still responding")
mockSendMessageStream.mockReturnValue(
(async function* () {
yield {
type: ServerGeminiEventType.Content,
value: 'First response',
};
// Keep the stream open
await new Promise(() => {});
})(),
);
const { result } = renderTestHook();
// Start first query without awaiting (fire-and-forget, like existing tests)
await act(async () => {
// eslint-disable-next-line @typescript-eslint/no-floating-promises
result.current.submitQuery('first query');
});
// Wait for the stream to start responding
await waitFor(() => {
expect(result.current.streamingState).toBe(StreamingState.Responding);
});
// Try a second query while first is still responding
await act(async () => {
// eslint-disable-next-line @typescript-eslint/no-floating-promises
result.current.submitQuery('second query');
});
// Should have only called sendMessageStream once (second was rejected)
expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
});
it('should allow continuation queries via loop detection retry', async () => {
const mockLoopDetectionService = {
disableForSession: vi.fn(),
};
const mockClient = {
...new MockedGeminiClientClass(mockConfig),
getLoopDetectionService: () => mockLoopDetectionService,
};
mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockClient);
// First call triggers loop detection
mockSendMessageStream.mockReturnValueOnce(
(async function* () {
yield {
type: ServerGeminiEventType.LoopDetected,
};
})(),
);
// Retry call succeeds
mockSendMessageStream.mockReturnValueOnce(
(async function* () {
yield {
type: ServerGeminiEventType.Content,
value: 'Retry success',
};
yield {
type: ServerGeminiEventType.Finished,
value: { reason: 'STOP' },
};
})(),
);
const { result } = renderTestHook();
await act(async () => {
await result.current.submitQuery('test query');
});
await waitFor(() => {
expect(
result.current.loopDetectionConfirmationRequest,
).not.toBeNull();
});
// User selects "disable" which triggers a continuation query
await act(async () => {
result.current.loopDetectionConfirmationRequest?.onComplete({
userSelection: 'disable',
});
});
// Verify disableForSession was called
expect(
mockLoopDetectionService.disableForSession,
).toHaveBeenCalledTimes(1);
// Continuation query should have gone through (2 total calls)
await waitFor(() => {
expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
expect(mockSendMessageStream).toHaveBeenNthCalledWith(
2,
'test query',
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'test query',
);
});
});
});
});
describe('Agent Execution Events', () => {

View File

@@ -216,7 +216,15 @@ export const useGeminiStream = (
const previousApprovalModeRef = useRef<ApprovalMode>(
config.getApprovalMode(),
);
const [isResponding, setIsResponding] = useState<boolean>(false);
const [isResponding, setIsRespondingState] = useState<boolean>(false);
const isRespondingRef = useRef<boolean>(false);
const setIsResponding = useCallback(
(value: boolean) => {
setIsRespondingState(value);
isRespondingRef.current = value;
},
[setIsRespondingState],
);
const [thought, thoughtRef, setThought] =
useStateAndRef<ThoughtSummary | null>(null);
const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
@@ -320,11 +328,14 @@ export const useGeminiStream = (
return (executingShellTool as TrackedExecutingToolCall | undefined)?.pid;
}, [toolCalls]);
const onExec = useCallback(async (done: Promise<void>) => {
setIsResponding(true);
await done;
setIsResponding(false);
}, []);
const onExec = useCallback(
async (done: Promise<void>) => {
setIsResponding(true);
await done;
setIsResponding(false);
},
[setIsResponding],
);
const {
handleShellCommand,
@@ -538,7 +549,7 @@ export const useGeminiStream = (
setIsResponding(false);
}
prevActiveShellPtyIdRef.current = activeShellPtyId;
}, [activeShellPtyId, addItem]);
}, [activeShellPtyId, addItem, setIsResponding]);
useEffect(() => {
if (
@@ -700,6 +711,7 @@ export const useGeminiStream = (
cancelAllToolCalls,
toolCalls,
activeShellPtyId,
setIsResponding,
]);
useKeypress(
@@ -952,7 +964,13 @@ export const useGeminiStream = (
setIsResponding(false);
setThought(null); // Reset thought when user cancels
},
[addItem, pendingHistoryItemRef, setPendingHistoryItem, setThought],
[
addItem,
pendingHistoryItemRef,
setPendingHistoryItem,
setThought,
setIsResponding,
],
);
const handleErrorEvent = useCallback(
@@ -1358,14 +1376,15 @@ export const useGeminiStream = (
async ({ metadata: spanMetadata }) => {
spanMetadata.input = query;
const queryId = `${Date.now()}-${Math.random()}`;
activeQueryIdRef.current = queryId;
if (
(streamingState === StreamingState.Responding ||
(isRespondingRef.current ||
streamingState === StreamingState.Responding ||
streamingState === StreamingState.WaitingForConfirmation) &&
!options?.isContinuation
)
return;
const queryId = `${Date.now()}-${Math.random()}`;
activeQueryIdRef.current = queryId;
const userMessageTimestamp = Date.now();
@@ -1452,7 +1471,7 @@ export const useGeminiStream = (
loopDetectedRef.current = false;
// Show the confirmation dialog to choose whether to disable loop detection
setLoopDetectionConfirmationRequest({
onComplete: (result: {
onComplete: async (result: {
userSelection: 'disable' | 'keep';
}) => {
setLoopDetectionConfirmationRequest(null);
@@ -1468,8 +1487,7 @@ export const useGeminiStream = (
});
if (lastQueryRef.current && lastPromptIdRef.current) {
// eslint-disable-next-line @typescript-eslint/no-floating-promises
submitQuery(
await submitQuery(
lastQueryRef.current,
{ isContinuation: true },
lastPromptIdRef.current,
@@ -1537,6 +1555,7 @@ export const useGeminiStream = (
maybeAddSuppressedToolErrorNote,
maybeAddLowVerbosityFailureNote,
settings.merged.billing?.overageStrategy,
setIsResponding,
],
);
@@ -1803,6 +1822,7 @@ export const useGeminiStream = (
isLowErrorVerbosity,
maybeAddSuppressedToolErrorNote,
maybeAddLowVerbosityFailureNote,
setIsResponding,
],
);