Merge branch 'main' into feat/browser-allowed-domain

2026-04-25 04:24:51 -07:00 · 2026-03-10 12:17:22 -07:00
parent 589562ab65 2dd037682c
commit ba67a8ed8c
4 changed files with 261 additions and 15 deletions
@@ -111,7 +111,7 @@
            "badge": "🔬",
            "slug": "docs/cli/notifications"
          },
-          { "label": "Plan mode", "badge": "🔬", "slug": "docs/cli/plan-mode" },
+          { "label": "Plan mode", "slug": "docs/cli/plan-mode" },
          {
            "label": "Subagents",
            "badge": "🔬",
@@ -0,0 +1,116 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import {
+  TRACKER_CREATE_TASK_TOOL_NAME,
+  TRACKER_UPDATE_TASK_TOOL_NAME,
+} from '@google/gemini-cli-core';
+import { evalTest, assertModelHasOutput } from './test-helper.js';
+import fs from 'node:fs';
+import path from 'node:path';
+
+const FILES = { // fixture workspace handed to both evals below through their `files` option
+  'package.json': JSON.stringify({
+    name: 'test-project',
+    version: '1.0.0',
+    scripts: { test: 'echo "All tests passed!"' },
+  }),
+  'src/login.js': // the BUG marker comment inside this source is what the evals assert on
+    'function login(username, password) {\n  if (!username) throw new Error("Missing username");\n  // BUG: missing password check\n  return true;\n}',
+} as const;
+
+describe('tracker_mode', () => {
+  // Explicit flow: the prompt names the tracker, so this eval requires a create
+  // call mentioning the login bug, an update that closes the task, and an
+  // edited src/login.js.
+  evalTest('USUALLY_PASSES', {
+    name: 'should manage tasks in the tracker when explicitly requested during a bug fix',
+    params: {
+      settings: { experimental: { taskTracker: true } },
+    },
+    files: FILES,
+    prompt:
+      'We have a bug in src/login.js: the password check is missing. First, create a task in the tracker to fix it. Then fix the bug, and mark the task as closed.',
+    assert: async (rig, result) => {
+      const wasCreateCalled = await rig.waitForToolCall(
+        TRACKER_CREATE_TASK_TOOL_NAME,
+      );
+      expect(
+        wasCreateCalled,
+        'Expected tracker_create_task tool to be called',
+      ).toBe(true);
+
+      const toolLogs = rig.readToolLogs();
+      const createCall = toolLogs.find(
+        (log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME,
+      );
+      expect(createCall).toBeDefined();
+      const args = JSON.parse(createCall!.toolRequest.args);
+      expect(
+        (args.title?.toLowerCase() ?? '') +
+          (args.description?.toLowerCase() ?? ''),
+      ).toContain('login');
+
+      const wasUpdateCalled = await rig.waitForToolCall(
+        TRACKER_UPDATE_TASK_TOOL_NAME,
+      );
+      expect(
+        wasUpdateCalled,
+        'Expected tracker_update_task tool to be called',
+      ).toBe(true);
+
+      // Re-read the logs here: the snapshot above was taken before this wait, so
+      // it cannot contain an update that the wait had to wait for. Every update
+      // call is inspected because the model may record an intermediate status
+      // before the one that closes the task.
+      const updateStatuses = rig
+        .readToolLogs()
+        .filter((log) => log.toolRequest.name === TRACKER_UPDATE_TASK_TOOL_NAME)
+        .map((log) => JSON.parse(log.toolRequest.args).status);
+      expect(updateStatuses).toContain('closed');
+
+      // The seeded marker comment must be gone once the bug has been fixed.
+      const loginContent = fs.readFileSync(
+        path.join(rig.testDir!, 'src/login.js'),
+        'utf-8',
+      );
+      expect(loginContent).not.toContain('// BUG: missing password check');
+
+      assertModelHasOutput(result);
+    },
+  });
+
+  evalTest('USUALLY_PASSES', {
+    name: 'should implicitly create tasks when asked to build a feature plan',
+    params: { settings: { experimental: { taskTracker: true } } },
+    files: FILES,
+    prompt:
+      'I need to build a complex new feature for user authentication in our project. Create a detailed implementation plan and organize the work into bite-sized chunks. Do not actually implement the code yet, just plan it.',
+    assert: async (rig, result) => {
+      // The prompt never mentions the tracker; the model is expected to reach
+      // for tracker_create_task by itself to organize the plan.
+      const sawCreateTask = await rig.waitForToolCall(
+        TRACKER_CREATE_TASK_TOOL_NAME,
+      );
+      expect(
+        sawCreateTask,
+        'Expected tracker_create_task to be called implicitly to organize plan',
+      ).toBe(true);
+
+      // At least one task has to be recorded; more than one is fine.
+      const createCallCount = rig
+        .readToolLogs()
+        .filter((log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME)
+        .length;
+      expect(createCallCount).toBeGreaterThan(0);
+
+      // Planning only: the seeded bug marker must still be in the source file.
+      const loginPath = path.join(rig.testDir!, 'src/login.js');
+      const loginSource = fs.readFileSync(loginPath, 'utf-8');
+      expect(loginSource).toContain('// BUG: missing password check');
+
+      assertModelHasOutput(result);
+    },
+  });
+});
@@ -3510,6 +3510,116 @@ describe('useGeminiStream', () => {
        expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
      });
    });
+
+    describe('Race Condition Prevention', () => {
+      it('should reject concurrent submitQuery when already responding', async () => {
+        // Stream that yields one content chunk and then never completes, so the
+        // first query is still in flight when the second one is submitted.
+        mockSendMessageStream.mockReturnValue(
+          (async function* () {
+            yield {
+              type: ServerGeminiEventType.Content,
+              value: 'First response',
+            };
+            // This promise never settles: the generator stays suspended here.
+            await new Promise(() => {});
+          })(),
+        );
+
+        const { result } = renderTestHook();
+
+        // Start the first query without awaiting its promise; awaiting would hang
+        // because the stream above never finishes.
+        await act(async () => {
+          // eslint-disable-next-line @typescript-eslint/no-floating-promises
+          result.current.submitQuery('first query');
+        });
+
+        // Wait until the hook reports the Responding state
+        await waitFor(() => {
+          expect(result.current.streamingState).toBe(StreamingState.Responding);
+        });
+
+        // Submit a second query while the first is still responding
+        await act(async () => {
+          // eslint-disable-next-line @typescript-eslint/no-floating-promises
+          result.current.submitQuery('second query');
+        });
+
+        // Exactly one sendMessageStream call means the second query was dropped
+        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
+      });
+
+      it('should allow continuation queries via loop detection retry', async () => {
+        const mockLoopDetectionService = {
+          disableForSession: vi.fn(), // spy asserted on after the user picks "disable"
+        };
+        const mockClient = {
+          ...new MockedGeminiClientClass(mockConfig), // NOTE(review): spread copies own enumerable props only — confirm the mocked methods survive
+          getLoopDetectionService: () => mockLoopDetectionService,
+        };
+        mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockClient); // NOTE(review): overrides shared mockConfig — presumably reset between tests; verify
+
+        // First stream only emits LoopDetected
+        mockSendMessageStream.mockReturnValueOnce(
+          (async function* () {
+            yield {
+              type: ServerGeminiEventType.LoopDetected,
+            };
+          })(),
+        );
+
+        // Second stream (the retry) emits content and then finishes
+        mockSendMessageStream.mockReturnValueOnce(
+          (async function* () {
+            yield {
+              type: ServerGeminiEventType.Content,
+              value: 'Retry success',
+            };
+            yield {
+              type: ServerGeminiEventType.Finished,
+              value: { reason: 'STOP' },
+            };
+          })(),
+        );
+
+        const { result } = renderTestHook();
+
+        await act(async () => {
+          await result.current.submitQuery('test query');
+        });
+
+        await waitFor(() => {
+          expect(
+            result.current.loopDetectionConfirmationRequest,
+          ).not.toBeNull();
+        });
+
+        // User selects "disable", which should resubmit the last query as a continuation
+        await act(async () => {
+          result.current.loopDetectionConfirmationRequest?.onComplete({ // result is not awaited here; the waitFor below covers the retry
+            userSelection: 'disable',
+          });
+        });
+
+        // Verify disableForSession was called exactly once
+        expect(
+          mockLoopDetectionService.disableForSession,
+        ).toHaveBeenCalledTimes(1);
+
+        // The continuation must reach sendMessageStream as a second call
+        await waitFor(() => {
+          expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
+          expect(mockSendMessageStream).toHaveBeenNthCalledWith(
+            2,
+            'test query',
+            expect.any(AbortSignal),
+            expect.any(String),
+            undefined,
+            false,
+            'test query',
+          );
+        });
+      });
+    });
  });

  describe('Agent Execution Events', () => {
@@ -216,7 +216,15 @@ export const useGeminiStream = (
  const previousApprovalModeRef = useRef<ApprovalMode>(
    config.getApprovalMode(),
  );
-  const [isResponding, setIsResponding] = useState<boolean>(false);
+  const [isResponding, setIsRespondingState] = useState<boolean>(false);
+  const isRespondingRef = useRef<boolean>(false); // same flag as isResponding, readable synchronously from callbacks
+  const setIsResponding = useCallback( // single setter so the state and the ref are always written together
+    (value: boolean) => {
+      setIsRespondingState(value);
+      isRespondingRef.current = value; // plain assignment: readers see it without waiting for a re-render
+    },
+    [setIsRespondingState],
+  );
  const [thought, thoughtRef, setThought] =
    useStateAndRef<ThoughtSummary | null>(null);
  const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
@@ -320,11 +328,14 @@ export const useGeminiStream = (
    return (executingShellTool as TrackedExecutingToolCall | undefined)?.pid;
  }, [toolCalls]);

-  const onExec = useCallback(async (done: Promise<void>) => {
-    setIsResponding(true);
-    await done;
-    setIsResponding(false);
-  }, []);
+  // Marks the hook as responding for as long as `done` is pending.
+  const onExec = useCallback(
+    async (done: Promise<void>) => {
+      setIsResponding(true);
+      try {
+        await done;
+      } finally {
+        // Always clear the flag, even when `done` rejects: submitQuery returns
+        // early while isRespondingRef.current is true, so leaving it set would
+        // block every later non-continuation query.
+        setIsResponding(false);
+      }
+    },
+    [setIsResponding],
+  );

  const {
    handleShellCommand,
@@ -538,7 +549,7 @@ export const useGeminiStream = (
      setIsResponding(false);
    }
    prevActiveShellPtyIdRef.current = activeShellPtyId;
-  }, [activeShellPtyId, addItem]);
+  }, [activeShellPtyId, addItem, setIsResponding]);

  useEffect(() => {
    if (
@@ -700,6 +711,7 @@ export const useGeminiStream = (
    cancelAllToolCalls,
    toolCalls,
    activeShellPtyId,
+    setIsResponding,
  ]);

  useKeypress(
@@ -952,7 +964,13 @@ export const useGeminiStream = (
      setIsResponding(false);
      setThought(null); // Reset thought when user cancels
    },
-    [addItem, pendingHistoryItemRef, setPendingHistoryItem, setThought],
+    [
+      addItem,
+      pendingHistoryItemRef,
+      setPendingHistoryItem,
+      setThought,
+      setIsResponding,
+    ],
  );

  const handleErrorEvent = useCallback(
@@ -1358,14 +1376,15 @@ export const useGeminiStream = (
        async ({ metadata: spanMetadata }) => {
          spanMetadata.input = query;

-          const queryId = `${Date.now()}-${Math.random()}`;
-          activeQueryIdRef.current = queryId;
          if (
-            (streamingState === StreamingState.Responding ||
+            (isRespondingRef.current ||
+              streamingState === StreamingState.Responding ||
              streamingState === StreamingState.WaitingForConfirmation) &&
            !options?.isContinuation
          )
            return;
+          const queryId = `${Date.now()}-${Math.random()}`;
+          activeQueryIdRef.current = queryId;

          const userMessageTimestamp = Date.now();

@@ -1452,7 +1471,7 @@ export const useGeminiStream = (
                loopDetectedRef.current = false;
                // Show the confirmation dialog to choose whether to disable loop detection
                setLoopDetectionConfirmationRequest({
-                  onComplete: (result: {
+                  onComplete: async (result: {
                    userSelection: 'disable' | 'keep';
                  }) => {
                    setLoopDetectionConfirmationRequest(null);
@@ -1468,8 +1487,7 @@ export const useGeminiStream = (
                      });

                      if (lastQueryRef.current && lastPromptIdRef.current) {
-                        // eslint-disable-next-line @typescript-eslint/no-floating-promises
-                        submitQuery(
+                        await submitQuery(
                          lastQueryRef.current,
                          { isContinuation: true },
                          lastPromptIdRef.current,
@@ -1537,6 +1555,7 @@ export const useGeminiStream = (
      maybeAddSuppressedToolErrorNote,
      maybeAddLowVerbosityFailureNote,
      settings.merged.billing?.overageStrategy,
+      setIsResponding,
    ],
  );

@@ -1803,6 +1822,7 @@ export const useGeminiStream = (
      isLowErrorVerbosity,
      maybeAddSuppressedToolErrorNote,
      maybeAddLowVerbosityFailureNote,
+      setIsResponding,
    ],
  );