++

2026-04-27 21:44:25 -07:00 · 2026-03-11 18:58:50 +00:00
parent 23575ff83d
commit 3291d3a58b
15 changed files with 542 additions and 117 deletions
@@ -4,10 +4,11 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { describe, it, expect, afterEach, vi } from 'vitest';
+import { describe, it, expect, vi, afterEach } from 'vitest';
 import { AppRig } from '../test-utils/AppRig.js';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
+import { FakeContentGenerator } from '@google/gemini-cli-core';
 import { PolicyDecision } from '@google/gemini-cli-core';

 const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -27,8 +28,9 @@ describe('Auto-distillation Integration', () => {
      __dirname,
      '../test-utils/fixtures/auto-distillation.responses',
    );
+    const contentGenerator = await FakeContentGenerator.fromFile(fakeResponsesPath);
    rig = new AppRig({
-      fakeResponsesPath,
+      contentGenerator,
    });

    await rig.initialize();
@@ -0,0 +1,64 @@
+import { describe, it, expect } from 'vitest';
+import { AppRig } from '../test-utils/AppRig.js';
+import {
+  FakeContentGenerator,
+  FallbackContentGenerator,
+  userText,
+  mockGenerateContentStreamText,
+  extractUserPrompts,
+  extractFakeResponses,
+  type ScriptItem
+} from '@google/gemini-cli-core';
+
+describe('Hybrid Handoff (Mock User to Synthetic Live Model)', () => {
+  it('successfully transitions from mock responses to live responses', async () => {
+    // 1. Define the priming script: user prompts alternating with canned model replies
+    const primingScript: ScriptItem[] = [
+      userText('Start priming'),
+      mockGenerateContentStreamText('Hello! I am a fake response.'),
+      userText('Continue priming'),
+      mockGenerateContentStreamText('Pump primed successfully. Ready for handoff.'),
+    ];
+
+    // 2. Set up the primary fake generator from the fake responses extracted from the script
+    const fakeGenerator = new FakeContentGenerator(extractFakeResponses(primingScript));
+
+    // 3. Set up a "live" fallback generator (it's synthetic, so we don't need API keys)
+    const mockLiveFallback = new FakeContentGenerator([
+      mockGenerateContentStreamText('The answer is 4.'),
+    ]);
+    
+    // Stub countTokens with a fixed count so AppRig doesn't hang checking size during truncation
+    mockLiveFallback.countTokens = async () => ({ totalTokens: 10 });
+    
+    // 4. Compose them using FallbackContentGenerator (fake generator first, fallback second)
+    const composedGenerator = new FallbackContentGenerator(fakeGenerator, mockLiveFallback);
+
+    // 5. Create the AppRig, injecting the composed generator via the contentGenerator option
+    const rig = new AppRig({
+      contentGenerator: composedGenerator,
+      configOverrides: {
+        fakeResponses: [], // empty on purpose: ensure it avoids disk IO attempts internally
+      }
+    });
+    await rig.initialize();
+    
+    rig.render();
+    await rig.waitForIdle();
+
+    // 6. Drive the Mock User with the prompts extracted from the script (timeout 10000 per interaction)
+    await rig.driveMockUser(extractUserPrompts(primingScript), 10000);
+
+    // 7. Send the final prompt, which should exhaust the primary generator and trigger the fallback
+    await rig.sendMessage('What is 2 + 2?');
+    
+    // 8. Wait for the fallback generator's response to render
+    await rig.waitForOutput('The answer is 4.', 10000);
+
+    const output = rig.getStaticOutput();
+    expect(output).toContain('The answer is 4.');
+
+    // Wait for everything to settle so React act() warnings don't fire during unmount
+    await rig.drainBreakpointsUntilIdle(undefined, 10000);
+  });
+});
@@ -8,6 +8,7 @@ import { describe, it, afterEach } from 'vitest';
 import { AppRig } from '../test-utils/AppRig.js';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
+import { FakeContentGenerator } from '@google/gemini-cli-core';
 import { PolicyDecision } from '@google/gemini-cli-core';

 const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -24,8 +25,9 @@ describe('Model Steering Integration', () => {
      __dirname,
      '../test-utils/fixtures/steering.responses',
    );
+    const contentGenerator = await FakeContentGenerator.fromFile(fakeResponsesPath);
    rig = new AppRig({
-      fakeResponsesPath,
+      contentGenerator,
      configOverrides: { modelSteering: true },
    });
    await rig.initialize();
@@ -9,6 +9,7 @@ import { act } from 'react';
 import { AppRig } from './AppRig.js';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
+import { FakeContentGenerator } from '@google/gemini-cli-core';
 import { debugLogger } from '@google/gemini-cli-core';

 const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -26,8 +27,9 @@ describe('AppRig', () => {
      'fixtures',
      'steering.responses',
    );
+    const contentGenerator = await FakeContentGenerator.fromFile(fakeResponsesPath);
    rig = new AppRig({
-      fakeResponsesPath,
+      contentGenerator,
      configOverrides: { modelSteering: true },
    });
    await rig.initialize();
@@ -67,7 +69,8 @@ describe('AppRig', () => {
      'fixtures',
      'simple.responses',
    );
-    rig = new AppRig({ fakeResponsesPath });
+    const contentGenerator = await FakeContentGenerator.fromFile(fakeResponsesPath);
+    rig = new AppRig({ contentGenerator });
    await rig.initialize();
    await act(async () => {
      rig!.render();
@@ -31,6 +31,7 @@ import {
  debugLogger,
  CoreToolCallStatus,
  ConsecaSafetyChecker,
+  type ContentGenerator,
 } from '@google/gemini-cli-core';
 import {
  type MockShellCommand,
@@ -54,32 +55,38 @@ import type { Content, GenerateContentParameters } from '@google/genai';
 const sessionStateMap = new Map<string, StreamingState>();
 const activeRigs = new Map<string, AppRig>();

-// Mock StreamingContext to report state changes back to the observer
-vi.mock('../ui/contexts/StreamingContext.js', async (importOriginal) => {
+// Wrap useGeminiStream so each streamingState change is recorded in sessionStateMap for the observer
+vi.mock('../ui/hooks/useGeminiStream.js', async (importOriginal) => {
  const original =
-    await importOriginal<typeof import('../ui/contexts/StreamingContext.js')>();
-  const { useConfig } = await import('../ui/contexts/ConfigContext.js');
+    await importOriginal<typeof import('../ui/hooks/useGeminiStream.js')>();
  const React = await import('react');

  return {
    ...original,
-    useStreamingContext: () => {
-      const state = original.useStreamingContext();
-      const config = useConfig();
-      const sessionId = config.getSessionId();
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    useGeminiStream: (...args: any[]) => {
+      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
+      const result = (original.useGeminiStream as any)(...args);
+      const config = args[3]; // config is the 4th positional argument; it may be absent, hence the ?. below
+      const sessionId = config?.getSessionId?.();

      React.useEffect(() => {
-        sessionStateMap.set(sessionId, state);
-        // If we see activity, we are no longer "awaiting" the start of a response
-        if (state !== StreamingState.Idle) {
-          const rig = activeRigs.get(sessionId);
-          if (rig) {
-            rig.awaitingResponse = false;
+        if (sessionId) {
+          debugLogger.log(
+            `[AppRig React Hook] State updating to: ${result.streamingState}`,
+          );
+          sessionStateMap.set(sessionId, result.streamingState);
+          // Any non-idle state means the response has started, so clear the rig's awaitingResponse flag
+          if (result.streamingState !== StreamingState.Idle) {
+            const rig = activeRigs.get(sessionId);
+            if (rig) {
+              rig.awaitingResponse = false;
+            }
          }
        }
-      }, [sessionId, state]);
+      }, [sessionId, result.streamingState]);

-      return state;
+      return result;
    },
  };
 });
@@ -137,10 +144,10 @@ vi.mock('../ui/components/GeminiRespondingSpinner.js', async () => {
 });

 export interface AppRigOptions {
-  fakeResponsesPath?: string;
  terminalWidth?: number;
  terminalHeight?: number;
  configOverrides?: Partial<ConfigParameters>;
+  contentGenerator?: ContentGenerator; // forwarded to the rig's config as its contentGenerator parameter
 }

 export interface PendingConfirmation {
@@ -160,11 +167,13 @@ export class AppRig {
  private pendingConfirmations = new Map<string, PendingConfirmation>();
  private breakpointTools = new Set<string | undefined>();
  private lastAwaitedConfirmation: PendingConfirmation | undefined;
+  private lastIsBusyLog = 0;

  /**
   * True if a message was just sent but React hasn't yet reported a non-idle state.
   */
  awaitingResponse = false;
+  activeStreamCount = 0;

  constructor(private options: AppRigOptions = {}) {
    const uniqueId = randomUUID();
@@ -194,7 +203,7 @@ export class AppRig {
      cwd: this.testDir,
      debugMode: false,
      model: 'test-model',
-      fakeResponses: this.options.fakeResponsesPath,
+      contentGenerator: this.options.contentGenerator,
      interactive: true,
      approvalMode,
      policyEngineConfig,
@@ -205,8 +214,38 @@ export class AppRig {
    };
    this.config = makeFakeConfig(configParams);

-    if (this.options.fakeResponsesPath) {
-      this.stubRefreshAuth();
+    // Track active streams directly from the client to prevent false idleness during synchronous mock yields
+    const client = this.config.getGeminiClient();
+    const originalStream = client.sendMessageStream.bind(client);
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-type-assertion
+    client.sendMessageStream = async function* (this: AppRig, ...args: any[]): AsyncGenerator<any, any, any> {
+      this.awaitingResponse = false;
+      this.activeStreamCount++;
+      try {
+        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any
+        yield* (originalStream as any)(...args);
+      } finally {
+        this.activeStreamCount = Math.max(0, this.activeStreamCount - 1);
+      }
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    }.bind(this) as any;
+
+    if (this.config.fakeResponses || this.options.contentGenerator) {
+      if (!this.options.contentGenerator && !this.config.fakeResponses) {
+        this.stubRefreshAuth();
+      }
+      if (!process.env['GEMINI_API_KEY']) {
+        vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+      }
+      MockShellExecutionService.setPassthrough(false);
+    } else {
+      if (!process.env['GEMINI_API_KEY']) {
+        throw new Error(
+          'GEMINI_API_KEY must be set in the environment for live model tests.',
+        );
+      }
+      // For live tests, we allow falling through to the real shell service if no mock matches
+      MockShellExecutionService.setPassthrough(true);
    }

    this.setupMessageBusListeners();
@@ -222,18 +261,6 @@ export class AppRig {
  private setupEnvironment() {
    // Stub environment variables to avoid interference from developer's machine
    vi.stubEnv('GEMINI_CLI_HOME', this.testDir);
-    if (this.options.fakeResponsesPath) {
-      vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
-      MockShellExecutionService.setPassthrough(false);
-    } else {
-      if (!process.env['GEMINI_API_KEY']) {
-        throw new Error(
-          'GEMINI_API_KEY must be set in the environment for live model tests.',
-        );
-      }
-      // For live tests, we allow falling through to the real shell service if no mock matches
-      MockShellExecutionService.setPassthrough(true);
-    }
    vi.stubEnv('GEMINI_DEFAULT_AUTH_TYPE', AuthType.USE_GEMINI);
  }

@@ -348,18 +375,28 @@ export class AppRig {
   * Returns true if the agent is currently busy (responding or executing tools).
   */
  isBusy(): boolean {
-    if (this.awaitingResponse) {
+    const reactState = sessionStateMap.get(this.sessionId);
+    
+    if (reactState && reactState !== StreamingState.Idle) {
+       this.awaitingResponse = false;
+    }
+
+    if (!this.lastIsBusyLog || Date.now() - this.lastIsBusyLog > 1000) {
+      debugLogger.log(`[AppRig] isBusy check: awaitingResponse=${this.awaitingResponse}, activeStreams=${this.activeStreamCount}, reactState=${reactState}`);
+      this.lastIsBusyLog = Date.now();
+    }
+
+    if (this.awaitingResponse || this.activeStreamCount > 0) {
      return true;
    }

-    const reactState = sessionStateMap.get(this.sessionId);
    // If we have a React-based state, use it as the definitive signal.
    // 'responding' and 'waiting-for-confirmation' both count as busy for the overall task.
    if (reactState !== undefined) {
      return reactState !== StreamingState.Idle;
    }

-    // Fallback to tool tracking if React hasn't reported yet
+    // Fallback to tool tracking
    const isAnyToolActive = this.toolCalls.some((tc) => {
      if (
        tc.status === CoreToolCallStatus.Executing ||
@@ -535,6 +572,7 @@ export class AppRig {
    | { type: 'confirmation'; confirmation: PendingConfirmation }
    | { type: 'idle' }
  > {
+    debugLogger.log(`[AppRig] waitForNextEvent started`);
    let confirmation: PendingConfirmation | undefined;
    let isIdle = false;

@@ -554,6 +592,7 @@ export class AppRig {
      },
    );

+    debugLogger.log(`[AppRig] waitForNextEvent finished: confirmation=${!!confirmation}, isIdle=${isIdle}`);
    if (confirmation) {
      this.lastAwaitedConfirmation = confirmation;
      return { type: 'confirmation', confirmation };
@@ -630,8 +669,11 @@ export class AppRig {
    onConfirmation?: (confirmation: PendingConfirmation) => void | boolean,
    timeout = 60000,
  ) {
+    debugLogger.log(`[AppRig] drainBreakpointsUntilIdle started`);
    while (true) {
+      debugLogger.log(`[AppRig] drainBreakpointsUntilIdle: waiting for next event`);
      const event = await this.waitForNextEvent(timeout);
+      debugLogger.log(`[AppRig] drainBreakpointsUntilIdle: got event type ${event.type}`);
      if (event.type === 'idle') {
        break;
      }
@@ -640,9 +682,30 @@ export class AppRig {
      const handled = onConfirmation?.(confirmation);

      if (!handled) {
+        debugLogger.log(`[AppRig] drainBreakpointsUntilIdle: resolving tool ${confirmation.toolName}`);
        await this.resolveTool(confirmation);
      }
    }
+    debugLogger.log(`[AppRig] drainBreakpointsUntilIdle finished`);
+  }
+
+  /**
+   * Mock User: sends each prompt in order, draining breakpoints until idle after
+   * each one, to prime history before handing off to a live trial or eval.
+   *
+   * @param prompts User messages to send sequentially; an empty array sends nothing.
+   * @param timeout Per-prompt timeout passed to drainBreakpointsUntilIdle (default 60000).
+   */
+  async driveMockUser(prompts: string[], timeout = 60000) {
+    debugLogger.log(`[AppRig] driveMockUser started with ${prompts.length} prompts`);
+    for (let i = 0; i < prompts.length; i++) {
+      const prompt = prompts[i];
+      debugLogger.log(`[AppRig] driveMockUser: sending prompt ${i + 1}: ${prompt}`);
+      await this.sendMessage(prompt);
+      debugLogger.log(`[AppRig] driveMockUser: draining breakpoints after prompt ${i + 1}`);
+      await this.drainBreakpointsUntilIdle(undefined, timeout);
+    }
+    debugLogger.log(`[AppRig] driveMockUser finished`);
  }

  getConfig(): Config {
@@ -776,9 +776,47 @@ export const renderWithProviders = (
            <VimModeProvider>
              <ShellFocusContext.Provider value={shellFocus}>
                <SessionStatsProvider>
-                  <StreamingContext.Provider
-                    value={finalUiState.streamingState}
-                  >
+                  {providedUiState?.streamingState !== undefined ? (
+                    <StreamingContext.Provider
+                      value={finalUiState.streamingState}
+                    >
+                      <UIActionsContext.Provider value={finalUIActions}>
+                        <OverflowProvider>
+                          <ToolActionsProvider
+                            config={finalConfig}
+                            toolCalls={allToolCalls}
+                          >
+                            <AskUserActionsProvider
+                              request={null}
+                              onSubmit={vi.fn()}
+                              onCancel={vi.fn()}
+                            >
+                              <KeypressProvider>
+                                <MouseProvider
+                                  mouseEventsEnabled={mouseEventsEnabled}
+                                >
+                                  <TerminalProvider>
+                                    <ScrollProvider>
+                                      <ContextCapture>
+                                        <Box
+                                          width={terminalWidth}
+                                          flexShrink={0}
+                                          flexGrow={0}
+                                          flexDirection="column"
+                                        >
+                                          {component}
+                                        </Box>
+                                      </ContextCapture>
+                                    </ScrollProvider>
+                                  </TerminalProvider>
+                                </MouseProvider>
+                              </KeypressProvider>
+                            </AskUserActionsProvider>
+                          </ToolActionsProvider>
+                        </OverflowProvider>
+                      </UIActionsContext.Provider>
+                    </StreamingContext.Provider>
+                  ) : (
                    <UIActionsContext.Provider value={finalUIActions}>
                      <OverflowProvider>
                        <ToolActionsProvider
@@ -814,7 +852,7 @@ export const renderWithProviders = (
                        </ToolActionsProvider>
                      </OverflowProvider>
                    </UIActionsContext.Provider>
-                  </StreamingContext.Provider>
+                  )}
                </SessionStatsProvider>
              </ShellFocusContext.Provider>
            </VimModeProvider>