fix: push all messages via Chat API for threading, add retry and model override

- All bot responses are now pushed via the Chat API instead of the synchronous webhook response — Add-ons createMessageAction ignores thread info in Spaces, causing messages to appear as top-level instead of in-thread
- Webhook returns a bare {} for empty responses to prevent Google Chat from retrying (a wrapped empty createMessageAction caused a retry storm)
- Add retryStream() with exponential backoff (3 retries, 5s/10s/20s) for transient A2A server 500/503 errors
- Add GEMINI_MODEL env var override in config (was hardcoded to gemini-3-pro-preview, which hit capacity limits)
- Extract pushAndReturn() helper for fire-and-forget Chat API sends
2026-05-18 15:52:53 -07:00 · 2026-02-19 14:00:26 -05:00
parent fc9623248d
commit a9eb2ee5e3
3 changed files with 124 additions and 40 deletions
@@ -15,6 +15,7 @@ import type { SessionInfo } from './session-store.js';
 import { SessionStore } from './session-store.js';
 import {
  A2ABridgeClient,
+  type A2AStreamEventData,
  extractIdsFromResponse,
  extractAllParts,
  extractTextFromParts,
@@ -95,10 +96,30 @@ export class ChatBridgeHandler {
    }
  }

+  /**
+   * Sends `text` into the given thread through the Chat API without waiting
+   * for the result, then hands back an empty webhook response.
+   *
+   * Replying through the Add-ons createMessageAction drops thread info and
+   * posts a top-level message in Spaces, so every user-visible message is
+   * routed through the Chat API instead. A failed send is only logged.
+   */
+  private pushAndReturn(
+    spaceName: string,
+    threadName: string,
+    text: string,
+  ): ChatResponse {
+    const pending = this.chatApiClient.sendMessage(spaceName, threadName, {
+      text,
+    });
+    // Not awaited: the empty response is returned without waiting for the send.
+    pending.catch((reason) => {
+      const detail = reason instanceof Error ? reason.message : 'Unknown error';
+      logger.warn(`[ChatBridge] Failed to push message: ${detail}`);
+    });
+    const emptyAck: ChatResponse = {};
+    return emptyAck;
+  }
+
  /**
   * Handles a MESSAGE event: user sent a text message in Chat.
-   * Returns an immediate "Processing..." response and processes
-   * the A2A request asynchronously, pushing results via Chat API.
+   * All responses are pushed via Chat API for proper threading in Spaces.
+   * The webhook always returns an empty response.
   */
  private async handleMessage(event: ChatEvent): Promise<ChatResponse> {
    const message = event.message;
@@ -117,7 +138,7 @@ export class ChatBridgeHandler {
    const threadName = message.thread.name;
    const spaceName = event.space.name;

-    // Handle slash commands synchronously (fast, no A2A call)
+    // Handle slash commands — push response via Chat API for threading
    const trimmed = text.trim().toLowerCase();
    if (
      trimmed === '/reset' ||
@@ -127,7 +148,11 @@ export class ChatBridgeHandler {
    ) {
      this.sessionStore.remove(threadName);
      logger.info(`[ChatBridge] Session cleared for thread ${threadName}`);
-      return { text: 'Session cleared. Send a new message to start fresh.' };
+      return this.pushAndReturn(
+        spaceName,
+        threadName,
+        'Session cleared. Send a new message to start fresh.',
+      );
    }

    const session = this.sessionStore.getOrCreate(threadName, spaceName);
@@ -135,33 +160,39 @@ export class ChatBridgeHandler {
    if (trimmed === '/yolo') {
      session.yoloMode = true;
      logger.info(`[ChatBridge] YOLO mode enabled for thread ${threadName}`);
-      return {
-        text: 'YOLO mode enabled. All tool calls will be auto-approved.',
-      };
+      return this.pushAndReturn(
+        spaceName,
+        threadName,
+        'YOLO mode enabled. All tool calls will be auto-approved.',
+      );
    }

    if (trimmed === '/safe') {
      session.yoloMode = false;
      logger.info(`[ChatBridge] YOLO mode disabled for thread ${threadName}`);
-      return { text: 'Safe mode enabled. Tool calls will require approval.' };
+      return this.pushAndReturn(
+        spaceName,
+        threadName,
+        'Safe mode enabled. Tool calls will require approval.',
+      );
    }

    logger.info(
      `[ChatBridge] MESSAGE from ${event.user.displayName}: "${text.substring(0, 100)}"`,
    );

-    // Handle text-based tool approval responses synchronously
-    // (sendToolConfirmation is fast — no need for async)
+    // Handle text-based tool approval responses
    if (session.pendingToolApproval && this.isToolApprovalText(trimmed)) {
      return this.handleToolApprovalText(event, session, trimmed);
    }

    // Guard against overlapping async requests
    if (session.asyncProcessing) {
-      return {
-        text: 'Still processing your previous request. Please wait...',
-        thread: { name: threadName },
-      };
+      return this.pushAndReturn(
+        spaceName,
+        threadName,
+        'Still processing your previous request. Please wait...',
+      );
    }

    // Fire-and-forget async processing
@@ -170,14 +201,12 @@ export class ChatBridgeHandler {
      logger.error(`[ChatBridge] Async processing failed: ${msg}`, err);
    });

-    // Return immediate acknowledgment
-    return {
-      text: '_Processing your request..._',
-      thread: {
-        threadKey: message.thread.threadKey || threadName,
-        name: threadName,
-      },
-    };
+    // Push "Processing..." via Chat API for proper threading
+    return this.pushAndReturn(
+      spaceName,
+      threadName,
+      '_Processing your request..._',
+    );
  }

  /**
@@ -238,13 +267,7 @@ export class ChatBridgeHandler {
    const ackText = isReject
      ? '_Tool rejected._'
      : '_Tool approved, processing..._';
-    return {
-      text: ackText,
-      thread: {
-        threadKey: message.thread.threadKey || threadName,
-        name: threadName,
-      },
-    };
+    return this.pushAndReturn(event.space.name, threadName, ackText);
  }

  /**
@@ -368,10 +391,16 @@ export class ChatBridgeHandler {
    session.asyncProcessing = true;

    try {
-      const stream = this.a2aClient.sendMessageStream(text, {
-        contextId: session.contextId,
-        taskId: session.taskId,
-      });
+      // Retry streaming if the A2A server returns 500 (no available instance).
+      // With concurrency=1, this happens when another request is in-flight.
+      const stream = await this.retryStream(
+        () =>
+          this.a2aClient.sendMessageStream(text, {
+            contextId: session.contextId,
+            taskId: session.taskId,
+          }),
+        session,
+      );

      let lastText = '';
      let lastTaskId: string | undefined;
@@ -517,6 +546,54 @@ export class ChatBridgeHandler {
    }
  }

+  /**
+   * Opens an A2A stream, retrying when the connection attempt fails with a
+   * transient server error.
+   *
+   * Cloud Run returns 500 "no available instance" when concurrency is
+   * exhausted. The stream is probed by awaiting its first event; if that
+   * throws an error whose message mentions status 500 or 503, a fresh stream
+   * is created after an exponential backoff (5s, 10s, 20s), for at most 3
+   * retries. Any other error, or a failure on the final attempt, is rethrown.
+   * Errors raised after the first event are not retried.
+   *
+   * @param createStream Factory that starts a fresh stream on every call.
+   * @param session Session whose `cancelled` flag stops further probing.
+   * @returns A generator that yields the probed first event followed by the
+   *   remaining events of the stream that connected.
+   * @throws The last probe error when it is not retryable or retries ran out.
+   */
+  private async retryStream(
+    createStream: () => AsyncGenerator<A2AStreamEventData, void, undefined>,
+    session: SessionInfo,
+  ): Promise<AsyncGenerator<A2AStreamEventData, void, undefined>> {
+    const MAX_RETRIES = 3;
+    const BASE_DELAY_MS = 5000;
+    // Match the status code as a whole token so unrelated numbers such as
+    // "5000ms" do not trigger a retry.
+    const RETRYABLE_STATUS = /\b(?:500|503)\b/;
+
+    // Replays the already-consumed first result, then drains the source.
+    // The finally block closes the source even when the consumer stops early
+    // (break/throw in its loop), so the underlying stream is not leaked.
+    const replay = async function* (
+      source: AsyncGenerator<A2AStreamEventData, void, undefined>,
+      first: IteratorResult<A2AStreamEventData, void>,
+    ): AsyncGenerator<A2AStreamEventData, void, undefined> {
+      try {
+        for (let step = first; !step.done; step = await source.next()) {
+          yield step.value;
+        }
+      } finally {
+        await source.return(undefined);
+      }
+    };
+
+    // Every iteration returns, throws, or sleeps and loops again; the final
+    // attempt always throws on failure, so no exit condition is needed.
+    for (let attempt = 0; ; attempt++) {
+      // Cancelled: skip probing and hand back an unprobed stream.
+      // NOTE(review): relies on the caller handling the cancelled session.
+      if (session.cancelled) return createStream();
+      try {
+        const stream = createStream();
+        // Pull the first value to verify the stream actually connects.
+        const first = await stream.next();
+        return replay(stream, first);
+      } catch (error: unknown) {
+        const msg = error instanceof Error ? error.message : '';
+        const isRetryable = RETRYABLE_STATUS.test(msg);
+        if (!isRetryable || attempt === MAX_RETRIES) throw error;
+        const delay = BASE_DELAY_MS * 2 ** attempt;
+        logger.warn(
+          `[ChatBridge] A2A server unavailable, retrying in ${delay}ms (attempt ${attempt + 1}/${MAX_RETRIES})`,
+        );
+        await new Promise<void>((resolve) => setTimeout(resolve, delay));
+      }
+    }
+  }
+
  /**
   * Auto-approves tool calls in YOLO mode.
   * Sends all pending approvals in a single batch message to avoid hanging
@@ -325,12 +325,19 @@ export function createChatBridgeRoutes(config: ChatBridgeConfig): Router {
        }

        if (isAddOnsFormat) {
-          // Wrap in Workspace Add-ons response format
-          const addOnsResponse = wrapAddOnsResponse(response);
-          logger.info(
-            `[ChatBridge] Add-ons response: ${JSON.stringify(addOnsResponse).substring(0, 200)}`,
-          );
-          res.json(addOnsResponse);
+          // If the handler returned an empty response (messages pushed via
+          // Chat API), return a bare {} so Add-ons doesn't try to create
+          // an empty message — which causes Google Chat to retry the webhook.
+          if (!response.text && !response.cardsV2 && !response.actionResponse) {
+            logger.info(`[ChatBridge] Add-ons response: {} (empty ack)`);
+            res.json({});
+          } else {
+            const addOnsResponse = wrapAddOnsResponse(response);
+            logger.info(
+              `[ChatBridge] Add-ons response: ${JSON.stringify(addOnsResponse).substring(0, 200)}`,
+            );
+            res.json(addOnsResponse);
+          }
        } else {
          res.json(response);
        }
@@ -59,7 +59,7 @@ export async function loadConfig(

  const configParams: ConfigParameters = {
    sessionId: taskId,
-    model: PREVIEW_GEMINI_MODEL,
+    model: process.env['GEMINI_MODEL'] || PREVIEW_GEMINI_MODEL,
    embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
    sandbox: undefined, // Sandbox might not be relevant for a server-side agent
    targetDir: workspaceDir, // Or a specific directory the agent operates on