refactor(core): Extract thought parsing logic into a dedicated utility (#9503)

2026-04-29 22:44:45 -07:00 · 2025-09-24 15:38:36 -04:00
parent ad59be0c81
commit 22740ddceb
5 changed files with 138 additions and 19 deletions
@@ -27,6 +27,7 @@ import {
  toFriendlyError,
 } from '../utils/errors.js';
 import type { GeminiChat } from './geminiChat.js';
+import { parseThought, type ThoughtSummary } from '../utils/thoughtUtils.js';

 // Define a structure for tools passed to the server
 export interface ServerTool {
@@ -100,11 +101,6 @@ export interface ServerToolCallConfirmationDetails {
  details: ToolCallConfirmationDetails;
 }

-export type ThoughtSummary = {
-  subject: string;
-  description: string;
-};
-
 export type ServerGeminiContentEvent = {
  type: GeminiEventType.Content;
  value: string;
@@ -249,19 +245,7 @@ export class Turn {

        const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0];
        if (thoughtPart?.thought) {
-          // Thought always has a bold "subject" part enclosed in double asterisks
-          // (e.g., **Subject**). The rest of the string is considered the description.
-          const rawText = thoughtPart.text ?? '';
-          const subjectStringMatches = rawText.match(/\*\*(.*?)\*\*/s);
-          const subject = subjectStringMatches
-            ? subjectStringMatches[1].trim()
-            : '';
-          const description = rawText.replace(/\*\*(.*?)\*\*/s, '').trim();
-          const thought: ThoughtSummary = {
-            subject,
-            description,
-          };
-
+          const thought = parseThought(thoughtPart.text ?? '');
          yield {
            type: GeminiEventType.Thought,
            value: thought,
@@ -55,6 +55,7 @@ export * from './utils/workspaceContext.js';
 export * from './utils/ignorePatterns.js';
 export * from './utils/partUtils.js';
 export * from './utils/promptIdContext.js';
+export * from './utils/thoughtUtils.js';

 // Export services
 export * from './services/fileDiscoveryService.js';
@@ -6,7 +6,7 @@

 import { type Config } from '../config/config.js';
 import { type Status } from '../core/coreToolScheduler.js';
-import { type ThoughtSummary } from '../core/turn.js';
+import { type ThoughtSummary } from '../utils/thoughtUtils.js';
 import { getProjectHash } from '../utils/paths.js';
 import path from 'node:path';
 import fs from 'node:fs';
@@ -0,0 +1,80 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { parseThought } from './thoughtUtils.js';
+
+describe('parseThought', () => {
+  it.each([
+    {
+      name: 'a standard thought with subject and description',
+      rawText: '**Subject:** This is the description.',
+      expected: {
+        subject: 'Subject:',
+        description: 'This is the description.',
+      },
+    },
+    {
+      name: 'leading and trailing whitespace in the raw string',
+      rawText: '  **Subject** description with spaces   ',
+      expected: { subject: 'Subject', description: 'description with spaces' },
+    },
+    {
+      name: 'whitespace surrounding the subject content',
+      rawText: '** Subject  **',
+      expected: { subject: 'Subject', description: '' },
+    },
+    {
+      name: 'a thought with only a subject',
+      rawText: '**Only Subject**',
+      expected: { subject: 'Only Subject', description: '' },
+    },
+    {
+      name: 'a thought with only a description (no subject)',
+      rawText: 'This is just a description.',
+      expected: { subject: '', description: 'This is just a description.' },
+    },
+    {
+      name: 'an empty string input',
+      rawText: '',
+      expected: { subject: '', description: '' },
+    },
+    {
+      name: 'newlines within the subject and description',
+      rawText:
+        '**Multi-line\nSubject**\nHere is a description\nspread across lines.',
+      expected: {
+        subject: 'Multi-line\nSubject',
+        description: 'Here is a description\nspread across lines.',
+      },
+    },
+    {
+      name: 'only the first subject if multiple are present',
+      rawText: '**First** some text **Second**',
+      expected: { subject: 'First', description: 'some text **Second**' },
+    },
+    {
+      name: 'text before and after the subject',
+      rawText: 'Prefix text **Subject** Suffix text.',
+      expected: {
+        subject: 'Subject',
+        description: 'Prefix text  Suffix text.', // both spaces around the removed subject are kept
+      },
+    },
+    {
+      name: 'an unclosed subject tag',
+      rawText: 'Text with **an unclosed subject',
+      expected: { subject: '', description: 'Text with **an unclosed subject' },
+    },
+    {
+      name: 'an empty subject tag', // '****' is consumed as a delimiter pair enclosing nothing
+      rawText: 'A thought with **** in the middle.',
+      expected: { subject: '', description: 'A thought with  in the middle.' },
+    },
+  ])('should correctly parse $name', ({ rawText, expected }) => {
+    expect(parseThought(rawText)).toEqual(expected);
+  });
+});
@@ -0,0 +1,54 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export type ThoughtSummary = {
+  subject: string; // From parseThought: trimmed text inside the first ** pair, or ''.
+  description: string; // From parseThought: the remaining text, trimmed.
+};
+
+const START_DELIMITER = '**'; // opens the subject span
+const END_DELIMITER = '**'; // closes the subject span
+
+/**
+ * Parses a raw thought string into a structured ThoughtSummary object.
+ *
+ * The subject is the trimmed text between the first pair of double asterisks
+ * (e.g., **Subject**). The description is the text before and after that pair,
+ * joined as-is and then trimmed; any later ** pairs stay in the description.
+ *
+ * @param rawText The raw text of the thought.
+ * @returns A ThoughtSummary object. If no closed ** pair is found, the subject
+ * is empty and the trimmed input is the description.
+ */
+export function parseThought(rawText: string): ThoughtSummary {
+  const startIndex = rawText.indexOf(START_DELIMITER);
+  if (startIndex === -1) {
+    // No start delimiter at all: the whole (trimmed) text is the description.
+    return { subject: '', description: rawText.trim() };
+  }
+
+  const endIndex = rawText.indexOf(
+    END_DELIMITER,
+    startIndex + START_DELIMITER.length, // search only past the opening delimiter
+  );
+  if (endIndex === -1) {
+    // An opening delimiter without a closing one is not a valid subject,
+    // so the entire (trimmed) string becomes the description.
+    return { subject: '', description: rawText.trim() };
+  }
+
+  const subject = rawText
+    .substring(startIndex + START_DELIMITER.length, endIndex)
+    .trim();
+
+  // Description: text before the opening delimiter + text after the closing one, joined as-is.
+  const description = (
+    rawText.substring(0, startIndex) +
+    rawText.substring(endIndex + END_DELIMITER.length)
+  ).trim();
+
+  return { subject, description };
+}