refactor(core): Extract thought parsing logic into a dedicated utility (#9503)

This commit is contained in:
Abhi
2025-09-24 15:38:36 -04:00
committed by GitHub
parent ad59be0c81
commit 22740ddceb
5 changed files with 138 additions and 19 deletions

View File

@@ -27,6 +27,7 @@ import {
toFriendlyError,
} from '../utils/errors.js';
import type { GeminiChat } from './geminiChat.js';
import { parseThought, type ThoughtSummary } from '../utils/thoughtUtils.js';
// Define a structure for tools passed to the server
export interface ServerTool {
@@ -100,11 +101,6 @@ export interface ServerToolCallConfirmationDetails {
details: ToolCallConfirmationDetails;
}
export type ThoughtSummary = {
subject: string;
description: string;
};
export type ServerGeminiContentEvent = {
type: GeminiEventType.Content;
value: string;
@@ -249,19 +245,7 @@ export class Turn {
const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0];
if (thoughtPart?.thought) {
// Thought always has a bold "subject" part enclosed in double asterisks
// (e.g., **Subject**). The rest of the string is considered the description.
const rawText = thoughtPart.text ?? '';
const subjectStringMatches = rawText.match(/\*\*(.*?)\*\*/s);
const subject = subjectStringMatches
? subjectStringMatches[1].trim()
: '';
const description = rawText.replace(/\*\*(.*?)\*\*/s, '').trim();
const thought: ThoughtSummary = {
subject,
description,
};
const thought = parseThought(thoughtPart.text ?? '');
yield {
type: GeminiEventType.Thought,
value: thought,

View File

@@ -55,6 +55,7 @@ export * from './utils/workspaceContext.js';
export * from './utils/ignorePatterns.js';
export * from './utils/partUtils.js';
export * from './utils/promptIdContext.js';
export * from './utils/thoughtUtils.js';
// Export services
export * from './services/fileDiscoveryService.js';

View File

@@ -6,7 +6,7 @@
import { type Config } from '../config/config.js';
import { type Status } from '../core/coreToolScheduler.js';
import { type ThoughtSummary } from '../core/turn.js';
import { type ThoughtSummary } from '../utils/thoughtUtils.js';
import { getProjectHash } from '../utils/paths.js';
import path from 'node:path';
import fs from 'node:fs';

View File

@@ -0,0 +1,80 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { parseThought } from './thoughtUtils.js';
// Table-driven tests for parseThought: each row supplies a raw thought string
// and the exact ThoughtSummary the parser is expected to return for it.
describe('parseThought', () => {
  it.each([
    {
      name: 'a standard thought with subject and description',
      rawText: '**Subject:** This is the description.',
      expected: {
        subject: 'Subject:',
        description: 'This is the description.',
      },
    },
    {
      name: 'leading and trailing whitespace in the raw string',
      rawText: ' **Subject** description with spaces ',
      expected: { subject: 'Subject', description: 'description with spaces' },
    },
    {
      name: 'whitespace surrounding the subject content',
      rawText: '** Subject **',
      expected: { subject: 'Subject', description: '' },
    },
    {
      name: 'a thought with only a subject',
      rawText: '**Only Subject**',
      expected: { subject: 'Only Subject', description: '' },
    },
    {
      name: 'a thought with only a description (no subject)',
      rawText: 'This is just a description.',
      expected: { subject: '', description: 'This is just a description.' },
    },
    {
      name: 'an empty string input',
      rawText: '',
      expected: { subject: '', description: '' },
    },
    {
      name: 'newlines within the subject and description',
      rawText:
        '**Multi-line\nSubject**\nHere is a description\nspread across lines.',
      expected: {
        subject: 'Multi-line\nSubject',
        description: 'Here is a description\nspread across lines.',
      },
    },
    {
      name: 'only the first subject if multiple are present',
      rawText: '**First** some text **Second**',
      expected: { subject: 'First', description: 'some text **Second**' },
    },
    {
      name: 'text before and after the subject',
      rawText: 'Prefix text **Subject** Suffix text.',
      expected: {
        subject: 'Subject',
        // The prefix keeps its trailing space and the suffix keeps its
        // leading space, so removing the subject leaves two adjacent spaces.
        description: 'Prefix text  Suffix text.',
      },
    },
    {
      name: 'an unclosed subject tag',
      rawText: 'Text with **an unclosed subject',
      expected: { subject: '', description: 'Text with **an unclosed subject' },
    },
    {
      name: 'an empty subject tag',
      rawText: 'A thought with **** in the middle.',
      // Only the delimiters are removed; the spaces on both sides remain,
      // which leaves a double space in the description.
      expected: { subject: '', description: 'A thought with  in the middle.' },
    },
  ])('should correctly parse $name', ({ rawText, expected }) => {
    expect(parseThought(rawText)).toEqual(expected);
  });
});

View File

@@ -0,0 +1,54 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Structured form of a thought: a short subject plus the remaining text
 * as its description. Either field may be the empty string.
 */
export type ThoughtSummary = {
  subject: string;
  description: string;
};

// Markers that open and close the bold subject, as in `**Subject**`.
const START_DELIMITER = '**';
const END_DELIMITER = '**';

/**
 * Splits a raw thought string into a subject and a description.
 *
 * The subject is the text between the first `**` and the next `**` that
 * follows it, with surrounding whitespace trimmed. The description is the
 * text before the opening marker joined directly to the text after the
 * closing marker, trimmed at both ends; interior whitespace is not
 * collapsed. Only the first delimited span is treated as a subject, so any
 * later `**...**` spans stay in the description.
 *
 * @param rawText The raw text of the thought.
 * @returns The parsed ThoughtSummary. When there is no opening marker, or
 *     the opening marker is never closed, the subject is empty and the
 *     whole trimmed input becomes the description.
 */
export function parseThought(rawText: string): ThoughtSummary {
  const openAt = rawText.indexOf(START_DELIMITER);
  const subjectStart = openAt + START_DELIMITER.length;

  // Only look for a closing marker when an opening one exists.
  const closeAt = openAt < 0 ? -1 : rawText.indexOf(END_DELIMITER, subjectStart);

  if (closeAt < 0) {
    // Missing or unterminated subject: everything is description.
    return { subject: '', description: rawText.trim() };
  }

  const beforeSubject = rawText.slice(0, openAt);
  const insideSubject = rawText.slice(subjectStart, closeAt);
  const afterSubject = rawText.slice(closeAt + END_DELIMITER.length);

  return {
    subject: insideSubject.trim(),
    description: `${beforeSubject}${afterSubject}`.trim(),
  };
}