From 5b07a97630f5c8df3a680972cc2225bab355e231 Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Fri, 29 May 2026 11:19:06 -0700 Subject: [PATCH] Add Claude Vertex content generator --- packages/cli/src/utils/sessionUtils.test.ts | 66 + packages/cli/src/utils/sessionUtils.ts | 69 +- .../context/chatCompressionService.test.ts | 11 + .../src/context/chatCompressionService.ts | 5 + packages/core/src/core/contentGenerator.ts | 22 +- .../vertexAnthropicContentGenerator.test.ts | 916 +++++++++ .../core/vertexAnthropicContentGenerator.ts | 1775 +++++++++++++++++ packages/core/src/utils/partUtils.test.ts | 7 + packages/core/src/utils/partUtils.ts | 9 +- packages/core/src/utils/sessionUtils.test.ts | 147 ++ packages/core/src/utils/sessionUtils.ts | 87 +- 11 files changed, 3095 insertions(+), 19 deletions(-) create mode 100644 packages/core/src/core/vertexAnthropicContentGenerator.test.ts create mode 100644 packages/core/src/core/vertexAnthropicContentGenerator.ts diff --git a/packages/cli/src/utils/sessionUtils.test.ts b/packages/cli/src/utils/sessionUtils.test.ts index 5677da5727..b9d3aef513 100644 --- a/packages/cli/src/utils/sessionUtils.test.ts +++ b/packages/cli/src/utils/sessionUtils.test.ts @@ -1043,6 +1043,72 @@ describe('convertSessionToHistoryFormats', () => { }); }); + it('should not render inline thought parts as message text', () => { + const messages: MessageRecord[] = [ + { + id: '1', + timestamp: new Date().toISOString(), + type: 'gemini', + content: [ + { text: '**Planning** I should inspect the files.', thought: true }, + { text: 'I found the issue.' }, + ], + thoughts: [ + { + subject: 'Planning', + description: 'I should inspect the files.', + timestamp: new Date().toISOString(), + }, + ], + }, + ]; + + const result = convertSessionToHistoryFormats(messages); + + expect(result.uiHistory).toHaveLength(2); + expect(result.uiHistory[0]).toEqual({ + type: 'thinking', + thought: { + subject: 'Planning', + description: 'I should inspect the files.', + }, + }); + expect(result.uiHistory[1]).toEqual({ + type: 'gemini', + text: 'I found the issue.', + }); + expect(JSON.stringify(result.uiHistory)).not.toContain('[Thought: true]'); + }); + + it('should convert inline thought parts to thinking items without metadata', () => { + const messages: MessageRecord[] = [ + { + id: '1', + timestamp: new Date().toISOString(), + type: 'gemini', + content: [ + { text: '**Planning** I should inspect the files.', thought: true }, + { text: 'I found the issue.' }, + ], + }, + ]; + + const result = convertSessionToHistoryFormats(messages); + + expect(result.uiHistory).toHaveLength(2); + expect(result.uiHistory[0]).toEqual({ + type: 'thinking', + thought: { + subject: 'Planning', + description: 'I should inspect the files.', + }, + }); + expect(result.uiHistory[1]).toEqual({ + type: 'gemini', + text: 'I found the issue.', + }); + }); + it('should filter out from UI history', () => { const messages: MessageRecord[] = [ { diff --git a/packages/cli/src/utils/sessionUtils.ts b/packages/cli/src/utils/sessionUtils.ts index 2830451aa0..92ec58627d 100644 --- a/packages/cli/src/utils/sessionUtils.ts +++ b/packages/cli/src/utils/sessionUtils.ts @@ -7,13 +7,16 @@ import { checkExhaustive, partListUnionToString, + parseThought, SESSION_FILE_PREFIX, CoreToolCallStatus, type Storage, type ConversationRecord, type MessageRecord, + type ThoughtSummary, loadConversationRecord, } from '@google/gemini-cli-core'; +import { type Part, type PartListUnion } from '@google/genai'; import * as fs from 'node:fs/promises'; import path from 'node:path'; import { stripUnsafeCharacters } from '../ui/utils/textUtils.js'; @@ -139,6 +142,58 @@ export interface SessionSelectionResult { displayInfo: string; } +/** + * Checks if a session has at least one user or assistant (gemini) message. + * Sessions with only system messages (info, error, warning) are considered empty. + * @param messages - The array of message records to check + * @returns true if the session has meaningful content + */ +export const hasUserOrAssistantMessage = (messages: MessageRecord[]): boolean => + messages.some((msg) => msg.type === 'user' || msg.type === 'gemini'); + +function ensurePartArray(content: PartListUnion): Part[] { + if (Array.isArray(content)) { + return content.map((part) => + typeof part === 'string' ? { text: part } : part, + ); + } + if (typeof content === 'string') { + return [{ text: content }]; + } + return [content]; +} + +function inlineThoughtText(part: Part): string | undefined { + const thoughtValue = (part as { thought?: unknown }).thought; + if (!thoughtValue) { + return undefined; + } + if (typeof part.text === 'string' && part.text.trim()) { + return part.text; + } + if (typeof thoughtValue === 'string' && thoughtValue.trim()) { + return thoughtValue; + } + return undefined; +} + +function inlineThoughtSummaries(content: PartListUnion): ThoughtSummary[] { + return ensurePartArray(content) + .map(inlineThoughtText) + .filter((text): text is string => text !== undefined) + .map(parseThought); +} + +function visibleContentString(content: PartListUnion): string { + const visibleParts = ensurePartArray(content).filter( + (part) => !(part as { thought?: unknown }).thought, + ); + if (visibleParts.length === 0) { + return ''; + } + return partListUnionToString(visibleParts); +} + /** * Cleans and sanitizes message content for display by: * - Converting newlines to spaces @@ -579,9 +634,13 @@ export function convertSessionToHistoryFormats( const uiHistory: HistoryItemWithoutId[] = []; for (const msg of messages) { - // Add thoughts if present - if (msg.type === 'gemini' && msg.thoughts && msg.thoughts.length > 0) { - for (const thought of msg.thoughts) { + if (msg.type === 'gemini') { + const thoughts = + msg.thoughts && msg.thoughts.length > 0 + ? msg.thoughts + : inlineThoughtSummaries(msg.content); + + for (const thought of thoughts) { uiHistory.push({ type: 'thinking', thought: { @@ -594,9 +653,9 @@ export function convertSessionToHistoryFormats( // Add the message only if it has content const displayContentString = msg.displayContent - ? partListUnionToString(msg.displayContent) + ? visibleContentString(msg.displayContent) : undefined; - const contentString = partListUnionToString(msg.content); + const contentString = visibleContentString(msg.content); const uiText = displayContentString || contentString; // Skip internal context messages in the UI history diff --git a/packages/core/src/context/chatCompressionService.test.ts b/packages/core/src/context/chatCompressionService.test.ts index ea21bb0225..21d4319639 100644 --- a/packages/core/src/context/chatCompressionService.test.ts +++ b/packages/core/src/context/chatCompressionService.test.ts @@ -116,6 +116,17 @@ describe('modelStringToModelConfigAlias', () => { ); }); + it('should use Claude Vertex models directly', () => { + expect(modelStringToModelConfigAlias('claude-opus-4-8')).toBe( + 'claude-opus-4-8', + ); + expect( + modelStringToModelConfigAlias( + 'publishers/anthropic/models/claude-opus-4-8', + ), + ).toBe('publishers/anthropic/models/claude-opus-4-8'); + }); + it('should handle valid names', () => { expect(modelStringToModelConfigAlias('gemini-3-pro-preview')).toBe( 'chat-compression-3-pro', diff --git a/packages/core/src/context/chatCompressionService.ts b/packages/core/src/context/chatCompressionService.ts index 7eaa4df52a..6dfdf82662 100644 --- a/packages/core/src/context/chatCompressionService.ts +++ b/packages/core/src/context/chatCompressionService.ts @@ -33,6 +33,7 @@ import { PREVIEW_GEMINI_FLASH_LITE_MODEL, } from '../config/models.js'; import { PreCompressTrigger } from '../hooks/types.js'; +import { isClaudeVertexModel } from '../core/vertexAnthropicContentGenerator.js'; /** * Default threshold for compression token count as a fraction of the model's @@ -100,6 +101,10 @@ export function findCompressSplitPoint( } export function modelStringToModelConfigAlias(model: string): string { + if (isClaudeVertexModel(model)) { + return model; + } + switch (model) { case PREVIEW_GEMINI_MODEL: case PREVIEW_GEMINI_3_1_MODEL: diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index c893860d4c..e24c923c3a 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -32,6 +32,10 @@ import { getVersion, resolveModel } from '../../index.js'; import type { LlmRole } from '../telemetry/llmRole.js'; import { ModelMappingContentGenerator } from './modelMappingContentGenerator.js'; import { CCPA_AI_MODEL_MAPPINGS } from '../config/models.js'; +import { + VertexAiContentGeneratorRouter, + VertexAnthropicContentGenerator, +} from './vertexAnthropicContentGenerator.js'; /** * Interface abstracting the core functionalities for generating content and counting tokens. @@ -380,7 +384,23 @@ export async function createContentGenerator( }, }), }); - return new LoggingContentGenerator(googleGenAI.models, gcConfig); + const contentGenerator = + config.authType === AuthType.USE_VERTEX_AI + ? new VertexAiContentGeneratorRouter( + googleGenAI.models, + new VertexAnthropicContentGenerator({ + projectId: + process.env['GOOGLE_CLOUD_PROJECT'] || + process.env['GOOGLE_CLOUD_PROJECT_ID'] || + undefined, + location: process.env['GOOGLE_CLOUD_LOCATION'] || undefined, + baseUrl, + headers, + proxy: proxyUrl, + }), + ) + : googleGenAI.models; + return new LoggingContentGenerator(contentGenerator, gcConfig); } throw new Error( `Error creating contentGenerator: Unsupported authType: ${config.authType}`, diff --git a/packages/core/src/core/vertexAnthropicContentGenerator.test.ts b/packages/core/src/core/vertexAnthropicContentGenerator.test.ts new file mode 100644 index 0000000000..0c94e529b6 --- /dev/null +++ b/packages/core/src/core/vertexAnthropicContentGenerator.test.ts @@ -0,0 +1,916 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + FunctionCallingConfigMode, + GenerateContentResponse, + ThinkingLevel, +} from '@google/genai'; +import { describe, expect, it, vi } from 'vitest'; +import { LlmRole } from '../telemetry/llmRole.js'; +import type { ContentGenerator } from './contentGenerator.js'; +import { + isClaudeVertexModel, + VertexAiContentGeneratorRouter, + VertexAnthropicContentGenerator, +} from './vertexAnthropicContentGenerator.js'; + +const mockAuth = { + getClient: vi.fn(async () => ({ + getRequestHeaders: vi.fn(async () => ({ + Authorization: 'Bearer test-token', + })), + })), +}; + +function sseResponse(chunks: unknown[]): Response { + const body = chunks + .map((chunk) => `data: ${JSON.stringify(chunk)}\n\n`) + .join(''); + return new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(body)); + controller.close(); + }, + }), + ); +} + +describe('isClaudeVertexModel', () => { + it('detects Claude Vertex model IDs', () => { + expect(isClaudeVertexModel('claude-opus-4-8')).toBe(true); + expect( + isClaudeVertexModel('publishers/anthropic/models/claude-sonnet-4-6'), + ).toBe(true); + expect(isClaudeVertexModel('gemini-2.5-pro')).toBe(false); + }); +}); + +describe('VertexAiContentGeneratorRouter', () => { + it('routes Claude models to the Claude generator and other models to Gemini', async () => { + const geminiResponse = new GenerateContentResponse(); + const claudeResponse = new GenerateContentResponse(); + const geminiGenerator = { + generateContent: vi.fn(async () => geminiResponse), + } as unknown as ContentGenerator; + const claudeGenerator = { + generateContent: vi.fn(async () => claudeResponse), + } as unknown as ContentGenerator; + + const router = new VertexAiContentGeneratorRouter( + geminiGenerator, + claudeGenerator, + ); + + await expect( + router.generateContent( + { model: 'claude-opus-4-8', contents: 'hello' }, + 'prompt-id', + LlmRole.MAIN, + ), + ).resolves.toBe(claudeResponse); + await expect( + router.generateContent( + { model: 'gemini-2.5-pro', contents: 'hello' }, + 'prompt-id', + LlmRole.MAIN, + ), + ).resolves.toBe(geminiResponse); + + expect(claudeGenerator.generateContent).toHaveBeenCalledOnce(); + expect(geminiGenerator.generateContent).toHaveBeenCalledOnce(); + }); +}); + +describe('VertexAnthropicContentGenerator', () => { + it('converts Gemini requests and Anthropic SSE chunks', async () => { + const fetchFn = vi.fn(async (_input: string | URL, _init?: RequestInit) => + sseResponse([ + { + type: 'message_start', + message: { + id: 'msg_1', + model: 'claude-opus-4-8', + usage: { input_tokens: 7 }, + }, + }, + { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: 'hello' }, + }, + { + type: 'content_block_start', + index: 1, + content_block: { + type: 'tool_use', + id: 'toolu_1', + name: 'read_file', + input: {}, + }, + }, + { + type: 'content_block_delta', + index: 1, + delta: { + type: 'input_json_delta', + partial_json: '{"path":"a.txt"}', + }, + }, + { type: 'content_block_stop', index: 1 }, + { + type: 'message_delta', + delta: { stop_reason: 'tool_use' }, + usage: { output_tokens: 3 }, + }, + { type: 'message_stop' }, + ]), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + const stream = await generator.generateContentStream( + { + model: 'claude-opus-4-8', + contents: [ + { role: 'user', parts: [{ text: 'hi' }] }, + { + role: 'model', + parts: [ + { + functionCall: { + id: 'toolu_prev', + name: 'read_file', + args: { path: 'old.txt' }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'toolu_prev', + name: 'read_file', + response: { output: 'old contents' }, + }, + }, + ], + }, + ], + config: { + systemInstruction: 'system prompt', + maxOutputTokens: 123, + topP: 0.95, + topK: 40, + tools: [ + { + functionDeclarations: [ + { + name: 'read_file', + description: 'Read a file', + parametersJsonSchema: { + type: 'object', + properties: { path: { type: 'string' } }, + required: ['path'], + }, + }, + ], + }, + ], + toolConfig: { + functionCallingConfig: { + mode: FunctionCallingConfigMode.ANY, + allowedFunctionNames: ['read_file'], + }, + }, + }, + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const chunks: GenerateContentResponse[] = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + + expect(fetchFn).toHaveBeenCalledWith( + 'https://aiplatform.googleapis.com/v1/projects/my-project/locations/global/publishers/anthropic/models/claude-opus-4-8:streamRawPredict', + expect.objectContaining({ + method: 'POST', + headers: expect.objectContaining({ + Authorization: 'Bearer test-token', + 'Content-Type': 'application/json', + }), + }), + ); + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body).toMatchObject({ + anthropic_version: 'vertex-2023-10-16', + system: 'system prompt', + max_tokens: 123, + stream: true, + tool_choice: { type: 'tool', name: 'read_file' }, + }); + expect(body).not.toHaveProperty('model'); + expect(body).not.toHaveProperty('top_p'); + expect(body).not.toHaveProperty('top_k'); + expect(body['tools']).toEqual([ + { + name: 'read_file', + description: 'Read a file', + input_schema: { + type: 'object', + properties: { path: { type: 'string' } }, + required: ['path'], + }, + }, + ]); + expect(body['messages']).toEqual([ + { role: 'user', content: [{ type: 'text', text: 'hi' }] }, + { + role: 'assistant', + content: [ + { + type: 'tool_use', + id: 'toolu_prev', + name: 'read_file', + input: { path: 'old.txt' }, + }, + ], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_prev', + content: 'old contents', + }, + ], + }, + ]); + + expect(chunks[0].candidates?.[0]?.content?.parts?.[0]?.text).toBe('hello'); + expect(chunks[1].functionCalls).toEqual([ + { id: 'toolu_1', name: 'read_file', args: { path: 'a.txt' } }, + ]); + expect(chunks[2].candidates?.[0]?.finishReason).toBe('STOP'); + expect(chunks[2].usageMetadata).toMatchObject({ + promptTokenCount: 7, + candidatesTokenCount: 3, + totalTokenCount: 10, + }); + }); + + it('uses adaptive thinking for Claude Opus 4.8 and omits unsupported sampling parameters', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response( + JSON.stringify({ + id: 'msg_1', + model: 'claude-opus-4-8', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end_turn', + }), + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + await generator.generateContent( + { + model: 'claude-opus-4-8', + contents: 'hi', + config: { + temperature: 1, + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 8192, + }, + }, + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['thinking']).toEqual({ + type: 'adaptive', + display: 'summarized', + }); + expect(body['max_tokens']).toBe(128_000); + expect(body['thinking']).not.toHaveProperty('budget_tokens'); + expect(body).not.toHaveProperty('temperature'); + }); + + it('uses max output defaults only for Claude Opus 4 models', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response( + JSON.stringify({ + id: 'msg_1', + model: 'claude-opus-4-8', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end_turn', + }), + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + for (const model of [ + 'claude-opus-4-8', + 'claude-opus-4-5@20251101', + 'claude-opus-4-1@20250805', + 'claude-sonnet-4-6', + ]) { + await generator.generateContent( + { model, contents: 'hi' }, + 'prompt-id', + LlmRole.MAIN, + ); + } + + const bodies = fetchFn.mock.calls.map( + (call) => + JSON.parse((call[1] as RequestInit).body as string) as Record< + string, + unknown + >, + ); + expect(bodies.map((body) => body['max_tokens'])).toEqual([ + 128_000, 64_000, 32_000, 8192, + ]); + }); + + it('maps Gemini thinking levels to Claude effort and keeps tool choice compatible with thinking', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response( + JSON.stringify({ + id: 'msg_1', + model: 'claude-opus-4-8', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end_turn', + }), + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + await generator.generateContent( + { + model: 'claude-opus-4-8', + contents: 'hi', + config: { + thinkingConfig: { + thinkingLevel: ThinkingLevel.LOW, + }, + tools: [ + { + functionDeclarations: [ + { + name: 'read_file', + parametersJsonSchema: { type: 'object' }, + }, + ], + }, + ], + toolConfig: { + functionCallingConfig: { + mode: FunctionCallingConfigMode.ANY, + allowedFunctionNames: ['read_file'], + }, + }, + }, + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['thinking']).toEqual({ type: 'adaptive' }); + expect(body['output_config']).toEqual({ effort: 'low' }); + expect(body['tool_choice']).toEqual({ type: 'auto' }); + }); + + it('keeps manual thinking budgets for older Claude models', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response( + JSON.stringify({ + id: 'msg_1', + model: 'claude-sonnet-4-5', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end_turn', + }), + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + await generator.generateContent( + { + model: 'claude-sonnet-4-5', + contents: 'hi', + config: { + maxOutputTokens: 5000, + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 4096, + }, + }, + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['thinking']).toEqual({ + type: 'enabled', + budget_tokens: 4096, + display: 'summarized', + }); + expect(body['max_tokens']).toBe(5120); + }); + + it('round-trips Claude thinking signatures on tool-use turns', async () => { + const fetchFn = vi + .fn() + .mockResolvedValueOnce( + sseResponse([ + { + type: 'message_start', + message: { id: 'msg_1', model: 'claude-opus-4-8' }, + }, + { + type: 'content_block_start', + index: 0, + content_block: { type: 'thinking', thinking: '', signature: '' }, + }, + { + type: 'content_block_delta', + index: 0, + delta: { + type: 'thinking_delta', + thinking: 'I should call the tool.', + }, + }, + { + type: 'content_block_delta', + index: 0, + delta: { type: 'signature_delta', signature: 'sig_old' }, + }, + { + type: 'content_block_delta', + index: 0, + delta: { type: 'signature_delta', signature: 'sig_abc' }, + }, + { type: 'content_block_stop', index: 0 }, + { + type: 'content_block_start', + index: 1, + content_block: { + type: 'redacted_thinking', + data: 'opaque_redacted_data', + }, + }, + { type: 'content_block_stop', index: 1 }, + { + type: 'content_block_start', + index: 2, + content_block: { + type: 'tool_use', + id: 'toolu_1', + name: 'read_file', + input: {}, + }, + }, + { + type: 'content_block_delta', + index: 2, + delta: { + type: 'input_json_delta', + partial_json: '{"path":"a.txt"}', + }, + }, + { type: 'content_block_stop', index: 2 }, + { type: 'message_delta', delta: { stop_reason: 'tool_use' } }, + ]), + ) + .mockResolvedValueOnce( + new Response( + JSON.stringify({ + id: 'msg_2', + model: 'claude-opus-4-8', + content: [{ type: 'text', text: 'done' }], + stop_reason: 'end_turn', + }), + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + const stream = await generator.generateContentStream( + { + model: 'claude-opus-4-8', + contents: 'hi', + }, + 'prompt-id', + LlmRole.MAIN, + ); + const chunks: GenerateContentResponse[] = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + + const toolPart = chunks + .flatMap((chunk) => chunk.candidates?.[0]?.content?.parts ?? []) + .find((part) => part.functionCall); + const thoughtSignature = (toolPart as { thoughtSignature?: string }) + .thoughtSignature; + expect(thoughtSignature).toMatch(/^claude_thinking:/); + + await generator.generateContent( + { + model: 'claude-opus-4-8', + contents: [ + { + role: 'model', + parts: [toolPart!], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'toolu_1', + name: 'read_file', + response: { output: 'contents' }, + }, + }, + ], + }, + ], + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const body = JSON.parse( + (fetchFn.mock.calls[1]?.[1] as RequestInit).body as string, + ) as Record>; + expect(body['messages'][0]?.content).toEqual([ + { + type: 'thinking', + thinking: 'I should call the tool.', + signature: 'sig_abc', + }, + { + type: 'redacted_thinking', + data: 'opaque_redacted_data', + }, + { + type: 'tool_use', + id: 'toolu_1', + name: 'read_file', + input: { path: 'a.txt' }, + }, + ]); + }); + + it('handles a full JSON message on the streaming endpoint', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response( + JSON.stringify({ + id: 'msg_json', + type: 'message', + model: 'claude-opus-4-8', + content: [{ type: 'text', text: 'complete response' }], + stop_reason: 'end_turn', + usage: { input_tokens: 5, output_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + const stream = await generator.generateContentStream( + { + model: 'claude-opus-4-8', + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const chunks: GenerateContentResponse[] = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['stream']).toBe(true); + expect(chunks).toHaveLength(1); + expect(chunks[0].candidates?.[0]?.content?.parts?.[0]?.text).toBe( + 'complete response', + ); + expect(chunks[0].candidates?.[0]?.finishReason).toBe('STOP'); + expect(chunks[0].usageMetadata).toMatchObject({ + promptTokenCount: 5, + candidatesTokenCount: 2, + totalTokenCount: 7, + }); + }); + + it('sanitizes Claude tool names and maps tool calls back to Gemini names', async () => { + const geminiToolName = 'mcp.read/file:custom'; + const claudeToolName = 'mcp_read_file_custom'; + const fetchFn = vi.fn(async (_input: string | URL, _init?: RequestInit) => + sseResponse([ + { + type: 'message_start', + message: { id: 'msg_2', model: 'claude-opus-4-8' }, + }, + { + type: 'content_block_start', + index: 0, + content_block: { + type: 'tool_use', + id: 'toolu_2', + name: claudeToolName, + input: {}, + }, + }, + { + type: 'content_block_delta', + index: 0, + delta: { + type: 'input_json_delta', + partial_json: '{"path":"b.txt"}', + }, + }, + { type: 'content_block_stop', index: 0 }, + { type: 'message_delta', delta: { stop_reason: 'tool_use' } }, + ]), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + const stream = await generator.generateContentStream( + { + model: 'claude-opus-4-8', + contents: [ + { role: 'user', parts: [{ text: 'call the tool' }] }, + { + role: 'model', + parts: [ + { + functionCall: { + id: 'toolu_prev', + name: geminiToolName, + args: { path: 'old.txt' }, + }, + }, + ], + }, + ], + config: { + tools: [ + { + functionDeclarations: [ + { + name: geminiToolName, + parametersJsonSchema: { + type: 'object', + properties: { path: { type: 'string' } }, + }, + }, + ], + }, + ], + toolConfig: { + functionCallingConfig: { + mode: FunctionCallingConfigMode.ANY, + allowedFunctionNames: [geminiToolName], + }, + }, + }, + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const chunks: GenerateContentResponse[] = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['tools']).toEqual([ + { + name: claudeToolName, + input_schema: { + type: 'object', + properties: { path: { type: 'string' } }, + }, + }, + ]); + expect(body['tool_choice']).toEqual({ + type: 'tool', + name: claudeToolName, + }); + expect(body['messages']).toEqual([ + { role: 'user', content: [{ type: 'text', text: 'call the tool' }] }, + { + role: 'assistant', + content: [ + { + type: 'tool_use', + id: 'toolu_prev', + name: claudeToolName, + input: { path: 'old.txt' }, + }, + ], + }, + ]); + expect(chunks[0].functionCalls).toEqual([ + { id: 'toolu_2', name: geminiToolName, args: { path: 'b.txt' } }, + ]); + }); + + it('normalizes tool input schemas for Anthropic JSON Schema validation', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response( + JSON.stringify({ + id: 'msg_3', + content: [], + stop_reason: 'end_turn', + }), + { headers: { 'Content-Type': 'application/json' } }, + ), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'global', + auth: mockAuth, + fetchFn, + }); + + await generator.generateContent( + { + model: 'claude-opus-4-8', + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], + config: { + tools: [ + { + functionDeclarations: [ + { + name: 'schema_tool', + parametersJsonSchema: { + $schema: 'http://json-schema.org/draft-07/schema#', + $ref: '#/definitions/Root', + definitions: { + Root: { + type: 'OBJECT', + propertyOrdering: ['meta', 'maybeTags'], + properties: { + meta: { + type: 'OBJECT', + additionalProperties: { type: 'STRING' }, + }, + maybeTags: { + type: 'ARRAY', + items: { type: 'STRING' }, + nullable: true, + }, + union: { + oneOf: [{ type: 'STRING' }, { type: 'INTEGER' }], + }, + }, + required: [], + }, + }, + }, + }, + ], + }, + ], + }, + }, + 'prompt-id', + LlmRole.MAIN, + ); + + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['tools']).toEqual([ + { + name: 'schema_tool', + input_schema: { + type: 'object', + properties: { + meta: { + type: 'object', + additionalProperties: { type: 'string' }, + }, + maybeTags: { + type: ['array', 'null'], + items: { type: 'string' }, + }, + union: { + oneOf: [{ type: 'string' }, { type: 'integer' }], + }, + }, + }, + }, + ]); + }); + + it('uses the count-tokens rawPredict endpoint', async () => { + const fetchFn = vi.fn( + async (_input: string | URL, _init?: RequestInit) => + new Response(JSON.stringify({ input_tokens: 42 }), { + headers: { 'Content-Type': 'application/json' }, + }), + ); + const generator = new VertexAnthropicContentGenerator({ + projectId: 'my-project', + location: 'us-east5', + auth: mockAuth, + fetchFn, + }); + + await expect( + generator.countTokens({ + model: 'claude-opus-4-8', + contents: [{ role: 'user', parts: [{ text: 'hello' }] }], + }), + ).resolves.toEqual({ totalTokens: 42 }); + + expect(fetchFn.mock.calls[0]?.[0]).toBe( + 'https://us-east5-aiplatform.googleapis.com/v1/projects/my-project/locations/us-east5/publishers/anthropic/models/count-tokens:rawPredict', + ); + const body = JSON.parse( + (fetchFn.mock.calls[0]?.[1] as RequestInit).body as string, + ) as Record; + expect(body['model']).toBe('claude-opus-4-8'); + expect(body).not.toHaveProperty('max_tokens'); + }); +}); diff --git a/packages/core/src/core/vertexAnthropicContentGenerator.ts b/packages/core/src/core/vertexAnthropicContentGenerator.ts new file mode 100644 index 0000000000..01c865aca6 --- /dev/null +++ b/packages/core/src/core/vertexAnthropicContentGenerator.ts @@ -0,0 +1,1775 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createHash } from 'node:crypto'; +import { Buffer } from 'node:buffer'; +import { + FinishReason, + GenerateContentResponse, + ThinkingLevel, + type Content, + type CountTokensParameters, + type CountTokensResponse, + type EmbedContentResponse, + type EmbedContentParameters, + type FunctionCall, + type GenerateContentConfig, + type GenerateContentParameters, + type GenerateContentResponseUsageMetadata, + type Part, + type ThinkingConfig, + type Tool, +} from '@google/genai'; +import { GoogleAuth } from 'google-auth-library'; +import { Agent as UndiciAgent, ProxyAgent, type Dispatcher } from 'undici'; +import { toContents } from '../code_assist/converter.js'; +import type { LlmRole } from '../telemetry/llmRole.js'; +import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; +import type { ContentGenerator } from './contentGenerator.js'; + +const CLOUD_PLATFORM_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'; +const ANTHROPIC_VERTEX_VERSION = 'vertex-2023-10-16'; +const ANTHROPIC_TOOL_NAME_PATTERN = /^[a-zA-Z0-9_-]{1,128}$/; +const ANTHROPIC_TOOL_NAME_MAX_LENGTH = 128; +const CLAUDE_THINKING_SIGNATURE_PREFIX = 'claude_thinking:'; +const DEFAULT_CLAUDE_MAX_OUTPUT_TOKENS = 8192; +const CLAUDE_MAX_OUTPUT_TOKENS_128K = 128_000; + +export function isClaudeVertexModel(model: string): boolean { + const modelId = normalizeClaudeModelId(model); + return modelId.startsWith('claude-'); +} + +type FetchInit = RequestInit & { dispatcher?: Dispatcher }; +type FetchFn = (input: string | URL, init?: FetchInit) => Promise; + +const defaultFetch: FetchFn = (input, init) => fetch(input, init); + +interface GoogleAuthClientLike { + getRequestHeaders( + url?: string | URL, + ): Promise>; +} + +interface GoogleAuthLike { + getClient(): Promise; +} + +interface AnthropicTextBlock { + type: 'text'; + text: string; +} + +interface AnthropicThinkingBlock { + type: 'thinking'; + thinking: string; + signature?: string; +} + +interface AnthropicRedactedThinkingBlock { + type: 'redacted_thinking'; + data: string; +} + +type AnthropicThinkingLikeBlock = + | AnthropicThinkingBlock + | AnthropicRedactedThinkingBlock; + +interface AnthropicMediaBlock { + type: 'image' | 'document'; + source: { + type: 'base64'; + media_type: string; + data: string; + }; +} + +interface AnthropicToolUseBlock { + type: 'tool_use'; + id: string; + name: string; + input: unknown; +} + +interface AnthropicToolResultBlock { + type: 'tool_result'; + tool_use_id: string; + content: string; + is_error?: boolean; +} + +type AnthropicContentBlock = + | AnthropicTextBlock + | AnthropicThinkingBlock + | AnthropicRedactedThinkingBlock + | AnthropicMediaBlock + | AnthropicToolUseBlock + | AnthropicToolResultBlock; + +interface AnthropicMessage { + role: 'user' | 'assistant'; + content: AnthropicContentBlock[]; +} + +interface AnthropicTool { + name: string; + description?: string; + input_schema: unknown; +} + +interface AnthropicMessageRequest { + anthropic_version: typeof ANTHROPIC_VERTEX_VERSION; + messages: AnthropicMessage[]; + model?: string; + system?: string; + max_tokens?: number; + temperature?: number; + stop_sequences?: string[]; + tools?: AnthropicTool[]; + tool_choice?: unknown; + stream?: boolean; + thinking?: + | { + type: 'adaptive'; + display?: 'summarized' | 'omitted'; + } + | { + type: 'enabled'; + budget_tokens: number; + display?: 'summarized' | 'omitted'; + }; + output_config?: { + effort?: AnthropicEffort; + }; +} + +type AnthropicEffort = 'low' | 'medium' | 'high' | 'xhigh' | 'max'; + +interface AnthropicUsage { + input_tokens?: number; + output_tokens?: number; + cache_creation_input_tokens?: number; + cache_read_input_tokens?: number; +} + +interface AnthropicMessageResponse { + id?: string; + model?: string; + content?: AnthropicContentBlock[]; + stop_reason?: string | null; + usage?: AnthropicUsage; +} + +type AnthropicStreamEvent = + | { + type: 'message_start'; + message?: AnthropicMessageResponse; + } + | { + type: 'content_block_start'; + index?: number; + content_block?: AnthropicContentBlock; + } + | { + type: 'content_block_delta'; + index?: number; + delta?: { + type?: string; + text?: string; + partial_json?: string; + thinking?: string; + signature?: string; + }; + } + | { + type: 'content_block_stop'; + index?: number; + } + | { + type: 'message_delta'; + delta?: { + stop_reason?: string | null; + }; + usage?: AnthropicUsage; + } + | { + type: 'message_stop' | 'ping'; + }; + +type AnthropicStreamPayload = AnthropicStreamEvent | AnthropicMessageResponse; + +export interface VertexAnthropicContentGeneratorOptions { + projectId?: string; + location?: string; + baseUrl?: string; + headers?: Record; + proxy?: string; + auth?: GoogleAuthLike; + fetchFn?: FetchFn; +} + +interface ActiveToolUse { + id: string; + name: string; + inputJson: string; + thinkingSignature?: string; +} + +class ToolNameMapper { + private readonly originalToAnthropic = new Map(); + private readonly anthropicToOriginal = new Map(); + + constructor(tools?: GenerateContentConfig['tools']) { + const names = this.collectToolNames(tools); + names.sort( + (a, b) => + Number(!isAnthropicToolName(a)) - Number(!isAnthropicToolName(b)), + ); + for (const name of names) { + this.register(name); + } + } + + toAnthropicName(name: string | undefined): string { + return this.register(name?.trim() || 'unknown_tool'); + } + + toGeminiName(name: string): string { + return this.anthropicToOriginal.get(name) ?? name; + } + + private collectToolNames(tools?: GenerateContentConfig['tools']): string[] { + if (!Array.isArray(tools)) { + return []; + } + + const names = new Set(); + for (const tool of tools) { + if (!isFunctionDeclarationTool(tool)) { + continue; + } + for (const declaration of tool.functionDeclarations ?? []) { + if (declaration.name) { + names.add(declaration.name); + } + } + } + return [...names]; + } + + private register(originalName: string): string { + const existing = this.originalToAnthropic.get(originalName); + if (existing) { + return existing; + } + + const baseName = sanitizeAnthropicToolName(originalName); + let anthropicName = baseName; + const existingOriginal = this.anthropicToOriginal.get(anthropicName); + if (existingOriginal && existingOriginal !== originalName) { + anthropicName = appendToolNameHash(baseName, originalName); + } + + this.originalToAnthropic.set(originalName, anthropicName); + this.anthropicToOriginal.set(anthropicName, originalName); + return anthropicName; + } +} + +export class VertexAnthropicContentGenerator implements ContentGenerator { + private readonly auth: GoogleAuthLike; + private readonly location: string; + private readonly baseUrl?: string; + private readonly baseHeaders: Record; + private readonly dispatcher: Dispatcher; + private readonly fetchFn: FetchFn; + private readonly explicitProjectId?: string; + + constructor(options: VertexAnthropicContentGeneratorOptions = {}) { + this.explicitProjectId = + options.projectId || + process.env['GOOGLE_CLOUD_PROJECT'] || + process.env['GOOGLE_CLOUD_PROJECT_ID'] || + undefined; + this.location = + options.location || process.env['GOOGLE_CLOUD_LOCATION'] || 'global'; + this.baseUrl = options.baseUrl; + this.baseHeaders = options.headers ?? {}; + this.auth = + options.auth ?? + new GoogleAuth({ + scopes: [CLOUD_PLATFORM_SCOPE], + projectId: this.explicitProjectId, + }); + + const dispatcherOptions = { + headersTimeout: 60000, + bodyTimeout: 0, + }; + this.dispatcher = options.proxy + ? new ProxyAgent({ uri: options.proxy.trim(), ...dispatcherOptions }) + : new UndiciAgent(dispatcherOptions); + + this.fetchFn = options.fetchFn ?? defaultFetch; + } + + async generateContent( + request: GenerateContentParameters, + _userPromptId: string, + _role: LlmRole, + ): Promise { + const toolNameMapper = new ToolNameMapper(request.config?.tools); + const body = this.toAnthropicRequest(request, toolNameMapper); + const response = await this.postJson( + this.buildModelUrl(request.model, 'rawPredict'), + body, + request.config?.abortSignal, + ); + const json = await readJson(response); + return this.anthropicMessageToResponse( + toAnthropicMessageResponse(json), + request.model, + toolNameMapper, + ); + } + + async generateContentStream( + request: GenerateContentParameters, + _userPromptId: string, + _role: LlmRole, + ): Promise> { + const toolNameMapper = new ToolNameMapper(request.config?.tools); + const body = this.toAnthropicRequest(request, toolNameMapper); + body.stream = true; + const response = await this.postJson( + this.buildModelUrl(request.model, 'streamRawPredict'), + body, + request.config?.abortSignal, + ); + return this.streamAnthropicResponse( + response, + request.model, + toolNameMapper, + ); + } + + async countTokens( + request: CountTokensParameters, + ): Promise { + try { + const toolNameMapper = new ToolNameMapper(request.config?.tools); + const body: AnthropicMessageRequest = { + ...this.toAnthropicRequest( + { + model: request.model, + contents: request.contents, + config: request.config, + }, + toolNameMapper, + ), + model: normalizeClaudeModelId(request.model), + }; + delete body.max_tokens; + + const response = await this.postJson( + this.buildModelUrl('count-tokens', 'rawPredict'), + body, + undefined, + ); + const json = await readJson(response); + return { + totalTokens: + tokenCountFromResponse(json) ?? this.estimateTokens(request), + }; + } catch { + return { totalTokens: this.estimateTokens(request) }; + } + } + + async embedContent( + _request: EmbedContentParameters, + ): Promise { + throw new Error( + 'Claude models on Vertex AI do not support embeddings. Use a Gemini embedding model for embedContent requests.', + ); + } + + private async postJson( + url: string, + body: AnthropicMessageRequest, + signal: AbortSignal | undefined, + ): Promise { + const authHeaders = await this.getAuthHeaders(url); + const response = await this.fetchFn(url, { + method: 'POST', + headers: { + ...this.baseHeaders, + ...authHeaders, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(body), + signal, + dispatcher: this.dispatcher, + }); + + if (!response.ok) { + throw await this.toApiError(response); + } + + return response; + } + + private async getAuthHeaders(url: string): Promise> { + const client = await this.auth.getClient(); + const headers = await client.getRequestHeaders(url); + return headersToRecord(headers); + } + + private async toApiError(response: Response): Promise { + const body = await response.text().catch(() => ''); + const message = body + ? `Claude Vertex AI request failed: ${response.status} ${response.statusText}: ${body}` + : `Claude Vertex AI request failed: ${response.status} ${response.statusText}`; + const error = new Error(message); + Object.assign(error, { status: response.status }); + return error; + } + + private buildServiceBaseUrl(): string { + if (this.baseUrl) { + const trimmed = this.baseUrl.replace(/\/+$/, ''); + return /\/v\d+(beta\d+)?$/.test(trimmed) ? trimmed : `${trimmed}/v1`; + } + + if (this.location === 'global') { + return 'https://aiplatform.googleapis.com/v1'; + } + if (this.location === 'us' || this.location === 'eu') { + return `https://aiplatform.${this.location}.rep.googleapis.com/v1`; + } + return `https://${this.location}-aiplatform.googleapis.com/v1`; + } + + private buildModelUrl( + model: string, + method: 'rawPredict' | 'streamRawPredict', + ) { + const modelId = encodeURIComponent(normalizeClaudeModelId(model)); + return `${this.buildServiceBaseUrl()}/projects/${this.getProjectIdSyncPlaceholder()}/locations/${this.location}/publishers/anthropic/models/${modelId}:${method}`; + } + + private getProjectIdSyncPlaceholder(): string { + if (!this.explicitProjectId) { + throw new Error( + 'GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID is required for Claude models on Vertex AI.', + ); + } + return encodeURIComponent(this.explicitProjectId); + } + + private toAnthropicRequest( + request: Pick, + toolNameMapper: ToolNameMapper, + ): AnthropicMessageRequest { + const config = request.config; + const body: AnthropicMessageRequest = { + anthropic_version: ANTHROPIC_VERTEX_VERSION, + messages: mergeAdjacentMessages( + toContents(request.contents).map((content) => + contentToAnthropicMessage(content, toolNameMapper), + ), + ), + max_tokens: + config?.maxOutputTokens ?? defaultMaxOutputTokens(request.model), + }; + + const system = contentUnionText(config?.systemInstruction); + if (system) { + body.system = system; + } + if ( + config?.temperature !== undefined && + !omitsSamplingParameters(request.model) + ) { + body.temperature = config.temperature; + } + if (config?.stopSequences?.length) { + body.stop_sequences = config.stopSequences; + } + + const thinking = toAnthropicThinking(request.model, config); + if (thinking?.thinking) { + body.thinking = thinking.thinking; + } + if (thinking?.output_config) { + body.output_config = thinking.output_config; + } + + const tools = toAnthropicTools(config?.tools, toolNameMapper); + if (tools.length > 0) { + body.tools = tools; + body.tool_choice = toAnthropicToolChoice( + config, + toolNameMapper, + body.thinking !== undefined, + ); + } + + if (body.thinking?.type === 'enabled') { + const thinkingBudget = body.thinking.budget_tokens; + body.max_tokens = Math.max(body.max_tokens ?? 0, thinkingBudget + 1024); + } + + return body; + } + + private anthropicMessageToResponse( + message: AnthropicMessageResponse, + fallbackModel: string, + toolNameMapper: ToolNameMapper, + ): GenerateContentResponse { + return createGeminiResponse({ + parts: anthropicBlocksToGeminiParts( + message.content ?? [], + toolNameMapper, + ), + finishReason: mapAnthropicStopReason(message.stop_reason), + usage: message.usage, + responseId: message.id, + modelVersion: message.model ?? fallbackModel, + }); + } + + private async *streamAnthropicResponse( + response: Response, + fallbackModel: string, + toolNameMapper: ToolNameMapper, + ): AsyncGenerator { + if (!response.body) { + throw new Error( + 'Claude Vertex AI streaming response did not include a body.', + ); + } + + const activeTools = new Map(); + const activeThinking = new Map(); + let pendingThinkingBlocks: AnthropicThinkingLikeBlock[] = []; + let messageId: string | undefined; + let modelVersion: string | undefined; + let inputTokens: number | undefined; + let lastUsage: AnthropicUsage | undefined; + + for await (const event of parseSse(response.body)) { + if (isAnthropicMessageResponse(event)) { + yield this.anthropicMessageToResponse( + event, + fallbackModel, + toolNameMapper, + ); + continue; + } + + if (event.type === 'message_start') { + messageId = event.message?.id; + modelVersion = event.message?.model; + inputTokens = event.message?.usage?.input_tokens; + continue; + } + + if (event.type === 'content_block_start') { + const block = event.content_block; + if ( + event.index !== undefined && + (block?.type === 'thinking' || block?.type === 'redacted_thinking') + ) { + activeThinking.set(event.index, { ...block }); + if (block.type === 'thinking' && block.thinking) { + yield createGeminiResponse({ + parts: [{ text: block.thinking, thought: true }], + responseId: messageId, + modelVersion: modelVersion ?? fallbackModel, + }); + } + continue; + } + + if (event.index !== undefined && block?.type === 'tool_use') { + const thinkingSignature = encodeClaudeThinkingBlocks( + pendingThinkingBlocks, + ); + pendingThinkingBlocks = []; + activeTools.set(event.index, { + id: block.id, + name: toolNameMapper.toGeminiName(block.name), + inputJson: isEmptyObject(block.input) + ? '' + : JSON.stringify(block.input ?? {}), + thinkingSignature, + }); + continue; + } + + if (block?.type === 'text' && block.text) { + yield createGeminiResponse({ + parts: [{ text: block.text }], + responseId: messageId, + modelVersion: modelVersion ?? fallbackModel, + }); + } + continue; + } + + if (event.type === 'content_block_delta') { + const activeThought = + event.index === undefined + ? undefined + : activeThinking.get(event.index); + if (activeThought?.type === 'thinking' && event.delta?.thinking) { + activeThought.thinking += event.delta.thinking; + yield createGeminiResponse({ + parts: [{ text: event.delta.thinking, thought: true }], + responseId: messageId, + modelVersion: modelVersion ?? fallbackModel, + }); + continue; + } + if (activeThought?.type === 'thinking' && event.delta?.signature) { + activeThought.signature = event.delta.signature; + continue; + } + + const activeTool = + event.index === undefined ? undefined : activeTools.get(event.index); + if (activeTool && event.delta?.partial_json) { + activeTool.inputJson += event.delta.partial_json; + continue; + } + + if (event.delta?.text) { + yield createGeminiResponse({ + parts: [{ text: event.delta.text }], + responseId: messageId, + modelVersion: modelVersion ?? fallbackModel, + }); + } + continue; + } + + if (event.type === 'content_block_stop') { + const activeThought = + event.index === undefined + ? undefined + : activeThinking.get(event.index); + if (activeThought) { + activeThinking.delete(event.index!); + pendingThinkingBlocks.push(activeThought); + continue; + } + + const activeTool = + event.index === undefined ? undefined : activeTools.get(event.index); + if (!activeTool) { + continue; + } + activeTools.delete(event.index!); + yield createGeminiResponse({ + parts: [ + createFunctionCallPart( + activeTool.id, + activeTool.name, + parseJsonObject(activeTool.inputJson), + activeTool.thinkingSignature, + ), + ], + responseId: messageId, + modelVersion: modelVersion ?? fallbackModel, + }); + continue; + } + + if (event.type === 'message_delta') { + lastUsage = { + ...lastUsage, + input_tokens: inputTokens, + ...event.usage, + }; + if (event.delta?.stop_reason) { + yield createGeminiResponse({ + parts: [], + finishReason: mapAnthropicStopReason(event.delta.stop_reason), + usage: lastUsage, + responseId: messageId, + modelVersion: modelVersion ?? fallbackModel, + }); + } + } + } + } + + private estimateTokens(request: CountTokensParameters): number { + const contents = toContents(request.contents); + let total = 0; + for (const content of contents) { + total += estimateTokenCountSync(content.parts ?? []); + } + if (request.config) { + total += Math.floor(JSON.stringify(request.config).length / 4); + } + return total; + } +} + +export class VertexAiContentGeneratorRouter implements ContentGenerator { + constructor( + private readonly geminiGenerator: ContentGenerator, + private readonly claudeGenerator: ContentGenerator, + ) {} + + get userTier() { + return this.geminiGenerator.userTier; + } + + get userTierName() { + return this.geminiGenerator.userTierName; + } + + get paidTier() { + return this.geminiGenerator.paidTier; + } + + generateContent( + request: GenerateContentParameters, + userPromptId: string, + role: LlmRole, + ): Promise { + return this.generatorForModel(request.model).generateContent( + request, + userPromptId, + role, + ); + } + + generateContentStream( + request: GenerateContentParameters, + userPromptId: string, + role: LlmRole, + ): Promise> { + return this.generatorForModel(request.model).generateContentStream( + request, + userPromptId, + role, + ); + } + + countTokens(request: CountTokensParameters): Promise { + return this.generatorForModel(request.model).countTokens(request); + } + + embedContent(request: EmbedContentParameters): Promise { + return this.generatorForModel(request.model).embedContent(request); + } + + private generatorForModel(model: string): ContentGenerator { + return isClaudeVertexModel(model) + ? this.claudeGenerator + : this.geminiGenerator; + } +} + +async function readJson(response: Response): Promise { + const value: unknown = await response.json(); + return value; +} + +function toAnthropicMessageResponse(value: unknown): AnthropicMessageResponse { + if (!isRecord(value)) { + return {}; + } + + const stopReason = value['stop_reason']; + return { + id: stringField(value, 'id'), + model: stringField(value, 'model'), + stop_reason: + typeof stopReason === 'string' || stopReason === null + ? stopReason + : undefined, + usage: toAnthropicUsage(value['usage']), + content: Array.isArray(value['content']) + ? value['content'].filter(isAnthropicContentBlock) + : undefined, + }; +} + +function toAnthropicUsage(value: unknown): AnthropicUsage | undefined { + if (!isRecord(value)) { + return undefined; + } + return { + input_tokens: numberField(value, 'input_tokens'), + output_tokens: numberField(value, 'output_tokens'), + cache_creation_input_tokens: numberField( + value, + 'cache_creation_input_tokens', + ), + cache_read_input_tokens: numberField(value, 'cache_read_input_tokens'), + }; +} + +function tokenCountFromResponse(value: unknown): number | undefined { + if (!isRecord(value)) { + return undefined; + } + + const direct = + numberField(value, 'input_tokens') ?? + numberField(value, 'total_tokens') ?? + numberField(value, 'token_count'); + if (direct !== undefined) { + return direct; + } + + const usage = value['usage']; + if (!isRecord(usage)) { + return undefined; + } + return ( + numberField(usage, 'input_tokens') ?? numberField(usage, 'total_tokens') + ); +} + +function normalizeClaudeModelId(model: string): string { + const trimmed = model.trim(); + const match = trimmed.match(/(?:^|\/)models\/([^/]+)$/); + return match ? decodeURIComponent(match[1]) : trimmed; +} + +function isAnthropicToolName(name: string): boolean { + return ANTHROPIC_TOOL_NAME_PATTERN.test(name); +} + +function sanitizeAnthropicToolName(name: string): string { + const sanitized = name.replace(/[^a-zA-Z0-9_-]/g, '_') || 'tool'; + if (sanitized.length <= ANTHROPIC_TOOL_NAME_MAX_LENGTH) { + return sanitized; + } + return appendToolNameHash(sanitized, name); +} + +function appendToolNameHash(baseName: string, hashSource: string): string { + const suffix = `_${createHash('sha256').update(hashSource).digest('hex').slice(0, 8)}`; + return `${baseName.slice( + 0, + ANTHROPIC_TOOL_NAME_MAX_LENGTH - suffix.length, + )}${suffix}`; +} + +function encodeClaudeThinkingBlocks( + blocks: AnthropicThinkingLikeBlock[], +): string | undefined { + const preservedBlocks = blocks.filter(isPreservableThinkingBlock); + if (preservedBlocks.length === 0) { + return undefined; + } + + return `${CLAUDE_THINKING_SIGNATURE_PREFIX}${Buffer.from( + JSON.stringify(preservedBlocks), + 'utf8', + ).toString('base64url')}`; +} + +function decodeClaudeThinkingBlocks( + thoughtSignature: string | undefined, +): AnthropicThinkingLikeBlock[] { + if (!thoughtSignature?.startsWith(CLAUDE_THINKING_SIGNATURE_PREFIX)) { + return []; + } + + try { + const encoded = thoughtSignature.slice( + CLAUDE_THINKING_SIGNATURE_PREFIX.length, + ); + const parsed = JSON.parse( + Buffer.from(encoded, 'base64url').toString('utf8'), + ) as unknown; + if (!Array.isArray(parsed)) { + return []; + } + return parsed.filter(isAnthropicThinkingLikeBlock); + } catch { + return []; + } +} + +function headersToRecord(headers: Headers | Record) { + const result: Record = {}; + if (headers instanceof Headers) { + headers.forEach((value, key) => { + result[key] = value; + }); + return result; + } + + for (const [key, value] of Object.entries(headers)) { + result[key] = String(value); + } + return result; +} + +function contentToAnthropicMessage( + content: Content, + toolNameMapper: ToolNameMapper, +): AnthropicMessage { + const role = content.role === 'model' ? 'assistant' : 'user'; + const blocks = (content.parts ?? []).flatMap((part) => + partToAnthropicBlocks(part, role, toolNameMapper), + ); + return { + role, + content: blocks.length > 0 ? blocks : [{ type: 'text', text: ' ' }], + }; +} + +function partToAnthropicBlocks( + part: Part, + role: AnthropicMessage['role'], + toolNameMapper: ToolNameMapper, +): AnthropicContentBlock[] { + if (part.text !== undefined) { + return [{ type: 'text', text: part.text }]; + } + + if (part.inlineData?.data && part.inlineData.mimeType) { + if (part.inlineData.mimeType.startsWith('image/')) { + return [ + { + type: 'image', + source: { + type: 'base64', + media_type: part.inlineData.mimeType, + data: part.inlineData.data, + }, + }, + ]; + } + if (part.inlineData.mimeType === 'application/pdf') { + return [ + { + type: 'document', + source: { + type: 'base64', + media_type: part.inlineData.mimeType, + data: part.inlineData.data, + }, + }, + ]; + } + return [ + { + type: 'text', + text: `[Unsupported inlineData MIME type for Claude on Vertex AI: ${part.inlineData.mimeType}]`, + }, + ]; + } + + if (part.fileData) { + throw new Error( + 'fileData URL inputs are not supported for Claude on Vertex AI. Use inlineData instead.', + ); + } + + if (part.functionCall) { + if (role !== 'assistant') { + return []; + } + return [ + ...decodeClaudeThinkingBlocks( + (part as { thoughtSignature?: string }).thoughtSignature, + ), + { + type: 'tool_use', + id: part.functionCall.id ?? makeToolUseId(part.functionCall), + name: toolNameMapper.toAnthropicName(part.functionCall.name), + input: part.functionCall.args ?? {}, + }, + ]; + } + + if (part.functionResponse) { + const response = part.functionResponse.response ?? {}; + return [ + { + type: 'tool_result', + tool_use_id: + part.functionResponse.id ?? + `${part.functionResponse.name ?? 'unknown_tool'}_result`, + content: functionResponseContent(response), + is_error: response['error'] !== undefined ? true : undefined, + }, + ]; + } + + return []; +} + +function mergeAdjacentMessages( + messages: AnthropicMessage[], +): AnthropicMessage[] { + const merged: AnthropicMessage[] = []; + for (const message of messages) { + const previous = merged[merged.length - 1]; + if (previous?.role === message.role) { + previous.content.push(...message.content); + } else { + merged.push({ role: message.role, content: [...message.content] }); + } + } + return merged; +} + +function contentUnionText(content: GenerateContentConfig['systemInstruction']) { + if (!content) { + return undefined; + } + return toContents(content) + .flatMap((item) => item.parts ?? []) + .map((part) => part.text) + .filter((text): text is string => text !== undefined && text !== '') + .join('\n'); +} + +function toAnthropicTools( + tools?: GenerateContentConfig['tools'], + toolNameMapper?: ToolNameMapper, +): AnthropicTool[] { + if (!Array.isArray(tools)) { + return []; + } + return tools.flatMap((tool) => { + if (!isFunctionDeclarationTool(tool)) { + return []; + } + return (tool.functionDeclarations ?? []) + .filter((fn) => fn.name) + .map((fn) => ({ + name: toolNameMapper?.toAnthropicName(fn.name) ?? fn.name!, + description: fn.description, + input_schema: toJsonSchema(fn.parametersJsonSchema ?? fn.parameters), + })); + }); +} + +function isFunctionDeclarationTool(tool: unknown): tool is Tool { + return isRecord(tool) && Array.isArray(tool['functionDeclarations']); +} + +function toAnthropicThinking( + model: string, + config: GenerateContentConfig | undefined, +): Pick | undefined { + const thinkingConfig = config?.thinkingConfig; + if (!thinkingConfig || thinkingConfig.thinkingBudget === 0) { + return undefined; + } + + const thinkingRequested = + thinkingConfig.includeThoughts === true || + (thinkingConfig.thinkingBudget !== undefined && + thinkingConfig.thinkingBudget !== 0) || + (thinkingConfig.thinkingLevel !== undefined && + thinkingConfig.thinkingLevel !== + ThinkingLevel.THINKING_LEVEL_UNSPECIFIED); + + if (!thinkingRequested) { + return undefined; + } + + const display = + thinkingConfig.includeThoughts === true ? 'summarized' : undefined; + + if (supportsAdaptiveThinking(model)) { + return { + thinking: { + type: 'adaptive', + ...(display ? { display } : {}), + }, + output_config: toAnthropicOutputConfig(thinkingConfig.thinkingLevel), + }; + } + + const thinkingBudget = thinkingConfig.thinkingBudget; + if (thinkingBudget === undefined || thinkingBudget <= 0) { + return undefined; + } + + return { + thinking: { + type: 'enabled', + budget_tokens: thinkingBudget, + ...(display ? { display } : {}), + }, + }; +} + +function toAnthropicOutputConfig( + thinkingLevel: ThinkingConfig['thinkingLevel'], +): AnthropicMessageRequest['output_config'] | undefined { + if (thinkingLevel === ThinkingLevel.LOW) { + return { effort: 'low' }; + } + if (thinkingLevel === ThinkingLevel.HIGH) { + return { effort: 'high' }; + } + return undefined; +} + +function supportsAdaptiveThinking(model: string): boolean { + const modelId = normalizeClaudeModelId(model); + return ( + modelId === 'claude-opus-4-8' || + modelId === 'claude-opus-4-7' || + modelId === 'claude-opus-4-6' || + modelId === 'claude-sonnet-4-6' || + modelId === 'claude-mythos-preview' + ); +} + +function defaultMaxOutputTokens(model: string): number { + const modelId = normalizeClaudeModelId(model).toLowerCase(); + if ( + modelId.includes('claude-opus-4-8') || + modelId.includes('claude-opus-4-7') || + modelId.includes('claude-opus-4-6') + ) { + return CLAUDE_MAX_OUTPUT_TOKENS_128K; + } + if (modelId.includes('claude-opus-4-5')) { + return 64_000; + } + if (modelId.includes('claude-opus-4')) { + return 32_000; + } + return DEFAULT_CLAUDE_MAX_OUTPUT_TOKENS; +} + +function omitsSamplingParameters(model: string): boolean { + const modelId = normalizeClaudeModelId(model); + return modelId === 'claude-opus-4-8' || modelId === 'claude-opus-4-7'; +} + +function toAnthropicToolChoice( + config: GenerateContentConfig | undefined, + toolNameMapper: ToolNameMapper, + thinkingEnabled: boolean, +): unknown { + const functionCallingConfig = config?.toolConfig?.functionCallingConfig; + const mode = functionCallingConfig?.mode; + if (mode === 'NONE') { + return { type: 'none' }; + } + if (thinkingEnabled) { + return { type: 'auto' }; + } + if (mode === 'ANY') { + const allowedNames = functionCallingConfig?.allowedFunctionNames ?? []; + if (allowedNames.length === 1) { + return { + type: 'tool', + name: toolNameMapper.toAnthropicName(allowedNames[0]), + }; + } + return { type: 'any' }; + } + return { type: 'auto' }; +} + +function toJsonSchema(schema: unknown): unknown { + return toAnthropicInputSchema(normalizeJsonSchema(schema, schema, new Set())); +} + +function normalizeJsonSchema( + schema: unknown, + root: unknown, + resolvingRefs: Set, +): unknown { + if (typeof schema === 'boolean') { + return schema; + } + if (!isRecord(schema)) { + return {}; + } + + const ref = stringField(schema, '$ref'); + if (ref?.startsWith('#/') && !resolvingRefs.has(ref)) { + const target = resolveJsonPointer(root, ref); + if (target !== undefined) { + resolvingRefs.add(ref); + const normalizedTarget = normalizeJsonSchema(target, root, resolvingRefs); + resolvingRefs.delete(ref); + + const siblings = Object.fromEntries( + Object.entries(schema).filter( + ([key]) => + key !== '$ref' && + key !== '$schema' && + key !== '$defs' && + key !== 'definitions', + ), + ); + if (Object.keys(siblings).length === 0) { + return normalizedTarget; + } + if (isRecord(normalizedTarget)) { + return normalizeJsonSchema( + { ...normalizedTarget, ...siblings }, + root, + resolvingRefs, + ); + } + } + } + + const nullable = schema['nullable'] === true; + const output: Record = {}; + for (const [key, value] of Object.entries(schema)) { + if (key === '$schema' || key === 'propertyOrdering' || key === 'nullable') { + continue; + } + if (key === 'type') { + const normalizedType = normalizeJsonSchemaType(value); + if (normalizedType !== undefined) { + output[key] = normalizedType; + } + continue; + } + if ( + key === 'properties' || + key === 'patternProperties' || + key === '$defs' || + key === 'definitions' || + key === 'dependentSchemas' + ) { + if (isRecord(value)) { + output[key] = Object.fromEntries( + Object.entries(value).map(([property, propertySchema]) => [ + property, + normalizeJsonSchema(propertySchema, root, resolvingRefs), + ]), + ); + } + continue; + } + if ( + key === 'items' || + key === 'contains' || + key === 'additionalProperties' || + key === 'unevaluatedProperties' || + key === 'propertyNames' || + key === 'not' || + key === 'if' || + key === 'then' || + key === 'else' + ) { + output[key] = + typeof value === 'boolean' + ? value + : normalizeJsonSchema(value, root, resolvingRefs); + continue; + } + if (key === 'prefixItems') { + if (Array.isArray(value)) { + output[key] = value.map((item) => + normalizeJsonSchema(item, root, resolvingRefs), + ); + } + continue; + } + if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') { + if (Array.isArray(value) && value.length > 0) { + output[key] = value.map((item) => + normalizeJsonSchema(item, root, resolvingRefs), + ); + } + continue; + } + if (key === 'required') { + if (Array.isArray(value)) { + const required = Array.from( + new Set( + value.filter((item): item is string => typeof item === 'string'), + ), + ); + if (required.length > 0) { + output[key] = required; + } + } + continue; + } + if (key === 'dependentRequired') { + if (isRecord(value)) { + output[key] = Object.fromEntries( + Object.entries(value) + .map(([property, dependencies]) => [ + property, + Array.isArray(dependencies) + ? Array.from( + new Set( + dependencies.filter( + (item): item is string => typeof item === 'string', + ), + ), + ) + : undefined, + ]) + .filter( + (entry): entry is [string, string[]] => entry[1] !== undefined, + ), + ); + } + continue; + } + output[key] = value; + } + + if (nullable) { + addNullToSchema(output); + } + + return output; +} + +function toAnthropicInputSchema(schema: unknown): unknown { + if (!isRecord(schema)) { + return { + type: 'object', + properties: {}, + additionalProperties: true, + }; + } + + const type = schema['type']; + if (type === undefined) { + return { + type: 'object', + properties: {}, + additionalProperties: true, + ...schema, + }; + } + if (type === 'object' || (Array.isArray(type) && type.includes('object'))) { + return schema['properties'] === undefined + ? { ...schema, properties: {} } + : schema; + } + + return { + type: 'object', + properties: {}, + additionalProperties: true, + }; +} + +function normalizeJsonSchemaType( + value: unknown, +): string | string[] | undefined { + const validTypes = new Set([ + 'array', + 'boolean', + 'integer', + 'null', + 'number', + 'object', + 'string', + ]); + if (typeof value === 'string') { + const normalized = value.toLowerCase(); + return validTypes.has(normalized) ? normalized : undefined; + } + if (Array.isArray(value)) { + const normalized = Array.from( + new Set( + value + .filter((item): item is string => typeof item === 'string') + .map((item) => item.toLowerCase()) + .filter((item) => validTypes.has(item)), + ), + ); + return normalized.length > 0 ? normalized : undefined; + } + return undefined; +} + +function addNullToSchema(schema: Record) { + const type = schema['type']; + if (typeof type === 'string') { + schema['type'] = Array.from(new Set([type, 'null'])); + return; + } + if (Array.isArray(type)) { + schema['type'] = Array.from( + new Set([ + ...type.filter((item): item is string => typeof item === 'string'), + 'null', + ]), + ); + return; + } + + const nonNullable = { ...schema }; + schema['anyOf'] = [nonNullable, { type: 'null' }]; +} + +function resolveJsonPointer(root: unknown, pointer: string): unknown { + const segments = pointer + .slice(2) + .split('/') + .map((segment) => segment.replace(/~1/g, '/').replace(/~0/g, '~')); + let current = root; + for (const segment of segments) { + current = jsonPointerSegmentValue(current, segment); + if (current === undefined) return undefined; + } + return current; +} + +function jsonPointerSegmentValue(value: unknown, segment: string): unknown { + if (Array.isArray(value)) { + const index = Number(segment); + return Number.isInteger(index) ? value[index] : undefined; + } + return isRecord(value) ? value[segment] : undefined; +} + +function anthropicBlockToGeminiPart( + block: AnthropicContentBlock, + toolNameMapper: ToolNameMapper, +): Part[] { + if (block.type === 'text') { + return block.text ? [{ text: block.text }] : []; + } + if (block.type === 'thinking') { + return block.thinking ? [{ text: block.thinking, thought: true }] : []; + } + if (block.type === 'redacted_thinking') { + return []; + } + if (block.type === 'tool_use') { + return [ + { + functionCall: { + id: block.id, + name: toolNameMapper.toGeminiName(block.name), + args: parseJsonObject(JSON.stringify(block.input ?? {})), + }, + }, + ]; + } + return []; +} + +function anthropicBlocksToGeminiParts( + blocks: AnthropicContentBlock[], + toolNameMapper: ToolNameMapper, +): Part[] { + const parts: Part[] = []; + let pendingThinkingBlocks: AnthropicThinkingLikeBlock[] = []; + + for (const block of blocks) { + if (block.type === 'thinking' || block.type === 'redacted_thinking') { + pendingThinkingBlocks.push(block); + if (block.type === 'thinking' && block.thinking) { + parts.push({ text: block.thinking, thought: true }); + } + continue; + } + + if (block.type === 'tool_use') { + parts.push( + createFunctionCallPart( + block.id, + toolNameMapper.toGeminiName(block.name), + parseJsonObject(JSON.stringify(block.input ?? {})), + encodeClaudeThinkingBlocks(pendingThinkingBlocks), + ), + ); + pendingThinkingBlocks = []; + continue; + } + + parts.push(...anthropicBlockToGeminiPart(block, toolNameMapper)); + } + + return parts; +} + +function createFunctionCallPart( + id: string, + name: string, + args: Record, + thinkingSignature?: string, +): Part { + return { + functionCall: { + id, + name, + args, + }, + ...(thinkingSignature ? { thoughtSignature: thinkingSignature } : {}), + }; +} + +function createGeminiResponse(args: { + parts: Part[]; + finishReason?: FinishReason; + usage?: AnthropicUsage; + responseId?: string; + modelVersion?: string; +}): GenerateContentResponse { + const response = new GenerateContentResponse(); + response.responseId = args.responseId; + response.modelVersion = args.modelVersion; + response.candidates = [ + { + index: 0, + content: { + role: 'model', + parts: args.parts, + }, + finishReason: args.finishReason, + }, + ]; + if (args.usage) { + response.usageMetadata = usageToGeminiUsage(args.usage); + } + return response; +} + +function usageToGeminiUsage( + usage: AnthropicUsage, +): GenerateContentResponseUsageMetadata { + const promptTokenCount = + (usage.input_tokens ?? 0) + + (usage.cache_creation_input_tokens ?? 0) + + (usage.cache_read_input_tokens ?? 0); + const candidatesTokenCount = usage.output_tokens ?? 0; + return { + promptTokenCount, + candidatesTokenCount, + totalTokenCount: promptTokenCount + candidatesTokenCount, + }; +} + +function mapAnthropicStopReason(stopReason?: string | null): FinishReason { + switch (stopReason) { + case 'max_tokens': + return FinishReason.MAX_TOKENS; + case 'refusal': + return FinishReason.SAFETY; + case 'end_turn': + case 'stop_sequence': + case 'tool_use': + default: + return FinishReason.STOP; + } +} + +async function* parseSse( + body: ReadableStream, +): AsyncGenerator { + const reader = body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + buffer += decoder.decode(value, { stream: true }); + buffer = buffer.replace(/\r\n/g, '\n'); + let boundary = buffer.indexOf('\n\n'); + while (boundary !== -1) { + const chunk = buffer.slice(0, boundary); + buffer = buffer.slice(boundary + 2); + const event = parseSseChunk(chunk); + if (event) { + yield event; + } + boundary = buffer.indexOf('\n\n'); + } + } + + buffer += decoder.decode(); + const event = parseSseChunk(buffer); + if (event) { + yield event; + } + } finally { + reader.releaseLock(); + } +} + +function parseSseChunk(chunk: string): AnthropicStreamPayload | undefined { + const dataLines = chunk + .split(/\r?\n/) + .filter((line) => line.startsWith('data:')) + .map((line) => line.slice(5).trimStart()); + const data = (dataLines.length > 0 ? dataLines.join('\n') : chunk).trim(); + + if (!data || data === '[DONE]') { + return undefined; + } + const parsed = JSON.parse(data) as unknown; + if (isAnthropicStreamEvent(parsed)) { + return parsed; + } + return isAnthropicMessageResponse(parsed) + ? toAnthropicMessageResponse(parsed) + : undefined; +} + +function isAnthropicStreamEvent(value: unknown): value is AnthropicStreamEvent { + if (!isRecord(value)) { + return false; + } + switch (stringField(value, 'type')) { + case 'message_start': + case 'content_block_start': + case 'content_block_delta': + case 'content_block_stop': + case 'message_delta': + case 'message_stop': + case 'ping': + return true; + default: + return false; + } +} + +function isAnthropicMessageResponse( + value: unknown, +): value is AnthropicMessageResponse { + return isRecord(value) && Array.isArray(value['content']); +} + +function isAnthropicContentBlock( + value: unknown, +): value is AnthropicContentBlock { + if (!isRecord(value)) { + return false; + } + + const type = stringField(value, 'type'); + if (!type) { + return false; + } + + switch (type) { + case 'text': + return stringField(value, 'text') !== undefined; + case 'thinking': + return stringField(value, 'thinking') !== undefined; + case 'redacted_thinking': + return stringField(value, 'data') !== undefined; + case 'image': + case 'document': + return isRecord(value['source']); + case 'tool_use': + return ( + stringField(value, 'id') !== undefined && + stringField(value, 'name') !== undefined + ); + case 'tool_result': + return ( + stringField(value, 'tool_use_id') !== undefined && + stringField(value, 'content') !== undefined + ); + default: + return false; + } +} + +function isAnthropicThinkingLikeBlock( + value: unknown, +): value is AnthropicThinkingLikeBlock { + if (!isRecord(value)) { + return false; + } + + if (value['type'] === 'redacted_thinking') { + return stringField(value, 'data') !== undefined; + } + if (value['type'] === 'thinking') { + const signature = value['signature']; + return ( + stringField(value, 'thinking') !== undefined && + (signature === undefined || stringField(value, 'signature') !== undefined) + ); + } + return false; +} + +function isPreservableThinkingBlock( + block: AnthropicThinkingLikeBlock, +): boolean { + if (block.type === 'redacted_thinking') { + return block.data !== ''; + } + return block.signature !== undefined && block.signature !== ''; +} + +function parseJsonObject(value: string): Record { + try { + const parsed = JSON.parse(value) as unknown; + return isRecord(parsed) ? parsed : {}; + } catch { + return {}; + } +} + +function makeToolUseId(functionCall: FunctionCall): string { + return `toolu_${functionCall.name ?? 'unknown_tool'}_${ + JSON.stringify(functionCall.args ?? {}).length + }`; +} + +function functionResponseContent(response: Record): string { + const output = response['output']; + if (typeof output === 'string') { + return output; + } + return JSON.stringify(response); +} + +function isEmptyObject(value: unknown): boolean { + return ( + value !== null && + typeof value === 'object' && + !Array.isArray(value) && + Object.keys(value).length === 0 + ); +} + +function isRecord(value: unknown): value is Record { + return value !== null && typeof value === 'object' && !Array.isArray(value); +} + +function numberField( + value: Record, + key: string, +): number | undefined { + const field = value[key]; + return typeof field === 'number' ? field : undefined; +} + +function stringField( + value: Record, + key: string, +): string | undefined { + const field = value[key]; + return typeof field === 'string' ? field : undefined; +} diff --git a/packages/core/src/utils/partUtils.test.ts b/packages/core/src/utils/partUtils.test.ts index 5a8130c97c..315a752c24 100644 --- a/packages/core/src/utils/partUtils.test.ts +++ b/packages/core/src/utils/partUtils.test.ts @@ -90,6 +90,13 @@ describe('partUtils', () => { expect(partToString(part, verboseOptions)).toBe('[Thought: thinking]'); }); + it('should use text for boolean thought parts', () => { + const part = { text: 'thinking text', thought: true } as Part; + expect(partToString(part, verboseOptions)).toBe( + '[Thought: thinking text]', + ); + }); + it('should return descriptive string for codeExecutionResult part', () => { const part = { codeExecutionResult: {} } as Part; expect(partToString(part, verboseOptions)).toBe( diff --git a/packages/core/src/utils/partUtils.ts b/packages/core/src/utils/partUtils.ts index f45d9cf6c8..b74d2fd817 100644 --- a/packages/core/src/utils/partUtils.ts +++ b/packages/core/src/utils/partUtils.ts @@ -30,10 +30,9 @@ export function partToString( } // Cast to Part, assuming it might contain project-specific fields - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const part = value as Part & { videoMetadata?: unknown; - thought?: string; + thought?: unknown; codeExecutionResult?: unknown; executableCode?: unknown; }; @@ -43,7 +42,11 @@ export function partToString( return `[Video Metadata]`; } if (part.thought !== undefined) { - return `[Thought: ${part.thought}]`; + const thoughtText = + typeof part.text === 'string' && part.text.length > 0 + ? part.text + : String(part.thought); + return `[Thought: ${thoughtText}]`; } if (part.codeExecutionResult !== undefined) { return `[Code Execution Result]`; diff --git a/packages/core/src/utils/sessionUtils.test.ts b/packages/core/src/utils/sessionUtils.test.ts index 141efab572..5f4f96cd43 100644 --- a/packages/core/src/utils/sessionUtils.test.ts +++ b/packages/core/src/utils/sessionUtils.test.ts @@ -186,6 +186,153 @@ describe('convertSessionToClientHistory', () => { ]); }); + it('should not duplicate explicit tool response turns on resume', () => { + const toolResponse = { + functionResponse: { + id: 'toolu_1', + name: 'web_fetch', + response: { output: 'page content' }, + }, + }; + const messages: ConversationRecord['messages'] = [ + { + id: 'msg1', + type: 'user', + timestamp: '2024-01-01T10:00:00Z', + content: 'Fetch this page', + }, + { + id: 'msg2', + type: 'gemini', + timestamp: '2024-01-01T10:01:00Z', + content: [ + { text: 'Let me fetch it.' }, + { + functionCall: { + id: 'toolu_1', + name: 'web_fetch', + args: { url: 'https://example.com' }, + }, + }, + ], + toolCalls: [ + { + id: 'toolu_1', + name: 'web_fetch', + args: { url: 'https://example.com' }, + status: CoreToolCallStatus.Success, + timestamp: '2024-01-01T10:01:05Z', + result: [toolResponse], + }, + ], + }, + { + id: 'msg3', + type: 'user', + timestamp: '2024-01-01T10:01:06Z', + content: [toolResponse], + }, + ]; + + const history = convertSessionToClientHistory(messages); + + expect(history.map((h) => h.content)).toEqual([ + { role: 'user', parts: [{ text: 'Fetch this page' }] }, + { + role: 'model', + parts: [ + { text: 'Let me fetch it.' }, + { + functionCall: { + id: 'toolu_1', + name: 'web_fetch', + args: { url: 'https://example.com' }, + }, + }, + ], + }, + { role: 'user', parts: [toolResponse] }, + ]); + }); + + it('should deduplicate grouped tool results stored on multiple tool calls', () => { + const groupedResults = [ + { + functionResponse: { + id: 'call1', + name: 'read_file', + response: { output: 'file contents' }, + }, + }, + { + functionResponse: { + id: 'call2', + name: 'list_files', + response: { output: 'file.txt' }, + }, + }, + ]; + const messages: ConversationRecord['messages'] = [ + { + id: 'msg1', + type: 'user', + timestamp: '2024-01-01T10:00:00Z', + content: 'Inspect the project', + }, + { + id: 'msg2', + type: 'gemini', + timestamp: '2024-01-01T10:01:00Z', + content: 'I will inspect it.', + toolCalls: [ + { + id: 'call1', + name: 'read_file', + args: { path: 'README.md' }, + status: CoreToolCallStatus.Success, + timestamp: '2024-01-01T10:01:05Z', + result: groupedResults, + }, + { + id: 'call2', + name: 'list_files', + args: { dir: '.' }, + status: CoreToolCallStatus.Success, + timestamp: '2024-01-01T10:01:06Z', + result: groupedResults, + }, + ], + }, + ]; + + const history = convertSessionToClientHistory(messages); + + expect(history.map((h) => h.content)).toEqual([ + { role: 'user', parts: [{ text: 'Inspect the project' }] }, + { + role: 'model', + parts: [ + { text: 'I will inspect it.' }, + { + functionCall: { + id: 'call1', + name: 'read_file', + args: { path: 'README.md' }, + }, + }, + { + functionCall: { + id: 'call2', + name: 'list_files', + args: { dir: '.' }, + }, + }, + ], + }, + { role: 'user', parts: groupedResults }, + ]); + }); + it('should preserve multi-modal parts (inlineData)', () => { const messages: ConversationRecord['messages'] = [ { diff --git a/packages/core/src/utils/sessionUtils.ts b/packages/core/src/utils/sessionUtils.ts index 9dd30c2e89..f2d62ac338 100644 --- a/packages/core/src/utils/sessionUtils.ts +++ b/packages/core/src/utils/sessionUtils.ts @@ -104,6 +104,65 @@ export function isIgnoredUserContent(trimmedContent: string): boolean { ); } +function collectExplicitFunctionResponseIds( + messages: ConversationRecord['messages'], +): Set { + const ids = new Set(); + for (const msg of messages) { + if (msg.type !== 'user') { + continue; + } + + for (const part of ensurePartArray(msg.content)) { + const id = part.functionResponse?.id; + if (id) { + ids.add(id); + } + } + } + return ids; +} + +function appendFunctionResponseParts( + target: Part[], + parts: Part[], + explicitResponseIds: ReadonlySet, + generatedResponseIds: Set, +): void { + const partsToAppend: Part[] = []; + const idsToMark: string[] = []; + let hasFunctionResponse = false; + let hasNewFunctionResponse = false; + + for (const part of parts) { + const id = part.functionResponse?.id; + if (!part.functionResponse) { + partsToAppend.push(part); + continue; + } + + hasFunctionResponse = true; + if (id && (explicitResponseIds.has(id) || generatedResponseIds.has(id))) { + continue; + } + + partsToAppend.push(part); + hasNewFunctionResponse = true; + if (id) { + idsToMark.push(id); + } + } + + if (hasFunctionResponse && !hasNewFunctionResponse) { + return; + } + + target.push(...partsToAppend); + for (const id of idsToMark) { + generatedResponseIds.add(id); + } +} + /** * Converts session/conversation data into Gemini client history formats. */ @@ -111,6 +170,7 @@ export function convertSessionToClientHistory( messages: ConversationRecord['messages'], ): HistoryTurn[] { const clientHistory: HistoryTurn[] = []; + const explicitResponseIds = collectExplicitFunctionResponseIds(messages); for (const msg of messages) { if (msg.type === 'info' || msg.type === 'error' || msg.type === 'warning') { @@ -185,12 +245,18 @@ export function convertSessionToClientHistory( // 4. Generate tool response turns if (msg.toolCalls && msg.toolCalls.length > 0) { const functionResponseParts: Part[] = []; + const generatedResponseIds = new Set(); for (const toolCall of msg.toolCalls) { if (toolCall.result) { - let responseData: Part; - if (typeof toolCall.result === 'string') { - responseData = { + if ( + explicitResponseIds.has(toolCall.id) || + generatedResponseIds.has(toolCall.id) + ) { + continue; + } + + functionResponseParts.push({ functionResponse: { id: toolCall.id, name: toolCall.name, @@ -198,15 +264,16 @@ export function convertSessionToClientHistory( output: toolCall.result, }, }, - }; - } else if (Array.isArray(toolCall.result)) { - functionResponseParts.push(...ensurePartArray(toolCall.result)); - continue; + }); + generatedResponseIds.add(toolCall.id); } else { - responseData = toolCall.result; + appendFunctionResponseParts( + functionResponseParts, + ensurePartArray(toolCall.result), + explicitResponseIds, + generatedResponseIds, + ); } - - functionResponseParts.push(responseData); } }