diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index c4e18888fb..97ca58be5c 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -436,6 +436,20 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `"ask"` - **Values:** `"ask"`, `"always"`, `"never"` +- **`billing.vertexAi.requestType`** (enum): + - **Description:** Sets the X-Vertex-AI-LLM-Request-Type header for Vertex AI + requests. + - **Default:** `undefined` + - **Values:** `"dedicated"`, `"shared"` + - **Requires restart:** Yes + +- **`billing.vertexAi.sharedRequestType`** (enum): + - **Description:** Sets the X-Vertex-AI-LLM-Shared-Request-Type header for + Vertex AI requests. + - **Default:** `undefined` + - **Values:** `"priority"`, `"flex"` + - **Requires restart:** Yes + #### `model` - **`model.name`** (string): diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 213c22120e..b3709ba0cd 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1032,6 +1032,7 @@ export async function loadCliConfig( recordResponses: argv.recordResponses, retryFetchErrors: settings.general?.retryFetchErrors, billing: settings.billing, + vertexAiRouting: settings.billing?.vertexAi, maxAttempts: settings.general?.maxAttempts, ptyInfo: ptyInfo?.name, disableLLMCorrection: settings.tools?.disableLLMCorrection, diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index 81e5f32ff0..c0d58fcc07 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -313,6 +313,22 @@ describe('SettingsSchema', () => { ).toBe(false); }); + it('should have Vertex AI routing settings in schema', () => { + const vertexAi = + getSettingsSchema().billing.properties.vertexAi.properties; + + expect(vertexAi.requestType).toBeDefined(); + 
expect(vertexAi.requestType.type).toBe('enum'); + expect( + vertexAi.requestType.options?.map((option) => option.value), + ).toEqual(['dedicated', 'shared']); + expect(vertexAi.sharedRequestType).toBeDefined(); + expect(vertexAi.sharedRequestType.type).toBe('enum'); + expect( + vertexAi.sharedRequestType.options?.map((option) => option.value), + ).toEqual(['priority', 'flex']); + }); + it('should have folderTrustFeature setting in schema', () => { expect( getSettingsSchema().security.properties.folderTrust.properties.enabled, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 7e7de80132..4d8e6f4dde 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -21,6 +21,7 @@ import { type AgentOverride, type CustomTheme, type SandboxConfig, + type VertexAiRoutingConfig, } from '@google/gemini-cli-core'; import type { SessionRetentionSettings } from './settings.js'; import { DEFAULT_MIN_RETENTION } from '../utils/sessionCleanup.js'; @@ -990,6 +991,45 @@ const SETTINGS_SCHEMA = { { value: 'never', label: 'Never use credits' }, ], }, + vertexAi: { + type: 'object', + label: 'Vertex AI', + category: 'Advanced', + requiresRestart: true, + default: undefined as VertexAiRoutingConfig | undefined, + description: 'Vertex AI request routing settings.', + showInDialog: false, + properties: { + requestType: { + type: 'enum', + label: 'Vertex AI Request Type', + category: 'Advanced', + requiresRestart: true, + default: undefined as VertexAiRoutingConfig['requestType'], + description: + 'Sets the X-Vertex-AI-LLM-Request-Type header for Vertex AI requests.', + showInDialog: false, + options: [ + { value: 'dedicated', label: 'Dedicated' }, + { value: 'shared', label: 'Shared' }, + ], + }, + sharedRequestType: { + type: 'enum', + label: 'Vertex AI Shared Request Type', + category: 'Advanced', + requiresRestart: true, + default: undefined as VertexAiRoutingConfig['sharedRequestType'], + 
description: + 'Sets the X-Vertex-AI-LLM-Shared-Request-Type header for Vertex AI requests.', + showInDialog: false, + options: [ + { value: 'priority', label: 'Priority' }, + { value: 'flex', label: 'Flex' }, + ], + }, + }, + }, }, }, diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index fd97d67eda..52b2de871b 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -836,12 +836,37 @@ describe('Server Config (config.ts)', () => { undefined, undefined, undefined, + undefined, ); // Verify that contentGeneratorConfig is updated expect(config.getContentGeneratorConfig()).toEqual(mockContentConfig); expect(GeminiClient).toHaveBeenCalledWith(config); }); + it('should pass Vertex AI routing settings when refreshing auth', async () => { + const vertexAiRouting = { + requestType: 'shared' as const, + sharedRequestType: 'priority' as const, + }; + const config = new Config({ + ...baseParams, + vertexAiRouting, + }); + + vi.mocked(createContentGeneratorConfig).mockResolvedValue({}); + + await config.refreshAuth(AuthType.USE_VERTEX_AI); + + expect(createContentGeneratorConfig).toHaveBeenCalledWith( + config, + AuthType.USE_VERTEX_AI, + undefined, + undefined, + undefined, + vertexAiRouting, + ); + }); + it('should reset model availability status', async () => { const config = new Config(baseParams); const service = config.getModelAvailabilityService(); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index e3220eb9ef..a23e9bc0b6 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -23,6 +23,7 @@ import { createContentGeneratorConfig, type ContentGenerator, type ContentGeneratorConfig, + type VertexAiRoutingConfig, } from '../core/contentGenerator.js'; import type { OverageStrategy } from '../billing/billing.js'; import { PromptRegistry } from '../prompts/prompt-registry.js'; @@ -731,6 +732,7 @@ export interface 
ConfigParameters { billing?: { overageStrategy?: OverageStrategy; }; + vertexAiRouting?: VertexAiRoutingConfig; } export class Config implements McpContext, AgentLoopContext { @@ -936,6 +938,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly billing: { overageStrategy: OverageStrategy; }; + private readonly vertexAiRouting: VertexAiRoutingConfig | undefined; private readonly enableAgents: boolean; private agents: AgentSettings; @@ -1362,6 +1365,7 @@ export class Config implements McpContext, AgentLoopContext { this.billing = { overageStrategy: params.billing?.overageStrategy ?? 'ask', }; + this.vertexAiRouting = params.vertexAiRouting; if (params.contextFileName) { setGeminiMdFilename(params.contextFileName); @@ -1549,6 +1553,7 @@ export class Config implements McpContext, AgentLoopContext { apiKey, baseUrl, customHeaders, + this.vertexAiRouting, ); this.contentGenerator = await createContentGenerator( newContentGeneratorConfig, diff --git a/packages/core/src/core/contentGenerator.test.ts b/packages/core/src/core/contentGenerator.test.ts index bf7eef167d..2a89c52b6b 100644 --- a/packages/core/src/core/contentGenerator.test.ts +++ b/packages/core/src/core/contentGenerator.test.ts @@ -385,6 +385,44 @@ describe('createContentGenerator', () => { ); }); + it('should include Vertex AI routing headers for Vertex AI requests', async () => { + const mockConfig = { + getModel: vi.fn().mockReturnValue('gemini-pro'), + getProxy: vi.fn().mockReturnValue(undefined), + getUsageStatisticsEnabled: () => false, + getClientName: vi.fn().mockReturnValue(undefined), + } as unknown as Config; + + const mockGenerator = { + models: {}, + } as unknown as GoogleGenAI; + vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); + + await createContentGenerator( + { + apiKey: 'test-api-key', + vertexai: true, + authType: AuthType.USE_VERTEX_AI, + vertexAiRouting: { + requestType: 'shared', + sharedRequestType: 'priority', + }, + }, + mockConfig, 
+ ); + + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + httpOptions: expect.objectContaining({ + headers: expect.objectContaining({ + 'X-Vertex-AI-LLM-Request-Type': 'shared', + 'X-Vertex-AI-LLM-Shared-Request-Type': 'priority', + }), + }), + }), + ); + }); + it('should pass api key as Authorization Header when GEMINI_API_KEY_AUTH_MECHANISM is set to bearer', async () => { const mockConfig = { getModel: vi.fn().mockReturnValue('gemini-pro'), @@ -887,6 +925,25 @@ describe('createContentGeneratorConfig', () => { expect(config.vertexai).toBe(true); }); + it('should include Vertex AI routing settings in content generator config', async () => { + vi.stubEnv('GOOGLE_API_KEY', 'env-google-key'); + const vertexAiRouting = { + requestType: 'shared' as const, + sharedRequestType: 'priority' as const, + }; + + const config = await createContentGeneratorConfig( + mockConfig, + AuthType.USE_VERTEX_AI, + undefined, + undefined, + undefined, + vertexAiRouting, + ); + + expect(config.vertexAiRouting).toEqual(vertexAiRouting); + }); + it('should configure for Vertex AI using GCP project and location when set', async () => { vi.stubEnv('GOOGLE_API_KEY', undefined); vi.stubEnv('GOOGLE_CLOUD_PROJECT', 'env-gcp-project'); diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 31e36ede41..789942bb51 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -99,9 +99,21 @@ export type ContentGeneratorConfig = { proxy?: string; baseUrl?: string; customHeaders?: Record<string, string>; + vertexAiRouting?: VertexAiRoutingConfig; }; +export type VertexAiRequestType = 'dedicated' | 'shared'; +export type VertexAiSharedRequestType = 'priority' | 'flex'; + +export interface VertexAiRoutingConfig { + requestType?: VertexAiRequestType; + sharedRequestType?: VertexAiSharedRequestType; +} + const LOCAL_HOSTNAMES = ['localhost', '127.0.0.1', '[::1]']; +const VERTEX_AI_REQUEST_TYPE_HEADER = 
'X-Vertex-AI-LLM-Request-Type'; +const VERTEX_AI_SHARED_REQUEST_TYPE_HEADER = + 'X-Vertex-AI-LLM-Shared-Request-Type'; function validateBaseUrl(baseUrl: string): void { let url: URL; @@ -122,6 +134,7 @@ export async function createContentGeneratorConfig( apiKey?: string, baseUrl?: string, customHeaders?: Record<string, string>, + vertexAiRouting?: VertexAiRoutingConfig, ): Promise<ContentGeneratorConfig> { const geminiApiKey = apiKey || @@ -140,6 +153,7 @@ export async function createContentGeneratorConfig( proxy: config?.getProxy(), baseUrl, customHeaders, + vertexAiRouting, }; // If we are using Google auth or we are in Cloud Shell, there is nothing else to validate for now @@ -280,6 +294,21 @@ export async function createContentGenerator( if (config.customHeaders) { headers = { ...headers, ...config.customHeaders }; } + if ( + config.authType === AuthType.USE_VERTEX_AI && + config.vertexAiRouting + ) { + const { requestType, sharedRequestType } = config.vertexAiRouting; + headers = { + ...headers, + ...(requestType + ? { [VERTEX_AI_REQUEST_TYPE_HEADER]: requestType } + : {}), + ...(sharedRequestType + ? 
{ [VERTEX_AI_SHARED_REQUEST_TYPE_HEADER]: sharedRequestType } + : {}), + }; + } if (gcConfig?.getUsageStatisticsEnabled()) { const installationManager = new InstallationManager(); const installationId = installationManager.getInstallationId(); diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index d30a6f4b0a..e24a7383d8 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -626,6 +626,29 @@ "default": "ask", "type": "string", "enum": ["ask", "always", "never"] + }, + "vertexAi": { + "title": "Vertex AI", + "description": "Vertex AI request routing settings.", + "markdownDescription": "Vertex AI request routing settings.\n\n- Category: `Advanced`\n- Requires restart: `yes`", + "type": "object", + "properties": { + "requestType": { + "title": "Vertex AI Request Type", + "description": "Sets the X-Vertex-AI-LLM-Request-Type header for Vertex AI requests.", + "markdownDescription": "Sets the X-Vertex-AI-LLM-Request-Type header for Vertex AI requests.\n\n- Category: `Advanced`\n- Requires restart: `yes`", + "type": "string", + "enum": ["dedicated", "shared"] + }, + "sharedRequestType": { + "title": "Vertex AI Shared Request Type", + "description": "Sets the X-Vertex-AI-LLM-Shared-Request-Type header for Vertex AI requests.", + "markdownDescription": "Sets the X-Vertex-AI-LLM-Shared-Request-Type header for Vertex AI requests.\n\n- Category: `Advanced`\n- Requires restart: `yes`", + "type": "string", + "enum": ["priority", "flex"] + } + }, + "additionalProperties": false } }, "additionalProperties": false