From 62e97b14a24fa29c8639dde63a6d2cde9170fd23 Mon Sep 17 00:00:00 2001 From: Aishanee Shah Date: Tue, 12 May 2026 20:15:52 +0000 Subject: [PATCH] feat: implement explicit context caching for main agent with stable SI hashing --- package-lock.json | 35 +++- packages/cli/src/config/settingsSchema.ts | 47 ++++++ .../autoRoutingFallback.integration.test.ts | 2 +- packages/core/src/code_assist/server.ts | 8 + packages/core/src/config/config.ts | 16 +- packages/core/src/config/storage.ts | 7 + .../src/context/contextCacheManager.test.ts | 106 ++++++++++++ .../core/src/context/contextCacheManager.ts | 144 +++++++++++++++++ packages/core/src/context/types.ts | 7 + packages/core/src/core/client.ts | 3 +- .../core/src/core/contentGenerator.test.ts | 65 +++++--- packages/core/src/core/contentGenerator.ts | 106 ++++++++++-- .../core/src/core/fakeContentGenerator.ts | 31 +++- packages/core/src/core/geminiChat.test.ts | 151 ++++++++++++++++++ packages/core/src/core/geminiChat.ts | 134 +++++++++++++++- .../src/core/loggingContentGenerator.test.ts | 2 + .../core/src/core/loggingContentGenerator.ts | 16 ++ packages/core/src/core/prompts.ts | 2 + .../core/recordingContentGenerator.test.ts | 2 + .../src/core/recordingContentGenerator.ts | 36 ++++- packages/core/src/index.ts | 1 + packages/core/src/prompts/promptProvider.ts | 50 +++++- .../core/src/utils/apiConversionUtils.test.ts | 16 ++ packages/core/src/utils/apiConversionUtils.ts | 8 +- .../core/src/utils/nextSpeakerChecker.test.ts | 2 + 25 files changed, 948 insertions(+), 49 deletions(-) create mode 100644 packages/core/src/context/contextCacheManager.test.ts create mode 100644 packages/core/src/context/contextCacheManager.ts diff --git a/package-lock.json b/package-lock.json index 9ced540f9a..951b500b3f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -449,7 +449,8 @@ "version": "2.11.0", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", - "license": "(Apache-2.0 AND BSD-3-Clause)" + "license": "(Apache-2.0 AND BSD-3-Clause)", + "peer": true }, "node_modules/@bundled-es-modules/cookie": { "version": "2.0.1", @@ -1535,6 +1536,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" @@ -2212,6 +2214,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2392,6 +2395,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -2441,6 +2445,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2815,6 +2820,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2848,6 +2854,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2902,6 +2909,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4139,6 +4147,7 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4412,6 +4421,7 @@ "integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.58.2", "@typescript-eslint/types": "8.58.2", @@ -5187,6 +5197,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7304,7 +7315,8 @@ "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/dezalgo": { "version": "1.0.4", @@ -7889,6 +7901,7 @@ "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -8499,6 +8512,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9765,6 +9779,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz", "integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -10024,6 +10039,7 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz", "integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==", "license": "MIT", + "peer": true, "dependencies": { "ansi-escapes": "^7.0.0", "ansi-styles": "^6.2.3", @@ -13799,6 +13815,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -13809,6 +13826,7 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -15961,6 +15979,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16183,7 +16202,8 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" + "license": "0BSD", + "peer": true }, "node_modules/tsx": { "version": "4.20.3", @@ -16191,6 +16211,7 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16356,6 +16377,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16423,6 +16445,7 @@ "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.35.0", "@typescript-eslint/types": "8.35.0", @@ -16842,6 +16865,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -17412,6 +17436,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -17424,6 +17449,7 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", + "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -18062,6 +18088,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -18498,6 +18525,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -18616,6 +18644,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index adb87bdfa2..e7711f4730 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2156,6 +2156,53 @@ const SETTINGS_SCHEMA = { }, }, }, + contextCaching: { + type: 'object', + label: 'Context Caching', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: 'Explicit context caching for the main agent.', + showInDialog: true, + properties: { + enabled: { + type: 'boolean', + label: 'Enable Context Caching', + category: 'Experimental', + requiresRestart: true, + default: false, + description: 'Enable explicit context caching for the main agent.', + showInDialog: true, + }, + thresholdTokens: { + type: 'number', + label: 'Threshold Tokens', + category: 'Experimental', + requiresRestart: true, + default: 32768, + description: 'Minimum tokens required to trigger explicit caching.', + showInDialog: true, + }, + ttlMinutes: { + type: 'number', + label: 'TTL (Minutes)', + category: 'Experimental', + requiresRestart: true, + default: 60, + description: 'Time to live for a cache resource in minutes.', + showInDialog: true, + }, + autoRenew: { + type: 'boolean', + label: 'Auto Renew', + category: 'Experimental', + requiresRestart: true, + default: true, + description: 'Automatically extend TTL on use.', + showInDialog: true, + }, + }, + }, adk: { type: 'object', label: 'ADK', diff --git a/packages/core/src/availability/autoRoutingFallback.integration.test.ts b/packages/core/src/availability/autoRoutingFallback.integration.test.ts index f4e157503b..8480577ce9 100644 --- a/packages/core/src/availability/autoRoutingFallback.integration.test.ts +++ b/packages/core/src/availability/autoRoutingFallback.integration.test.ts @@ -59,7 +59,7 @@ describe('Auto Routing Fallback Integration', () => { return ''; // Fallback for other files }); - fakeGenerator = new FakeContentGenerator([]); + fakeGenerator = new FakeContentGenerator([], []); }); afterEach(() => { diff --git a/packages/core/src/code_assist/server.ts b/packages/core/src/code_assist/server.ts index 92fc558ebb..e745f35604 100644 --- a/packages/core/src/code_assist/server.ts +++ b/packages/core/src/code_assist/server.ts @@ -346,6 +346,14 @@ export class CodeAssistServer implements ContentGenerator { throw Error(); } + async createCachedContent(): Promise { + throw new Error('Explicit caching is not supported for Code Assist auth.'); + } + + async updateCachedContent(): Promise { + throw new Error('Explicit caching is not supported for Code Assist auth.'); + } + async listExperiments( metadata: ClientMetadata, ): Promise { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index f74ae4d7f5..e98a242c82 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -14,6 +14,7 @@ import type { ConversationRecord } from '../services/chatRecordingService.js'; import type { AgentHistoryProviderConfig, ContextManagementConfig, + ContextCachingConfig, ToolOutputMaskingConfig, } from '../context/types.js'; export type { ConversationRecord }; @@ -717,6 +718,7 @@ export interface ConfigParameters { experimentalAutoMemory?: boolean; experimentalGemma?: boolean; experimentalContextManagementConfig?: string; + experimentalContextCaching?: Partial; experimentalAgentHistoryTruncation?: boolean; experimentalAgentHistoryTruncationThreshold?: number; experimentalAgentHistoryRetainedMessages?: number; @@ -972,6 +974,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly modelSteering: boolean; private memoryContextManager?: MemoryContextManager; private readonly contextManagement: ContextManagementConfig; + private readonly contextCaching: ContextCachingConfig; private terminalBackground: string | undefined = undefined; private remoteAdminSettings: AdminControlsSettings | undefined; private latestApiRequest: GenerateContentParameters | undefined; @@ -1224,6 +1227,13 @@ export class Config implements McpContext, AgentLoopContext { }, }, }; + this.contextCaching = { + enabled: params.experimentalContextCaching?.enabled ?? false, + thresholdTokens: + params.experimentalContextCaching?.thresholdTokens ?? 32768, + ttlMinutes: params.experimentalContextCaching?.ttlMinutes ?? 60, + autoRenew: params.experimentalContextCaching?.autoRenew ?? true, + }; this.topicUpdateNarration = params.topicUpdateNarration ?? true; this.modelSteering = params.modelSteering ?? false; this.injectionService = new InjectionService(() => @@ -2574,7 +2584,11 @@ export class Config implements McpContext, AgentLoopContext { return this.contextManagement; } - get agentHistoryProviderConfig(): AgentHistoryProviderConfig { + getContextCachingConfig(): ContextCachingConfig { + return this.contextCaching; + } + + getAgentHistoryProviderConfig(): AgentHistoryProviderConfig { return { maxTokens: this.contextManagement.historyWindow.maxTokens, retainedTokens: this.contextManagement.historyWindow.retainedTokens, diff --git a/packages/core/src/config/storage.ts b/packages/core/src/config/storage.ts index fcc3cddc84..6f155a4886 100644 --- a/packages/core/src/config/storage.ts +++ b/packages/core/src/config/storage.ts @@ -87,6 +87,13 @@ export class Storage { return path.join(Storage.getGlobalGeminiDir(), GOOGLE_ACCOUNTS_FILENAME); } + static getContextCacheMetadataPath(): string { + return path.join( + Storage.getGlobalGeminiDir(), + 'context-cache-metadata.json', + ); + } + static getTrustedFoldersPath(): string { if (process.env['GEMINI_CLI_TRUSTED_FOLDERS_PATH']) { return process.env['GEMINI_CLI_TRUSTED_FOLDERS_PATH']; diff --git a/packages/core/src/context/contextCacheManager.test.ts b/packages/core/src/context/contextCacheManager.test.ts new file mode 100644 index 0000000000..0311fe712e --- /dev/null +++ b/packages/core/src/context/contextCacheManager.test.ts @@ -0,0 +1,106 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { ContextCacheManager } from './contextCacheManager.js'; +import { Storage } from '../config/storage.js'; +import * as fs from 'node:fs'; + +vi.mock('node:fs'); +vi.mock('../config/storage.js'); + +describe('ContextCacheManager', () => { + let manager: ContextCacheManager; + const mockMetadataPath = '/test/metadata.json'; + + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(Storage.getContextCacheMetadataPath).mockReturnValue( + mockMetadataPath, + ); + manager = new ContextCacheManager(); + }); + + it('should calculate stable SHA-256 hash', () => { + const si = 'You are a helpful assistant.'; + const hash1 = manager.calculateHash(si); + const hash2 = manager.calculateHash(si); + expect(hash1).toBe(hash2); + expect(hash1).toMatch(/^[a-f0-9]{64}$/); + }); + + it('should return undefined if cache not found', () => { + vi.mocked(fs.existsSync).mockReturnValue(false); + expect(manager.getCache('nonexistent')).toBeUndefined(); + }); + + it('should return entry if valid cache found', () => { + const hash = 'testhash'; + const futureDate = new Date(Date.now() + 3600000).toISOString(); + const entry = { + cacheName: 'cachedContents/123', + model: 'gemini-pro', + expiresAt: futureDate, + tokenCount: 1000, + }; + + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.readFileSync).mockReturnValue( + JSON.stringify({ + version: '1.0', + entries: { [hash]: entry }, + }), + ); + + const result = manager.getCache(hash); + expect(result).toEqual(entry); + }); + + it('should purge and return undefined if cache expired', () => { + const hash = 'expiredhash'; + const pastDate = new Date(Date.now() - 3600000).toISOString(); + const entry = { + cacheName: 'cachedContents/expired', + model: 'gemini-pro', + expiresAt: pastDate, + tokenCount: 1000, + }; + + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.readFileSync).mockReturnValue( + JSON.stringify({ + version: '1.0', + entries: { [hash]: entry }, + }), + ); + + const result = manager.getCache(hash); + expect(result).toBeUndefined(); + expect(fs.writeFileSync).toHaveBeenCalled(); + const saved = JSON.parse( + vi.mocked(fs.writeFileSync).mock.calls[0][1] as string, + ); + expect(saved.entries[hash]).toBeUndefined(); + }); + + it('should save metadata when setCache is called', () => { + vi.mocked(fs.existsSync).mockReturnValue(false); + const hash = 'newhash'; + const entry = { + cacheName: 'cachedContents/new', + model: 'gemini-pro', + expiresAt: new Date().toISOString(), + tokenCount: 1000, + }; + + manager.setCache(hash, entry); + expect(fs.writeFileSync).toHaveBeenCalled(); + const saved = JSON.parse( + vi.mocked(fs.writeFileSync).mock.calls[0][1] as string, + ); + expect(saved.entries[hash]).toEqual(entry); + }); +}); diff --git a/packages/core/src/context/contextCacheManager.ts b/packages/core/src/context/contextCacheManager.ts new file mode 100644 index 0000000000..54ec94f535 --- /dev/null +++ b/packages/core/src/context/contextCacheManager.ts @@ -0,0 +1,144 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as crypto from 'node:crypto'; +import * as fs from 'node:fs'; +import { Storage } from '../config/storage.js'; +import { debugLogger } from '../utils/debugLogger.js'; +import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; + +/** + * Metadata for a single Gemini Context Cache resource. + */ +export interface ContextCacheEntry { + /** The full resource name, e.g., 'cachedContents/xyz123' */ + cacheName: string; + /** The model ID this cache was created for */ + model: string; + /** ISO 8601 expiration timestamp */ + expiresAt: string; + /** Number of tokens in the cached content */ + tokenCount: number; +} + +/** + * Schema for the local persistent metadata storage. + */ +export interface ContextCacheMetadata { + version: string; + /** Map of SHA-256(SI) -> ContextCacheEntry */ + entries: Record; +} + +/** + * Manages the lifecycle and discovery of Gemini Context Caches. + * Uses a local metadata file to map System Instruction hashes to remote cache IDs. + */ +export class ContextCacheManager { + private metadata: ContextCacheMetadata | undefined; + private readonly metadataPath: string; + + constructor() { + this.metadataPath = Storage.getContextCacheMetadataPath(); + } + + private loadMetadata(): ContextCacheMetadata { + if (this.metadata) { + return this.metadata; + } + + try { + if (fs.existsSync(this.metadataPath)) { + const content = fs.readFileSync(this.metadataPath, 'utf8'); + const parsed = JSON.parse(content) as unknown; + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + this.metadata = parsed as ContextCacheMetadata; + } else { + this.metadata = { version: '1.0', entries: {} }; + } + } catch (error) { + debugLogger.error('Failed to load context cache metadata:', error); + this.metadata = { version: '1.0', entries: {} }; + } + + return this.metadata; + } + + private saveMetadata(): void { + if (!this.metadata) return; + + try { + fs.writeFileSync( + this.metadataPath, + JSON.stringify(this.metadata, null, 2), + ); + } catch (error) { + debugLogger.error('Failed to save context cache metadata:', error); + } + } + + /** + * Calculates a stable SHA-256 hash of the System Instruction. + */ + calculateHash(systemInstruction: string): string { + return crypto.createHash('sha256').update(systemInstruction).digest('hex'); + } + + /** + * Calculates the token count of a system instruction string. + */ + calculateTokenCount(systemInstruction: string): number { + return estimateTokenCountSync([{ text: systemInstruction }]); + } + + /** + * Looks up a hot cache for the given SI hash. + * Purges the entry if it has expired. + */ + getCache(hash: string): ContextCacheEntry | undefined { + const metadata = this.loadMetadata(); + const entry = metadata.entries[hash]; + + if (entry) { + const now = new Date(); + if (new Date(entry.expiresAt) > now) { + return entry; + } else { + // Purge expired entry + debugLogger.log( + `[ContextCache] Purging expired cache: ${entry.cacheName}`, + ); + delete metadata.entries[hash]; + this.saveMetadata(); + } + } + + return undefined; + } + + /** + * Saves or updates a cache entry. + */ + setCache(hash: string, entry: ContextCacheEntry): void { + const metadata = this.loadMetadata(); + metadata.entries[hash] = entry; + this.saveMetadata(); + } + + /** + * Removes a cache entry by hash. + */ + removeCache(hash: string): void { + const metadata = this.loadMetadata(); + if (metadata.entries[hash]) { + delete metadata.entries[hash]; + this.saveMetadata(); + } + } +} + +/** Global singleton instance */ +export const contextCacheManager = new ContextCacheManager(); diff --git a/packages/core/src/context/types.ts b/packages/core/src/context/types.ts index abd29daf65..8e2717e279 100644 --- a/packages/core/src/context/types.ts +++ b/packages/core/src/context/types.ts @@ -18,6 +18,13 @@ export interface ToolOutputMaskingConfig { protectLatestTurn: boolean; } +export interface ContextCachingConfig { + enabled: boolean; + thresholdTokens: number; + ttlMinutes: number; + autoRenew: boolean; +} + export interface ContextManagementConfig { enabled: boolean; historyWindow: { diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index ce544a0e30..ca73effaaa 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -112,9 +112,10 @@ export class GeminiClient { this.loopDetector = new LoopDetectionService(this.config); this.compressionService = new ChatCompressionService(); this.agentHistoryProvider = new AgentHistoryProvider( - this.config.agentHistoryProviderConfig, + this.config.getAgentHistoryProviderConfig(), this.config, ); + this.toolOutputMaskingService = new ToolOutputMaskingService(); this.lastPromptId = this.config.getSessionId(); diff --git a/packages/core/src/core/contentGenerator.test.ts b/packages/core/src/core/contentGenerator.test.ts index 4efd9f65c6..1d3d25e314 100644 --- a/packages/core/src/core/contentGenerator.test.ts +++ b/packages/core/src/core/contentGenerator.test.ts @@ -137,7 +137,8 @@ describe('createContentGenerator', () => { vi.stubEnv('GEMINI_CLI_SURFACE', ''); const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); const generator = await createContentGenerator( @@ -158,9 +159,7 @@ describe('createContentGenerator', () => { }), }), }); - expect(generator).toEqual( - new LoggingContentGenerator(mockGenerator.models, mockConfig), - ); + expect(generator).toBeInstanceOf(LoggingContentGenerator); }); it('should use standard User-Agent for a2a-server running outside VS Code', async () => { @@ -179,7 +178,8 @@ describe('createContentGenerator', () => { vi.stubEnv('GEMINI_CLI_SURFACE', ''); const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); await createContentGenerator( @@ -217,7 +217,8 @@ describe('createContentGenerator', () => { vi.stubEnv('TERM_PROGRAM_VERSION', '1.85.0'); const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); await createContentGenerator( @@ -255,7 +256,8 @@ describe('createContentGenerator', () => { vi.stubEnv('GEMINI_CLI_SURFACE', ''); const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); await createContentGenerator( @@ -288,7 +290,8 @@ describe('createContentGenerator', () => { vi.stubEnv('GEMINI_CLI_CUSTOM_HEADERS', 'User-Agent:MyCustomUA'); const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); await createContentGenerator( @@ -348,7 +351,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv( @@ -395,7 +399,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); @@ -433,7 +438,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GEMINI_API_KEY_AUTH_MECHANISM', 'bearer'); @@ -467,7 +473,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); // GEMINI_API_KEY_AUTH_MECHANISM is not stubbed, so it will be undefined, triggering default 'x-goog-api-key' @@ -508,7 +515,8 @@ describe('createContentGenerator', () => { getClientName: vi.fn().mockReturnValue(undefined), } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); const generator = await createContentGenerator( @@ -527,9 +535,7 @@ describe('createContentGenerator', () => { }, }), }); - expect(generator).toEqual( - new LoggingContentGenerator(mockGenerator.models, mockConfig), - ); + expect(generator).toBeInstanceOf(LoggingContentGenerator); }); it('should pass apiVersion to GoogleGenAI when GOOGLE_GENAI_API_VERSION is set', async () => { @@ -541,7 +547,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_GENAI_API_VERSION', 'v1'); @@ -575,7 +582,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); @@ -613,7 +621,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_GENAI_API_VERSION', ''); @@ -652,7 +661,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_GENAI_API_VERSION', 'v1alpha'); @@ -687,7 +697,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local'); @@ -719,7 +730,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local'); @@ -752,7 +764,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local'); @@ -785,7 +798,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://env.test.local'); @@ -817,7 +831,8 @@ describe('createContentGenerator', () => { } as unknown as Config; const mockGenerator = { - models: {}, + models: { get: vi.fn() }, + caches: { create: vi.fn(), update: vi.fn() }, } as unknown as GoogleGenAI; vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index bcee8cfef4..62afe43b56 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -4,14 +4,17 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { +import type { + EmbedContentResponse, GoogleGenAI, + type Content, type CountTokensResponse, type GenerateContentResponse, type GenerateContentParameters, type CountTokensParameters, - type EmbedContentResponse, type EmbedContentParameters, + type CachedContent, + type CreateCachedContentParameters, } from '@google/genai'; import * as os from 'node:os'; import { createCodeAssistContentGenerator } from '../code_assist/codeAssist.js'; @@ -49,6 +52,15 @@ export interface ContentGenerator { embedContent(request: EmbedContentParameters): Promise; + createCachedContent( + request: CreateCachedContentParameters, + ): Promise; + + updateCachedContent(request: { + name: string; + config?: { ttl?: string; expireTime?: string }; + }): Promise; + userTier?: UserTierId; userTierName?: string; @@ -65,6 +77,72 @@ export enum AuthType { GATEWAY = 'gateway', } +/** + * Implementation of ContentGenerator for standard Gemini/Vertex SDKs. + */ +class SdkContentGenerator implements ContentGenerator { + paidTier?: GeminiUserTier; + + constructor( + private readonly genAI: GoogleGenAI, + private readonly modelName: string, + readonly history: Content[] = [], + ) {} + + async generateContent( + request: GenerateContentParameters, + _userPromptId: string, + _role: LlmRole, + ): Promise { + return this.genAI.models.generateContent({ + ...request, + model: this.modelName, + }); + } + + async generateContentStream( + request: GenerateContentParameters, + _userPromptId: string, + _role: LlmRole, + ): Promise> { + return this.genAI.models.generateContentStream({ + ...request, + model: this.modelName, + }); + } + + async countTokens( + request: CountTokensParameters, + ): Promise { + return this.genAI.models.countTokens({ + ...request, + model: this.modelName, + }); + } + + async embedContent( + request: EmbedContentParameters, + ): Promise { + return this.genAI.models.embedContent({ + ...request, + model: this.modelName, + }); + } + + async createCachedContent( + request: CreateCachedContentParameters, + ): Promise { + return this.genAI.caches.create(request); + } + + async updateCachedContent(request: { + name: string; + config?: { ttl?: string; expireTime?: string }; + }): Promise { + return this.genAI.caches.update(request); + } +} + /** * Detects the best authentication type based on environment variables. * @@ -197,7 +275,7 @@ export async function createContentGenerator( const fakeGenerator = await FakeContentGenerator.fromFile( gcConfig.fakeResponses, ); - return new LoggingContentGenerator(fakeGenerator, gcConfig); + return new LoggingContentGenerator(fakeGenerator, gcConfig, []); } const version = await getVersion(); const model = resolveModel( @@ -278,6 +356,7 @@ export async function createContentGenerator( sessionId, ), gcConfig, + [], ); } @@ -330,11 +409,10 @@ export async function createContentGenerator( const httpOptions: { baseUrl?: string; headers: Record; - } = { headers }; - - if (baseUrl) { - httpOptions.baseUrl = baseUrl; - } + } = { + headers, + ...(baseUrl ? { baseUrl } : {}), + }; const googleGenAI = new GoogleGenAI({ apiKey: config.apiKey === '' ? undefined : config.apiKey, @@ -342,7 +420,11 @@ export async function createContentGenerator( httpOptions, ...(apiVersionEnv && { apiVersion: apiVersionEnv }), }); - return new LoggingContentGenerator(googleGenAI.models, gcConfig); + return new LoggingContentGenerator( + new SdkContentGenerator(googleGenAI, model, []), + gcConfig, + [], + ); } throw new Error( `Error creating contentGenerator: Unsupported authType: ${config.authType}`, @@ -350,7 +432,11 @@ export async function createContentGenerator( })(); if (gcConfig.recordResponses) { - return new RecordingContentGenerator(generator, gcConfig.recordResponses); + return new RecordingContentGenerator( + generator, + gcConfig.recordResponses, + [], + ); } return generator; diff --git a/packages/core/src/core/fakeContentGenerator.ts b/packages/core/src/core/fakeContentGenerator.ts index 9ecd75a99d..e10c7cef2e 100644 --- a/packages/core/src/core/fakeContentGenerator.ts +++ b/packages/core/src/core/fakeContentGenerator.ts @@ -6,11 +6,14 @@ import { GenerateContentResponse, + type Content, type CountTokensResponse, type GenerateContentParameters, type CountTokensParameters, EmbedContentResponse, type EmbedContentParameters, + type CachedContent, + type CreateCachedContentParameters, } from '@google/genai'; import { promises } from 'node:fs'; import type { ContentGenerator } from './contentGenerator.js'; @@ -34,6 +37,14 @@ export type FakeResponse = | { method: 'embedContent'; response: EmbedContentResponse; + } + | { + method: 'createCachedContent'; + response: CachedContent; + } + | { + method: 'updateCachedContent'; + response: CachedContent; }; // A ContentGenerator that responds with canned responses. @@ -46,7 +57,10 @@ export class FakeContentGenerator implements ContentGenerator { userTierName?: string; paidTier?: GeminiUserTier; - constructor(private readonly responses: FakeResponse[]) {} + constructor( + private readonly responses: FakeResponse[], + readonly history: Content[] = [], + ) {} static async fromFile(filePath: string): Promise { const fileContent = await promises.readFile(filePath, 'utf-8'); @@ -124,4 +138,19 @@ export class FakeContentGenerator implements ContentGenerator { EmbedContentResponse.prototype, ); } + + async createCachedContent( + request: CreateCachedContentParameters, + ): Promise { + + return this.getNextResponse('createCachedContent', request); + } + + async updateCachedContent(request: { + name: string; + config?: { ttl?: string; expireTime?: string }; + }): Promise { + + return this.getNextResponse('updateCachedContent', request); + } } diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 4f4cce3b54..2ddf1dd3a1 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -5,6 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import * as crypto from 'node:crypto'; import { ApiError, ThinkingLevel, @@ -25,6 +26,7 @@ import { CoreToolCallStatus, } from '../scheduler/types.js'; import { MockTool } from '../test-utils/mock-tool.js'; +import { Storage } from '../config/storage.js'; import type { Config } from '../config/config.js'; import { setSimulate429 } from '../utils/testUtils.js'; import { DEFAULT_THINKING_MODE } from '../config/models.js'; @@ -132,6 +134,8 @@ describe('GeminiChat', () => { countTokens: vi.fn(), embedContent: vi.fn(), batchEmbedContents: vi.fn(), + createCachedContent: vi.fn(), + updateCachedContent: vi.fn(), } as unknown as ContentGenerator; mockHandleFallback.mockClear(); @@ -186,6 +190,24 @@ describe('GeminiChat', () => { getMaxAttempts: vi.fn().mockReturnValue(10), getUserTier: vi.fn().mockReturnValue(undefined), isContextManagementEnabled: vi.fn().mockReturnValue(false), + getContextCachingConfig: vi.fn().mockReturnValue({ + enabled: false, + thresholdTokens: 32768, + ttlMinutes: 60, + autoRenew: true, + }), + getSystemInstructionMemory: vi.fn().mockReturnValue(undefined), + getIncludeDirectoryTree: vi.fn().mockReturnValue(true), + getWorkspaceContext: vi.fn().mockReturnValue({ + getDirectories: vi.fn().mockReturnValue([]), + }), + isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false), + topicState: { + getTopic: vi.fn().mockReturnValue(undefined), + }, + getSkillManager: vi.fn().mockReturnValue({ + getSkills: vi.fn().mockReturnValue([]), + }), modelConfigService: { getResolvedConfig: vi.fn().mockImplementation((modelConfigKey) => { const model = modelConfigKey.model ?? mockConfig.getModel(); @@ -3093,4 +3115,133 @@ describe('GeminiChat', () => { expect(stripped[1].parts![0].functionResponse!.id).toBe('call_123'); }); }); + + describe('explicit context caching', () => { + it('should create a new cache if enabled and SI is large enough', async () => { + const si = 'Large system instruction...'.repeat(2000); // Definitely > 32k + chat = new GeminiChat(mockConfig, si); + + vi.mocked(mockConfig.getContextCachingConfig).mockReturnValue({ + enabled: true, + thresholdTokens: 32768, + ttlMinutes: 60, + autoRenew: true, + }); + + vi.mocked(mockContentGenerator.createCachedContent).mockResolvedValue({ + name: 'cachedContents/new-cache', + expireTime: new Date(Date.now() + 3600000).toISOString(), + }); + + vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( + (async function* () { + yield { + candidates: [ + { + content: { role: 'model', parts: [{ text: 'response' }] }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(), + ); + + const stream = await chat.sendMessageStream( + { model: 'gemini-pro' }, + 'test', + 'prompt-id', + new AbortController().signal, + LlmRole.MAIN, + ); + for await (const chunk of stream) { + expect(chunk).toBeDefined(); + } + + expect(mockContentGenerator.createCachedContent).toHaveBeenCalledWith( + expect.objectContaining({ + systemInstruction: { parts: [{ text: si }] }, + }), + ); + expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + cachedContent: 'cachedContents/new-cache', + }), + }), + 'prompt-id', + LlmRole.MAIN, + ); + }); + + it('should reuse existing cache if present', async () => { + const si = 'Large system instruction...'.repeat(2000); + chat = new GeminiChat(mockConfig, si); + + const siHash = crypto.createHash('sha256').update(si).digest('hex'); + const futureDate = new Date(Date.now() + 3600000).toISOString(); + + // Seed the metadata file via the mock fs + mockFileSystem.set( + Storage.getContextCacheMetadataPath(), + JSON.stringify({ + version: '1.0', + entries: { + [siHash]: { + cacheName: 'cachedContents/existing-cache', + model: 'gemini-pro', + expiresAt: futureDate, + tokenCount: 40000, + }, + }, + }), + ); + + vi.mocked(mockConfig.getContextCachingConfig).mockReturnValue({ + enabled: true, + thresholdTokens: 32768, + ttlMinutes: 60, + autoRenew: true, + }); + + vi.mocked(mockContentGenerator.updateCachedContent).mockResolvedValue({ + name: 'cachedContents/existing-cache', + expireTime: new Date(Date.now() + 7200000).toISOString(), + }); + + vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( + (async function* () { + yield { + candidates: [ + { + content: { role: 'model', parts: [{ text: 'response' }] }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(), + ); + + const stream = await chat.sendMessageStream( + { model: 'gemini-pro' }, + 'test', + 'prompt-id', + new AbortController().signal, + LlmRole.MAIN, + ); + for await (const chunk of stream) { + expect(chunk).toBeDefined(); + } + + expect(mockContentGenerator.createCachedContent).not.toHaveBeenCalled(); + expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + cachedContent: 'cachedContents/existing-cache', + }), + }), + 'prompt-id', + LlmRole.MAIN, + ); + }); + }); }); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index dccbd282e2..9a991e7c05 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -45,7 +45,7 @@ import { ContentRetryEvent, ContentRetryFailureEvent, NetworkRetryAttemptEvent, - type LlmRole, + LlmRole, } from '../telemetry/types.js'; import { handleFallback } from '../fallback/handler.js'; import { isFunctionResponse } from '../utils/messageInspectors.js'; @@ -61,6 +61,9 @@ import { import { coreEvents } from '../utils/events.js'; import type { AgentLoopContext } from '../config/agent-loop-context.js'; import { debugLogger } from '../utils/debugLogger.js'; +import { contextCacheManager } from '../context/contextCacheManager.js'; +import { getCoreSystemPrompt } from './prompts.js'; +import { getDirectoryContextString } from '../utils/environmentContext.js'; export enum StreamEventType { /** A regular content chunk from the API. */ @@ -747,7 +750,134 @@ export class GeminiChat { lastConfig = config; lastContentsToUse = contentsToUse; - const finalContents = stripToolCallIdPrefixes(contentsToUse); + // Handle explicit context caching + const cachingConfig = this.context.config.getContextCachingConfig(); + let effectiveContents = contentsToUse; + + if (cachingConfig.enabled && role === LlmRole.MAIN) { + try { + const userMemory = this.context.config.getSystemInstructionMemory(); + const stableSI = getCoreSystemPrompt( + this.context.config, + userMemory, + undefined, + undefined, + 'stable', + ); + const siHash = contextCacheManager.calculateHash(stableSI); + const existingCache = contextCacheManager.getCache(siHash); + + if (existingCache && existingCache.model === modelToUse) { + config.cachedContent = existingCache.cacheName; + debugLogger.log( + `[ContextCache] Using existing cache: ${existingCache.cacheName}`, + ); + + // Prepend dynamic context to history + const dynamicContext = getCoreSystemPrompt( + this.context.config, + userMemory, + undefined, + undefined, + 'dynamic', + ); + const dirContext = await getDirectoryContextString( + this.context.config, + ); + const dynamicWithTree = dynamicContext.replace( + '[Recursive file tree provided in history]', + dirContext, + ); + + effectiveContents = [ + { role: 'user', parts: [{ text: dynamicWithTree }] }, + ...contentsToUse, + ]; + + // Asynchronously renew TTL if enabled + if (cachingConfig.autoRenew) { + this.context.config + .getContentGenerator() + .updateCachedContent({ + name: existingCache.cacheName, + config: { ttl: `${cachingConfig.ttlMinutes * 60}s` }, + }) + .then((updated) => { + if (updated.expireTime) { + existingCache.expiresAt = updated.expireTime; + contextCacheManager.setCache(siHash, existingCache); + debugLogger.log( + `[ContextCache] Renewed TTL for ${existingCache.cacheName}`, + ); + } + }) + .catch((e) => + debugLogger.error(`[ContextCache] Failed to renew TTL:`, e), + ); + } + } else { + // Check if we should create a new cache + const siTokens = contextCacheManager.calculateTokenCount(stableSI); + if (siTokens >= cachingConfig.thresholdTokens) { + debugLogger.log( + `[ContextCache] Creating new cache for stable SI (${siTokens} tokens)`, + ); + const newCache = await this.context.config + .getContentGenerator() + .createCachedContent({ + model: modelToUse, + config: { + systemInstruction: { parts: [{ text: stableSI }] }, + ttl: `${cachingConfig.ttlMinutes * 60}s`, + }, + }); + + if (newCache.name && newCache.expireTime) { + const entry = { + cacheName: newCache.name, + model: modelToUse, + expiresAt: newCache.expireTime, + tokenCount: siTokens, + }; + contextCacheManager.setCache(siHash, entry); + config.cachedContent = newCache.name; + debugLogger.log( + `[ContextCache] Created and using new cache: ${newCache.name}`, + ); + + // Prepend dynamic context to history for this initial call + const dynamicContext = getCoreSystemPrompt( + this.context.config, + userMemory, + undefined, + undefined, + 'dynamic', + ); + const dirContext = await getDirectoryContextString( + this.context.config, + ); + const dynamicWithTree = dynamicContext.replace( + '[Recursive file tree provided in history]', + dirContext, + ); + + effectiveContents = [ + { role: 'user', parts: [{ text: dynamicWithTree }] }, + ...contentsToUse, + ]; + } + } + } + } catch (error) { + // Fall back to standard request on cache failure + debugLogger.error( + '[ContextCache] Error managing context cache:', + error, + ); + } + } + + const finalContents = stripToolCallIdPrefixes(effectiveContents); return this.context.config.getContentGenerator().generateContentStream( { diff --git a/packages/core/src/core/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator.test.ts index 2a2580cb84..9053ff9dc5 100644 --- a/packages/core/src/core/loggingContentGenerator.test.ts +++ b/packages/core/src/core/loggingContentGenerator.test.ts @@ -65,6 +65,8 @@ describe('LoggingContentGenerator', () => { generateContentStream: vi.fn(), countTokens: vi.fn(), embedContent: vi.fn(), + createCachedContent: vi.fn(), + updateCachedContent: vi.fn(), }; config = { getGoogleAIConfig: vi.fn(), diff --git a/packages/core/src/core/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator.ts index d27b8a8f32..86d9613738 100644 --- a/packages/core/src/core/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator.ts @@ -15,6 +15,8 @@ import type { GenerateContentParameters, GenerateContentResponseUsageMetadata, GenerateContentResponse, + CachedContent, + CreateCachedContentParameters, } from '@google/genai'; import { ApiRequestEvent, @@ -150,6 +152,7 @@ export class LoggingContentGenerator implements ContentGenerator { constructor( private readonly wrapped: ContentGenerator, private readonly config: Config, + readonly history: Content[] = [], ) {} getWrapped(): ContentGenerator { @@ -623,4 +626,17 @@ export class LoggingContentGenerator implements ContentGenerator { }, ); } + + async createCachedContent( + request: CreateCachedContentParameters, + ): Promise { + return this.wrapped.createCachedContent(request); + } + + async updateCachedContent(request: { + name: string; + config?: { ttl?: string; expireTime?: string }; + }): Promise { + return this.wrapped.updateCachedContent(request); + } } diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index 48e70e4cf4..1e24b5f254 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -25,12 +25,14 @@ export function getCoreSystemPrompt( userMemory?: string | HierarchicalMemory, interactiveOverride?: boolean, topicUpdateNarrationOverride?: boolean, + splitMode: 'combined' | 'stable' | 'dynamic' = 'combined', ): string { return new PromptProvider().getCoreSystemPrompt( config, userMemory, interactiveOverride, topicUpdateNarrationOverride, + splitMode, ); } diff --git a/packages/core/src/core/recordingContentGenerator.test.ts b/packages/core/src/core/recordingContentGenerator.test.ts index 518e8585c3..d20ff54f45 100644 --- a/packages/core/src/core/recordingContentGenerator.test.ts +++ b/packages/core/src/core/recordingContentGenerator.test.ts @@ -40,6 +40,8 @@ describe('RecordingContentGenerator', () => { generateContentStream: vi.fn(), countTokens: vi.fn(), embedContent: vi.fn(), + createCachedContent: vi.fn(), + updateCachedContent: vi.fn(), }; recorder = new RecordingContentGenerator(mockRealGenerator, filePath); vi.clearAllMocks(); diff --git a/packages/core/src/core/recordingContentGenerator.ts b/packages/core/src/core/recordingContentGenerator.ts index f2193bb16d..c438b1948b 100644 --- a/packages/core/src/core/recordingContentGenerator.ts +++ b/packages/core/src/core/recordingContentGenerator.ts @@ -5,17 +5,20 @@ */ import type { + Content, CountTokensResponse, GenerateContentParameters, GenerateContentResponse, CountTokensParameters, EmbedContentResponse, EmbedContentParameters, + CachedContent, + CreateCachedContentParameters, } from '@google/genai'; import { appendFileSync } from 'node:fs'; import type { ContentGenerator } from './contentGenerator.js'; import type { FakeResponse } from './fakeContentGenerator.js'; -import type { UserTierId } from '../code_assist/types.js'; +import type { UserTierId, GeminiUserTier } from '../code_assist/types.js'; import { safeJsonStringify } from '../utils/safeJsonStringify.js'; import type { LlmRole } from '../telemetry/types.js'; @@ -29,6 +32,7 @@ export class RecordingContentGenerator implements ContentGenerator { constructor( private readonly realGenerator: ContentGenerator, private readonly filePath: string, + readonly history: Content[] = [], ) {} get userTier(): UserTierId | undefined { @@ -39,6 +43,10 @@ export class RecordingContentGenerator implements ContentGenerator { return this.realGenerator.userTierName; } + get paidTier(): GeminiUserTier | undefined { + return this.realGenerator.paidTier; + } + async generateContent( request: GenerateContentParameters, userPromptId: string, @@ -111,7 +119,6 @@ export class RecordingContentGenerator implements ContentGenerator { request: EmbedContentParameters, ): Promise { const response = await this.realGenerator.embedContent(request); - const recordedResponse: FakeResponse = { method: 'embedContent', response: { @@ -122,4 +129,29 @@ export class RecordingContentGenerator implements ContentGenerator { appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`); return response; } + + async createCachedContent( + request: CreateCachedContentParameters, + ): Promise { + const response = await this.realGenerator.createCachedContent(request); + const recordedResponse: FakeResponse = { + method: 'createCachedContent', + response, + }; + appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`); + return response; + } + + async updateCachedContent(request: { + name: string; + config?: { ttl?: string; expireTime?: string }; + }): Promise { + const response = await this.realGenerator.updateCachedContent(request); + const recordedResponse: FakeResponse = { + method: 'updateCachedContent', + response, + }; + appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`); + return response; + } } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 7fc1892139..55d087dca9 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -146,6 +146,7 @@ export { } from './services/memoryService.js'; export { isProjectSkillPatchTarget } from './services/memoryPatchUtils.js'; export * from './context/memoryContextManager.js'; +export * from './context/contextCacheManager.js'; export * from './services/trackerService.js'; export * from './services/trackerTypes.js'; export * from './services/keychainService.js'; diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index 2c1f9e8652..c3fcf60e39 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -42,14 +42,58 @@ import type { AgentLoopContext } from '../config/agent-loop-context.js'; */ export class PromptProvider { /** - * Generates the core system prompt. + * Generates the core system prompt, optionally split into stable and dynamic parts. */ getCoreSystemPrompt( context: AgentLoopContext, userMemory?: string | HierarchicalMemory, interactiveOverride?: boolean, topicUpdateNarrationOverride?: boolean, + splitMode: 'combined' | 'stable' | 'dynamic' = 'combined', ): string { + if (splitMode === 'dynamic') { + const today = new Date().toLocaleDateString(undefined, { + weekday: 'long', + year: 'numeric', + month: 'long', + day: 'numeric', + }); + const platform = process.platform; + const tempDir = context.config.storage.getProjectTempDir(); + + let dynamicPrompt = ` + +This is the Gemini CLI. We are setting up the context for our chat. +Today's date is ${today} (formatted according to the user's locale). +My operating system is: ${platform} +The project's temporary directory is: ${tempDir}`; + + if (context.config.getIncludeDirectoryTree()) { + const workspaceContext = context.config.getWorkspaceContext(); + const workspaceDirectories = workspaceContext.getDirectories(); + const dirList = workspaceDirectories + .map((dir) => ` - ${dir}`) + .join('\n'); + dynamicPrompt += `\n- **Workspace Directories:**\n${dirList}\n- **Directory Structure:**\n\n[Recursive file tree provided in history]`; + } + + if ( + topicUpdateNarrationOverride ?? + context.config.isTopicUpdateNarrationEnabled() + ) { + const activeTopic = context.config.topicState.getTopic(); + if (activeTopic) { + const sanitizedTopic = activeTopic + .replace(/\n/g, ' ') + .replace(/\]/g, ''); + dynamicPrompt += `\n\n[Active Topic: ${sanitizedTopic}]`; + } + } + + dynamicPrompt += `\n`; + return dynamicPrompt.trim(); + } + const systemMdResolution = resolvePathFromEnv( process.env['GEMINI_SYSTEM_MD'], ); @@ -275,6 +319,10 @@ export class PromptProvider { // Sanitize erratic newlines from composition let sanitizedPrompt = finalPrompt.replace(/\n{3,}/g, '\n\n'); + if (splitMode === 'stable') { + return sanitizedPrompt; + } + // Context Reinjection (Active Topic) if (isTopicUpdateNarrationEnabled) { const activeTopic = context.config.topicState.getTopic(); diff --git a/packages/core/src/utils/apiConversionUtils.test.ts b/packages/core/src/utils/apiConversionUtils.test.ts index fa907ca2e6..d1e28c0c01 100644 --- a/packages/core/src/utils/apiConversionUtils.test.ts +++ b/packages/core/src/utils/apiConversionUtils.test.ts @@ -90,6 +90,22 @@ describe('apiConversionUtils', () => { expect(result['generationConfig']).toBeUndefined(); }); + it('omits systemInstruction when cachedContent is present', () => { + const req: GenerateContentParameters = { + model: 'gemini-3-flash', + contents: [{ role: 'user', parts: [{ text: 'Hello' }] }], + config: { + systemInstruction: 'Original instruction', + cachedContent: 'cached-content-id', + }, + }; + + const result = convertToRestPayload(req); + + expect(result['cachedContent']).toBe('cached-content-id'); + expect(result['systemInstruction']).toBeUndefined(); + }); + it('retains pure hyperparameters in generationConfig', () => { const req: GenerateContentParameters = { model: 'gemini-3-flash', diff --git a/packages/core/src/utils/apiConversionUtils.ts b/packages/core/src/utils/apiConversionUtils.ts index 2e22a3a3ed..457b19dda4 100644 --- a/packages/core/src/utils/apiConversionUtils.ts +++ b/packages/core/src/utils/apiConversionUtils.ts @@ -46,12 +46,16 @@ export function convertToRestPayload( } // Assign extracted capabilities to the root level. - if (restSystemInstruction) + // CRITICAL: systemInstruction and cachedContent are mutually exclusive in the API. + if (sdkCachedContent) { + restPayload['cachedContent'] = sdkCachedContent; + } else if (restSystemInstruction) { restPayload['systemInstruction'] = restSystemInstruction; + } + if (sdkTools) restPayload['tools'] = sdkTools; if (sdkToolConfig) restPayload['toolConfig'] = sdkToolConfig; if (sdkSafetySettings) restPayload['safetySettings'] = sdkSafetySettings; - if (sdkCachedContent) restPayload['cachedContent'] = sdkCachedContent; return restPayload; } diff --git a/packages/core/src/utils/nextSpeakerChecker.test.ts b/packages/core/src/utils/nextSpeakerChecker.test.ts index 0a1fcd637f..098edc4762 100644 --- a/packages/core/src/utils/nextSpeakerChecker.test.ts +++ b/packages/core/src/utils/nextSpeakerChecker.test.ts @@ -92,6 +92,8 @@ describe('checkNextSpeaker', () => { generateContentStream: vi.fn(), countTokens: vi.fn(), embedContent: vi.fn(), + createCachedContent: vi.fn(), + updateCachedContent: vi.fn(), } as ContentGenerator, mockConfig, );