feat: implement explicit context caching for main agent with stable SI hashing

This commit is contained in:
Aishanee Shah
2026-05-12 20:15:52 +00:00
parent 5dda532573
commit 62e97b14a2
25 changed files with 948 additions and 49 deletions
+32 -3
View File
@@ -449,7 +449,8 @@
"version": "2.11.0",
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
"integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
"license": "(Apache-2.0 AND BSD-3-Clause)",
"peer": true
},
"node_modules/@bundled-es-modules/cookie": {
"version": "2.0.1",
@@ -1535,6 +1536,7 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.7.13",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -2212,6 +2214,7 @@
"integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@octokit/auth-token": "^6.0.0",
"@octokit/graphql": "^9.0.2",
@@ -2392,6 +2395,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"license": "Apache-2.0",
"peer": true,
"engines": {
"node": ">=8.0.0"
}
@@ -2441,6 +2445,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
"integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/semantic-conventions": "^1.29.0"
},
@@ -2815,6 +2820,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
"integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/semantic-conventions": "^1.29.0"
@@ -2848,6 +2854,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
"integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0"
@@ -2902,6 +2909,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
"integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0",
@@ -4139,6 +4147,7 @@
"integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"csstype": "^3.0.2"
}
@@ -4412,6 +4421,7 @@
"integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.58.2",
"@typescript-eslint/types": "8.58.2",
@@ -5187,6 +5197,7 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -7304,7 +7315,8 @@
"version": "0.0.1581282",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
"integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/dezalgo": {
"version": "1.0.4",
@@ -7889,6 +7901,7 @@
"integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -8499,6 +8512,7 @@
"resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
"integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
"license": "MIT",
"peer": true,
"dependencies": {
"accepts": "^2.0.0",
"body-parser": "^2.2.1",
@@ -9765,6 +9779,7 @@
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=16.9.0"
}
@@ -10024,6 +10039,7 @@
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz",
"integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"ansi-escapes": "^7.0.0",
"ansi-styles": "^6.2.3",
@@ -13799,6 +13815,7 @@
"resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
"integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -13809,6 +13826,7 @@
"integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"shell-quote": "^1.6.1",
"ws": "^7"
@@ -15961,6 +15979,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -16183,7 +16202,8 @@
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
"license": "0BSD",
"peer": true
},
"node_modules/tsx": {
"version": "4.20.3",
@@ -16191,6 +16211,7 @@
"integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
@@ -16356,6 +16377,7 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"devOptional": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -16423,6 +16445,7 @@
"integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.35.0",
"@typescript-eslint/types": "8.35.0",
@@ -16842,6 +16865,7 @@
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz",
"integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==",
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.27.0",
"fdir": "^6.5.0",
@@ -17412,6 +17436,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -17424,6 +17449,7 @@
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/chai": "^5.2.2",
"@vitest/expect": "3.2.4",
@@ -18062,6 +18088,7 @@
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
@@ -18498,6 +18525,7 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
"integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.8.0",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -18616,6 +18644,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
+47
View File
@@ -2156,6 +2156,53 @@ const SETTINGS_SCHEMA = {
},
},
},
contextCaching: {
type: 'object',
label: 'Context Caching',
category: 'Experimental',
requiresRestart: true,
default: {},
description: 'Explicit context caching for the main agent.',
showInDialog: true,
properties: {
enabled: {
type: 'boolean',
label: 'Enable Context Caching',
category: 'Experimental',
requiresRestart: true,
default: false,
description: 'Enable explicit context caching for the main agent.',
showInDialog: true,
},
thresholdTokens: {
type: 'number',
label: 'Threshold Tokens',
category: 'Experimental',
requiresRestart: true,
default: 32768,
description: 'Minimum tokens required to trigger explicit caching.',
showInDialog: true,
},
ttlMinutes: {
type: 'number',
label: 'TTL (Minutes)',
category: 'Experimental',
requiresRestart: true,
default: 60,
description: 'Time to live for a cache resource in minutes.',
showInDialog: true,
},
autoRenew: {
type: 'boolean',
label: 'Auto Renew',
category: 'Experimental',
requiresRestart: true,
default: true,
description: 'Automatically extend TTL on use.',
showInDialog: true,
},
},
},
adk: {
type: 'object',
label: 'ADK',
@@ -59,7 +59,7 @@ describe('Auto Routing Fallback Integration', () => {
return ''; // Fallback for other files
});
fakeGenerator = new FakeContentGenerator([]);
fakeGenerator = new FakeContentGenerator([], []);
});
afterEach(() => {
+8
View File
@@ -346,6 +346,14 @@ export class CodeAssistServer implements ContentGenerator {
throw Error();
}
  /**
   * Explicit context caching is not available under Code Assist auth.
   * Always rejects; declared so CodeAssistServer satisfies ContentGenerator.
   */
  async createCachedContent(): Promise<never> {
    throw new Error('Explicit caching is not supported for Code Assist auth.');
  }
  /**
   * Explicit context caching is not available under Code Assist auth.
   * Always rejects; declared so CodeAssistServer satisfies ContentGenerator.
   */
  async updateCachedContent(): Promise<never> {
    throw new Error('Explicit caching is not supported for Code Assist auth.');
  }
async listExperiments(
metadata: ClientMetadata,
): Promise<ListExperimentsResponse> {
+15 -1
View File
@@ -14,6 +14,7 @@ import type { ConversationRecord } from '../services/chatRecordingService.js';
import type {
AgentHistoryProviderConfig,
ContextManagementConfig,
ContextCachingConfig,
ToolOutputMaskingConfig,
} from '../context/types.js';
export type { ConversationRecord };
@@ -717,6 +718,7 @@ export interface ConfigParameters {
experimentalAutoMemory?: boolean;
experimentalGemma?: boolean;
experimentalContextManagementConfig?: string;
experimentalContextCaching?: Partial<ContextCachingConfig>;
experimentalAgentHistoryTruncation?: boolean;
experimentalAgentHistoryTruncationThreshold?: number;
experimentalAgentHistoryRetainedMessages?: number;
@@ -972,6 +974,7 @@ export class Config implements McpContext, AgentLoopContext {
private readonly modelSteering: boolean;
private memoryContextManager?: MemoryContextManager;
private readonly contextManagement: ContextManagementConfig;
private readonly contextCaching: ContextCachingConfig;
private terminalBackground: string | undefined = undefined;
private remoteAdminSettings: AdminControlsSettings | undefined;
private latestApiRequest: GenerateContentParameters | undefined;
@@ -1224,6 +1227,13 @@ export class Config implements McpContext, AgentLoopContext {
},
},
};
this.contextCaching = {
enabled: params.experimentalContextCaching?.enabled ?? false,
thresholdTokens:
params.experimentalContextCaching?.thresholdTokens ?? 32768,
ttlMinutes: params.experimentalContextCaching?.ttlMinutes ?? 60,
autoRenew: params.experimentalContextCaching?.autoRenew ?? true,
};
this.topicUpdateNarration = params.topicUpdateNarration ?? true;
this.modelSteering = params.modelSteering ?? false;
this.injectionService = new InjectionService(() =>
@@ -2574,7 +2584,11 @@ export class Config implements McpContext, AgentLoopContext {
return this.contextManagement;
}
get agentHistoryProviderConfig(): AgentHistoryProviderConfig {
getContextCachingConfig(): ContextCachingConfig {
return this.contextCaching;
}
getAgentHistoryProviderConfig(): AgentHistoryProviderConfig {
return {
maxTokens: this.contextManagement.historyWindow.maxTokens,
retainedTokens: this.contextManagement.historyWindow.retainedTokens,
+7
View File
@@ -87,6 +87,13 @@ export class Storage {
return path.join(Storage.getGlobalGeminiDir(), GOOGLE_ACCOUNTS_FILENAME);
}
static getContextCacheMetadataPath(): string {
return path.join(
Storage.getGlobalGeminiDir(),
'context-cache-metadata.json',
);
}
static getTrustedFoldersPath(): string {
if (process.env['GEMINI_CLI_TRUSTED_FOLDERS_PATH']) {
return process.env['GEMINI_CLI_TRUSTED_FOLDERS_PATH'];
@@ -0,0 +1,106 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ContextCacheManager } from './contextCacheManager.js';
import { Storage } from '../config/storage.js';
import * as fs from 'node:fs';
vi.mock('node:fs');
vi.mock('../config/storage.js');
describe('ContextCacheManager', () => {
  let manager: ContextCacheManager;
  // Storage is fully mocked, so this path is never touched on disk.
  const mockMetadataPath = '/test/metadata.json';
  beforeEach(() => {
    vi.clearAllMocks();
    // The manager resolves the metadata path in its constructor, so the
    // Storage mock must be configured before `new ContextCacheManager()`.
    vi.mocked(Storage.getContextCacheMetadataPath).mockReturnValue(
      mockMetadataPath,
    );
    manager = new ContextCacheManager();
  });
  it('should calculate stable SHA-256 hash', () => {
    const si = 'You are a helpful assistant.';
    const hash1 = manager.calculateHash(si);
    const hash2 = manager.calculateHash(si);
    // Same input must always yield the same digest (stable cache key).
    expect(hash1).toBe(hash2);
    // SHA-256 hex digest: exactly 64 lowercase hex characters.
    expect(hash1).toMatch(/^[a-f0-9]{64}$/);
  });
  it('should return undefined if cache not found', () => {
    // No metadata file on disk -> manager starts with an empty store.
    vi.mocked(fs.existsSync).mockReturnValue(false);
    expect(manager.getCache('nonexistent')).toBeUndefined();
  });
  it('should return entry if valid cache found', () => {
    const hash = 'testhash';
    // Expires one hour in the future, so the entry is still "hot".
    const futureDate = new Date(Date.now() + 3600000).toISOString();
    const entry = {
      cacheName: 'cachedContents/123',
      model: 'gemini-pro',
      expiresAt: futureDate,
      tokenCount: 1000,
    };
    vi.mocked(fs.existsSync).mockReturnValue(true);
    vi.mocked(fs.readFileSync).mockReturnValue(
      JSON.stringify({
        version: '1.0',
        entries: { [hash]: entry },
      }),
    );
    const result = manager.getCache(hash);
    expect(result).toEqual(entry);
  });
  it('should purge and return undefined if cache expired', () => {
    const hash = 'expiredhash';
    // Expired one hour ago, so getCache must treat the entry as stale.
    const pastDate = new Date(Date.now() - 3600000).toISOString();
    const entry = {
      cacheName: 'cachedContents/expired',
      model: 'gemini-pro',
      expiresAt: pastDate,
      tokenCount: 1000,
    };
    vi.mocked(fs.existsSync).mockReturnValue(true);
    vi.mocked(fs.readFileSync).mockReturnValue(
      JSON.stringify({
        version: '1.0',
        entries: { [hash]: entry },
      }),
    );
    const result = manager.getCache(hash);
    expect(result).toBeUndefined();
    // Purging must also persist the removal back to the metadata file.
    expect(fs.writeFileSync).toHaveBeenCalled();
    const saved = JSON.parse(
      vi.mocked(fs.writeFileSync).mock.calls[0][1] as string,
    );
    expect(saved.entries[hash]).toBeUndefined();
  });
  it('should save metadata when setCache is called', () => {
    // No pre-existing file: the store is created fresh, then written.
    vi.mocked(fs.existsSync).mockReturnValue(false);
    const hash = 'newhash';
    const entry = {
      cacheName: 'cachedContents/new',
      model: 'gemini-pro',
      expiresAt: new Date().toISOString(),
      tokenCount: 1000,
    };
    manager.setCache(hash, entry);
    expect(fs.writeFileSync).toHaveBeenCalled();
    const saved = JSON.parse(
      vi.mocked(fs.writeFileSync).mock.calls[0][1] as string,
    );
    expect(saved.entries[hash]).toEqual(entry);
  });
});
@@ -0,0 +1,144 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as crypto from 'node:crypto';
import * as fs from 'node:fs';
import { Storage } from '../config/storage.js';
import { debugLogger } from '../utils/debugLogger.js';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
/**
 * Metadata for a single Gemini Context Cache resource.
 * One entry corresponds to one remote `cachedContents/*` resource.
 */
export interface ContextCacheEntry {
  /** The full resource name, e.g., 'cachedContents/xyz123' */
  cacheName: string;
  /** The model ID this cache was created for */
  model: string;
  /** ISO 8601 expiration timestamp */
  expiresAt: string;
  /** Number of tokens in the cached content */
  tokenCount: number;
}
/**
 * Schema for the local persistent metadata storage.
 * Serialized as pretty-printed JSON on disk; `version` is a format marker
 * (the manager currently writes '1.0').
 */
export interface ContextCacheMetadata {
  version: string;
  /** Map of SHA-256(SI) -> ContextCacheEntry */
  entries: Record<string, ContextCacheEntry>;
}
/**
 * Manages the lifecycle and discovery of Gemini Context Caches.
 * Uses a local metadata file to map System Instruction hashes to remote
 * cache IDs. All persistence is best-effort: load/save failures are logged
 * and degrade to an empty store so caching never blocks the agent.
 */
export class ContextCacheManager {
  // In-memory copy of the persisted metadata; lazily loaded on first access.
  private metadata: ContextCacheMetadata | undefined;
  private readonly metadataPath: string;

  constructor() {
    this.metadataPath = Storage.getContextCacheMetadataPath();
  }

  /**
   * Returns true when `value` has the shape of ContextCacheMetadata.
   * Guards against a corrupted or hand-edited metadata file: the previous
   * blanket type assertion let e.g. `null`, `[]`, or `{}` through, which
   * later crashed getCache/setCache with a TypeError on `entries[hash]`.
   */
  private static isValidMetadata(value: unknown): value is ContextCacheMetadata {
    return (
      typeof value === 'object' &&
      value !== null &&
      typeof (value as { entries?: unknown }).entries === 'object' &&
      (value as { entries?: unknown }).entries !== null
    );
  }

  /**
   * Loads metadata from disk on first use, or initializes an empty store.
   * Parse errors and malformed content both fall back to an empty store.
   */
  private loadMetadata(): ContextCacheMetadata {
    if (this.metadata) {
      return this.metadata;
    }
    try {
      if (fs.existsSync(this.metadataPath)) {
        const content = fs.readFileSync(this.metadataPath, 'utf8');
        const parsed = JSON.parse(content) as unknown;
        this.metadata = ContextCacheManager.isValidMetadata(parsed)
          ? parsed
          : { version: '1.0', entries: {} };
      } else {
        this.metadata = { version: '1.0', entries: {} };
      }
    } catch (error) {
      debugLogger.error('Failed to load context cache metadata:', error);
      this.metadata = { version: '1.0', entries: {} };
    }
    return this.metadata;
  }

  /** Persists the in-memory metadata to disk; failures are logged, not thrown. */
  private saveMetadata(): void {
    if (!this.metadata) return;
    try {
      fs.writeFileSync(
        this.metadataPath,
        JSON.stringify(this.metadata, null, 2),
      );
    } catch (error) {
      debugLogger.error('Failed to save context cache metadata:', error);
    }
  }

  /**
   * Calculates a stable SHA-256 hash (lowercase hex) of the System Instruction.
   */
  calculateHash(systemInstruction: string): string {
    return crypto.createHash('sha256').update(systemInstruction).digest('hex');
  }

  /**
   * Estimates the token count of a system instruction string.
   */
  calculateTokenCount(systemInstruction: string): number {
    return estimateTokenCountSync([{ text: systemInstruction }]);
  }

  /**
   * Looks up a hot cache for the given SI hash.
   * Purges the entry (persisting the removal) if it has expired. A malformed
   * `expiresAt` yields an Invalid Date, which compares false and is therefore
   * treated as expired.
   */
  getCache(hash: string): ContextCacheEntry | undefined {
    const metadata = this.loadMetadata();
    const entry = metadata.entries[hash];
    if (entry) {
      if (new Date(entry.expiresAt) > new Date()) {
        return entry;
      }
      debugLogger.log(
        `[ContextCache] Purging expired cache: ${entry.cacheName}`,
      );
      delete metadata.entries[hash];
      this.saveMetadata();
    }
    return undefined;
  }

  /**
   * Saves or updates a cache entry and persists the store.
   */
  setCache(hash: string, entry: ContextCacheEntry): void {
    const metadata = this.loadMetadata();
    metadata.entries[hash] = entry;
    this.saveMetadata();
  }

  /**
   * Removes a cache entry by hash; only persists when something was removed.
   */
  removeCache(hash: string): void {
    const metadata = this.loadMetadata();
    if (metadata.entries[hash]) {
      delete metadata.entries[hash];
      this.saveMetadata();
    }
  }
}

/** Global singleton instance */
export const contextCacheManager = new ContextCacheManager();
+7
View File
@@ -18,6 +18,13 @@ export interface ToolOutputMaskingConfig {
protectLatestTurn: boolean;
}
export interface ContextCachingConfig {
enabled: boolean;
thresholdTokens: number;
ttlMinutes: number;
autoRenew: boolean;
}
export interface ContextManagementConfig {
enabled: boolean;
historyWindow: {
+2 -1
View File
@@ -112,9 +112,10 @@ export class GeminiClient {
this.loopDetector = new LoopDetectionService(this.config);
this.compressionService = new ChatCompressionService();
this.agentHistoryProvider = new AgentHistoryProvider(
this.config.agentHistoryProviderConfig,
this.config.getAgentHistoryProviderConfig(),
this.config,
);
this.toolOutputMaskingService = new ToolOutputMaskingService();
this.lastPromptId = this.config.getSessionId();
+40 -25
View File
@@ -137,7 +137,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_SURFACE', '');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
const generator = await createContentGenerator(
@@ -158,9 +159,7 @@ describe('createContentGenerator', () => {
}),
}),
});
expect(generator).toEqual(
new LoggingContentGenerator(mockGenerator.models, mockConfig),
);
expect(generator).toBeInstanceOf(LoggingContentGenerator);
});
it('should use standard User-Agent for a2a-server running outside VS Code', async () => {
@@ -179,7 +178,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_SURFACE', '');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -217,7 +217,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('TERM_PROGRAM_VERSION', '1.85.0');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -255,7 +256,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_SURFACE', '');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -288,7 +290,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_CUSTOM_HEADERS', 'User-Agent:MyCustomUA');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -348,7 +351,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv(
@@ -395,7 +399,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
@@ -433,7 +438,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GEMINI_API_KEY_AUTH_MECHANISM', 'bearer');
@@ -467,7 +473,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
// GEMINI_API_KEY_AUTH_MECHANISM is not stubbed, so it will be undefined, triggering default 'x-goog-api-key'
@@ -508,7 +515,8 @@ describe('createContentGenerator', () => {
getClientName: vi.fn().mockReturnValue(undefined),
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
const generator = await createContentGenerator(
@@ -527,9 +535,7 @@ describe('createContentGenerator', () => {
},
}),
});
expect(generator).toEqual(
new LoggingContentGenerator(mockGenerator.models, mockConfig),
);
expect(generator).toBeInstanceOf(LoggingContentGenerator);
});
it('should pass apiVersion to GoogleGenAI when GOOGLE_GENAI_API_VERSION is set', async () => {
@@ -541,7 +547,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GENAI_API_VERSION', 'v1');
@@ -575,7 +582,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
@@ -613,7 +621,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GENAI_API_VERSION', '');
@@ -652,7 +661,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GENAI_API_VERSION', 'v1alpha');
@@ -687,7 +697,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local');
@@ -719,7 +730,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local');
@@ -752,7 +764,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local');
@@ -785,7 +798,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://env.test.local');
@@ -817,7 +831,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
+96 -10
View File
@@ -4,14 +4,17 @@
* SPDX-License-Identifier: Apache-2.0
*/
import {
import type {
EmbedContentResponse,
GoogleGenAI,
type Content,
type CountTokensResponse,
type GenerateContentResponse,
type GenerateContentParameters,
type CountTokensParameters,
type EmbedContentResponse,
type EmbedContentParameters,
type CachedContent,
type CreateCachedContentParameters,
} from '@google/genai';
import * as os from 'node:os';
import { createCodeAssistContentGenerator } from '../code_assist/codeAssist.js';
@@ -49,6 +52,15 @@ export interface ContentGenerator {
embedContent(request: EmbedContentParameters): Promise<EmbedContentResponse>;
createCachedContent(
request: CreateCachedContentParameters,
): Promise<CachedContent>;
updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent>;
userTier?: UserTierId;
userTierName?: string;
@@ -65,6 +77,72 @@ export enum AuthType {
GATEWAY = 'gateway',
}
/**
 * ContentGenerator implementation backed directly by the Google GenAI SDK
 * (standard Gemini / Vertex auth). Generation, token-count, and embedding
 * requests are re-targeted at the model this generator was constructed with;
 * cache requests are forwarded to the SDK's `caches` surface unchanged.
 */
class SdkContentGenerator implements ContentGenerator {
  paidTier?: GeminiUserTier;

  constructor(
    private readonly genAI: GoogleGenAI,
    private readonly modelName: string,
    readonly history: Content[] = [],
  ) {}

  async generateContent(
    request: GenerateContentParameters,
    _userPromptId: string,
    _role: LlmRole,
  ): Promise<GenerateContentResponse> {
    // Force the configured model id onto the outgoing request.
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.generateContent(outgoing);
  }

  async generateContentStream(
    request: GenerateContentParameters,
    _userPromptId: string,
    _role: LlmRole,
  ): Promise<AsyncGenerator<GenerateContentResponse>> {
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.generateContentStream(outgoing);
  }

  async countTokens(
    request: CountTokensParameters,
  ): Promise<CountTokensResponse> {
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.countTokens(outgoing);
  }

  async embedContent(
    request: EmbedContentParameters,
  ): Promise<EmbedContentResponse> {
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.embedContent(outgoing);
  }

  async createCachedContent(
    request: CreateCachedContentParameters,
  ): Promise<CachedContent> {
    // Passed through unchanged — no model override is applied here.
    return this.genAI.caches.create(request);
  }

  async updateCachedContent(request: {
    name: string;
    config?: { ttl?: string; expireTime?: string };
  }): Promise<CachedContent> {
    return this.genAI.caches.update(request);
  }
}
/**
* Detects the best authentication type based on environment variables.
*
@@ -197,7 +275,7 @@ export async function createContentGenerator(
const fakeGenerator = await FakeContentGenerator.fromFile(
gcConfig.fakeResponses,
);
return new LoggingContentGenerator(fakeGenerator, gcConfig);
return new LoggingContentGenerator(fakeGenerator, gcConfig, []);
}
const version = await getVersion();
const model = resolveModel(
@@ -278,6 +356,7 @@ export async function createContentGenerator(
sessionId,
),
gcConfig,
[],
);
}
@@ -330,11 +409,10 @@ export async function createContentGenerator(
const httpOptions: {
baseUrl?: string;
headers: Record<string, string>;
} = { headers };
if (baseUrl) {
httpOptions.baseUrl = baseUrl;
}
} = {
headers,
...(baseUrl ? { baseUrl } : {}),
};
const googleGenAI = new GoogleGenAI({
apiKey: config.apiKey === '' ? undefined : config.apiKey,
@@ -342,7 +420,11 @@ export async function createContentGenerator(
httpOptions,
...(apiVersionEnv && { apiVersion: apiVersionEnv }),
});
return new LoggingContentGenerator(googleGenAI.models, gcConfig);
return new LoggingContentGenerator(
new SdkContentGenerator(googleGenAI, model, []),
gcConfig,
[],
);
}
throw new Error(
`Error creating contentGenerator: Unsupported authType: ${config.authType}`,
@@ -350,7 +432,11 @@ export async function createContentGenerator(
})();
if (gcConfig.recordResponses) {
return new RecordingContentGenerator(generator, gcConfig.recordResponses);
return new RecordingContentGenerator(
generator,
gcConfig.recordResponses,
[],
);
}
return generator;
+30 -1
View File
@@ -6,11 +6,14 @@
import {
GenerateContentResponse,
type Content,
type CountTokensResponse,
type GenerateContentParameters,
type CountTokensParameters,
EmbedContentResponse,
type EmbedContentParameters,
type CachedContent,
type CreateCachedContentParameters,
} from '@google/genai';
import { promises } from 'node:fs';
import type { ContentGenerator } from './contentGenerator.js';
@@ -34,6 +37,14 @@ export type FakeResponse =
| {
method: 'embedContent';
response: EmbedContentResponse;
}
| {
method: 'createCachedContent';
response: CachedContent;
}
| {
method: 'updateCachedContent';
response: CachedContent;
};
// A ContentGenerator that responds with canned responses.
@@ -46,7 +57,10 @@ export class FakeContentGenerator implements ContentGenerator {
userTierName?: string;
paidTier?: GeminiUserTier;
constructor(private readonly responses: FakeResponse[]) {}
constructor(
// Queue of canned responses, consumed in order by getNextResponse.
private readonly responses: FakeResponse[],
// Optional shared conversation history (defaults to empty); exposed to
// callers alongside the same parameter on the other generator wrappers.
readonly history: Content[] = [],
) {}
static async fromFile(filePath: string): Promise<FakeContentGenerator> {
const fileContent = await promises.readFile(filePath, 'utf-8');
@@ -124,4 +138,19 @@ export class FakeContentGenerator implements ContentGenerator {
EmbedContentResponse.prototype,
);
}
/**
 * Serves the next canned response queued for `createCachedContent`.
 * No real cache is created — the fake replays recorded data in order,
 * like the other replay methods on this class.
 */
async createCachedContent(
  request: CreateCachedContentParameters,
): Promise<CachedContent> {
  const canned = this.getNextResponse('createCachedContent', request);
  return canned;
}
// Replays the next canned 'updateCachedContent' response; no real cache
// TTL/expiry is modified by the fake.
async updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent> {
return this.getNextResponse('updateCachedContent', request);
}
}
+151
View File
@@ -5,6 +5,7 @@
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import * as crypto from 'node:crypto';
import {
ApiError,
ThinkingLevel,
@@ -25,6 +26,7 @@ import {
CoreToolCallStatus,
} from '../scheduler/types.js';
import { MockTool } from '../test-utils/mock-tool.js';
import { Storage } from '../config/storage.js';
import type { Config } from '../config/config.js';
import { setSimulate429 } from '../utils/testUtils.js';
import { DEFAULT_THINKING_MODE } from '../config/models.js';
@@ -132,6 +134,8 @@ describe('GeminiChat', () => {
countTokens: vi.fn(),
embedContent: vi.fn(),
batchEmbedContents: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
} as unknown as ContentGenerator;
mockHandleFallback.mockClear();
@@ -186,6 +190,24 @@ describe('GeminiChat', () => {
getMaxAttempts: vi.fn().mockReturnValue(10),
getUserTier: vi.fn().mockReturnValue(undefined),
isContextManagementEnabled: vi.fn().mockReturnValue(false),
getContextCachingConfig: vi.fn().mockReturnValue({
enabled: false,
thresholdTokens: 32768,
ttlMinutes: 60,
autoRenew: true,
}),
getSystemInstructionMemory: vi.fn().mockReturnValue(undefined),
getIncludeDirectoryTree: vi.fn().mockReturnValue(true),
getWorkspaceContext: vi.fn().mockReturnValue({
getDirectories: vi.fn().mockReturnValue([]),
}),
isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false),
topicState: {
getTopic: vi.fn().mockReturnValue(undefined),
},
getSkillManager: vi.fn().mockReturnValue({
getSkills: vi.fn().mockReturnValue([]),
}),
modelConfigService: {
getResolvedConfig: vi.fn().mockImplementation((modelConfigKey) => {
const model = modelConfigKey.model ?? mockConfig.getModel();
@@ -3093,4 +3115,133 @@ describe('GeminiChat', () => {
expect(stripped[1].parts![0].functionResponse!.id).toBe('call_123');
});
});
// Exercises GeminiChat's explicit context-caching path for the MAIN role:
// cache creation when the stable SI exceeds the threshold, and cache reuse
// when a matching entry already exists in the metadata file.
describe('explicit context caching', () => {
it('should create a new cache if enabled and SI is large enough', async () => {
// Arrange: a system instruction comfortably above the configured threshold.
const si = 'Large system instruction...'.repeat(2000); // Definitely > 32k
chat = new GeminiChat(mockConfig, si);
vi.mocked(mockConfig.getContextCachingConfig).mockReturnValue({
enabled: true,
thresholdTokens: 32768,
ttlMinutes: 60,
autoRenew: true,
});
// The generator reports a freshly created cache entry.
vi.mocked(mockContentGenerator.createCachedContent).mockResolvedValue({
name: 'cachedContents/new-cache',
expireTime: new Date(Date.now() + 3600000).toISOString(),
});
vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
(async function* () {
yield {
candidates: [
{
content: { role: 'model', parts: [{ text: 'response' }] },
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
);
// Act: send one message as the MAIN role and drain the stream.
const stream = await chat.sendMessageStream(
{ model: 'gemini-pro' },
'test',
'prompt-id',
new AbortController().signal,
LlmRole.MAIN,
);
for await (const chunk of stream) {
expect(chunk).toBeDefined();
}
// Assert: the cache was created from the full SI, and the generate call
// referenced it via config.cachedContent.
expect(mockContentGenerator.createCachedContent).toHaveBeenCalledWith(
expect.objectContaining({
systemInstruction: { parts: [{ text: si }] },
}),
);
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith(
expect.objectContaining({
config: expect.objectContaining({
cachedContent: 'cachedContents/new-cache',
}),
}),
'prompt-id',
LlmRole.MAIN,
);
});
it('should reuse existing cache if present', async () => {
const si = 'Large system instruction...'.repeat(2000);
chat = new GeminiChat(mockConfig, si);
// The metadata entry is keyed by the sha256 hash of the stable SI.
const siHash = crypto.createHash('sha256').update(si).digest('hex');
const futureDate = new Date(Date.now() + 3600000).toISOString();
// Seed the metadata file via the mock fs
mockFileSystem.set(
Storage.getContextCacheMetadataPath(),
JSON.stringify({
version: '1.0',
entries: {
[siHash]: {
cacheName: 'cachedContents/existing-cache',
model: 'gemini-pro',
expiresAt: futureDate,
tokenCount: 40000,
},
},
}),
);
vi.mocked(mockConfig.getContextCachingConfig).mockReturnValue({
enabled: true,
thresholdTokens: 32768,
ttlMinutes: 60,
autoRenew: true,
});
// autoRenew is enabled, so the existing cache's TTL may be extended.
vi.mocked(mockContentGenerator.updateCachedContent).mockResolvedValue({
name: 'cachedContents/existing-cache',
expireTime: new Date(Date.now() + 7200000).toISOString(),
});
vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
(async function* () {
yield {
candidates: [
{
content: { role: 'model', parts: [{ text: 'response' }] },
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
);
const stream = await chat.sendMessageStream(
{ model: 'gemini-pro' },
'test',
'prompt-id',
new AbortController().signal,
LlmRole.MAIN,
);
for await (const chunk of stream) {
expect(chunk).toBeDefined();
}
// Assert: no new cache is created and the seeded cache name is reused.
expect(mockContentGenerator.createCachedContent).not.toHaveBeenCalled();
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith(
expect.objectContaining({
config: expect.objectContaining({
cachedContent: 'cachedContents/existing-cache',
}),
}),
'prompt-id',
LlmRole.MAIN,
);
});
});
});
+132 -2
View File
@@ -45,7 +45,7 @@ import {
ContentRetryEvent,
ContentRetryFailureEvent,
NetworkRetryAttemptEvent,
type LlmRole,
LlmRole,
} from '../telemetry/types.js';
import { handleFallback } from '../fallback/handler.js';
import { isFunctionResponse } from '../utils/messageInspectors.js';
@@ -61,6 +61,9 @@ import {
import { coreEvents } from '../utils/events.js';
import type { AgentLoopContext } from '../config/agent-loop-context.js';
import { debugLogger } from '../utils/debugLogger.js';
import { contextCacheManager } from '../context/contextCacheManager.js';
import { getCoreSystemPrompt } from './prompts.js';
import { getDirectoryContextString } from '../utils/environmentContext.js';
export enum StreamEventType {
/** A regular content chunk from the API. */
@@ -747,7 +750,134 @@ export class GeminiChat {
lastConfig = config;
lastContentsToUse = contentsToUse;
const finalContents = stripToolCallIdPrefixes(contentsToUse);
// Handle explicit context caching
const cachingConfig = this.context.config.getContextCachingConfig();
let effectiveContents = contentsToUse;
if (cachingConfig.enabled && role === LlmRole.MAIN) {
try {
const userMemory = this.context.config.getSystemInstructionMemory();
const stableSI = getCoreSystemPrompt(
this.context.config,
userMemory,
undefined,
undefined,
'stable',
);
const siHash = contextCacheManager.calculateHash(stableSI);
const existingCache = contextCacheManager.getCache(siHash);
if (existingCache && existingCache.model === modelToUse) {
config.cachedContent = existingCache.cacheName;
debugLogger.log(
`[ContextCache] Using existing cache: ${existingCache.cacheName}`,
);
// Prepend dynamic context to history
const dynamicContext = getCoreSystemPrompt(
this.context.config,
userMemory,
undefined,
undefined,
'dynamic',
);
const dirContext = await getDirectoryContextString(
this.context.config,
);
const dynamicWithTree = dynamicContext.replace(
'[Recursive file tree provided in history]',
dirContext,
);
effectiveContents = [
{ role: 'user', parts: [{ text: dynamicWithTree }] },
...contentsToUse,
];
// Asynchronously renew TTL if enabled
if (cachingConfig.autoRenew) {
this.context.config
.getContentGenerator()
.updateCachedContent({
name: existingCache.cacheName,
config: { ttl: `${cachingConfig.ttlMinutes * 60}s` },
})
.then((updated) => {
if (updated.expireTime) {
existingCache.expiresAt = updated.expireTime;
contextCacheManager.setCache(siHash, existingCache);
debugLogger.log(
`[ContextCache] Renewed TTL for ${existingCache.cacheName}`,
);
}
})
.catch((e) =>
debugLogger.error(`[ContextCache] Failed to renew TTL:`, e),
);
}
} else {
// Check if we should create a new cache
const siTokens = contextCacheManager.calculateTokenCount(stableSI);
if (siTokens >= cachingConfig.thresholdTokens) {
debugLogger.log(
`[ContextCache] Creating new cache for stable SI (${siTokens} tokens)`,
);
const newCache = await this.context.config
.getContentGenerator()
.createCachedContent({
model: modelToUse,
config: {
systemInstruction: { parts: [{ text: stableSI }] },
ttl: `${cachingConfig.ttlMinutes * 60}s`,
},
});
if (newCache.name && newCache.expireTime) {
const entry = {
cacheName: newCache.name,
model: modelToUse,
expiresAt: newCache.expireTime,
tokenCount: siTokens,
};
contextCacheManager.setCache(siHash, entry);
config.cachedContent = newCache.name;
debugLogger.log(
`[ContextCache] Created and using new cache: ${newCache.name}`,
);
// Prepend dynamic context to history for this initial call
const dynamicContext = getCoreSystemPrompt(
this.context.config,
userMemory,
undefined,
undefined,
'dynamic',
);
const dirContext = await getDirectoryContextString(
this.context.config,
);
const dynamicWithTree = dynamicContext.replace(
'[Recursive file tree provided in history]',
dirContext,
);
effectiveContents = [
{ role: 'user', parts: [{ text: dynamicWithTree }] },
...contentsToUse,
];
}
}
}
} catch (error) {
// Fall back to standard request on cache failure
debugLogger.error(
'[ContextCache] Error managing context cache:',
error,
);
}
}
const finalContents = stripToolCallIdPrefixes(effectiveContents);
return this.context.config.getContentGenerator().generateContentStream(
{
@@ -65,6 +65,8 @@ describe('LoggingContentGenerator', () => {
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
};
config = {
getGoogleAIConfig: vi.fn(),
@@ -15,6 +15,8 @@ import type {
GenerateContentParameters,
GenerateContentResponseUsageMetadata,
GenerateContentResponse,
CachedContent,
CreateCachedContentParameters,
} from '@google/genai';
import {
ApiRequestEvent,
@@ -150,6 +152,7 @@ export class LoggingContentGenerator implements ContentGenerator {
constructor(
// Underlying generator that performs the actual API calls.
private readonly wrapped: ContentGenerator,
private readonly config: Config,
// Optional conversation history (defaults to empty), kept consistent with
// the same parameter on the other generator wrappers.
readonly history: Content[] = [],
) {}
getWrapped(): ContentGenerator {
@@ -623,4 +626,17 @@ export class LoggingContentGenerator implements ContentGenerator {
},
);
}
/**
 * Creates an explicit content cache by delegating to the wrapped generator.
 * NOTE(review): unlike the generate/stream paths on this class, this call is
 * not routed through API logging — confirm that is intentional.
 */
async createCachedContent(
  request: CreateCachedContentParameters,
): Promise<CachedContent> {
  const result = await this.wrapped.createCachedContent(request);
  return result;
}
// Forwards a cache TTL/expiry update straight to the wrapped generator.
// NOTE(review): this pass-through is not logged like the generate calls —
// confirm cache mutations should bypass telemetry.
async updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent> {
return this.wrapped.updateCachedContent(request);
}
}
+2
View File
@@ -25,12 +25,14 @@ export function getCoreSystemPrompt(
userMemory?: string | HierarchicalMemory,
interactiveOverride?: boolean,
topicUpdateNarrationOverride?: boolean,
splitMode: 'combined' | 'stable' | 'dynamic' = 'combined',
): string {
return new PromptProvider().getCoreSystemPrompt(
config,
userMemory,
interactiveOverride,
topicUpdateNarrationOverride,
splitMode,
);
}
@@ -40,6 +40,8 @@ describe('RecordingContentGenerator', () => {
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
};
recorder = new RecordingContentGenerator(mockRealGenerator, filePath);
vi.clearAllMocks();
@@ -5,17 +5,20 @@
*/
import type {
Content,
CountTokensResponse,
GenerateContentParameters,
GenerateContentResponse,
CountTokensParameters,
EmbedContentResponse,
EmbedContentParameters,
CachedContent,
CreateCachedContentParameters,
} from '@google/genai';
import { appendFileSync } from 'node:fs';
import type { ContentGenerator } from './contentGenerator.js';
import type { FakeResponse } from './fakeContentGenerator.js';
import type { UserTierId } from '../code_assist/types.js';
import type { UserTierId, GeminiUserTier } from '../code_assist/types.js';
import { safeJsonStringify } from '../utils/safeJsonStringify.js';
import type { LlmRole } from '../telemetry/types.js';
@@ -29,6 +32,7 @@ export class RecordingContentGenerator implements ContentGenerator {
constructor(
// Generator whose real responses are recorded for later replay.
private readonly realGenerator: ContentGenerator,
// Path of the file that recorded responses are appended to.
private readonly filePath: string,
// Optional conversation history (defaults to empty), matching the same
// parameter on the other generator wrappers.
readonly history: Content[] = [],
) {}
get userTier(): UserTierId | undefined {
@@ -39,6 +43,10 @@ export class RecordingContentGenerator implements ContentGenerator {
return this.realGenerator.userTierName;
}
// Pass-through of the real generator's paid-tier information.
get paidTier(): GeminiUserTier | undefined {
return this.realGenerator.paidTier;
}
async generateContent(
request: GenerateContentParameters,
userPromptId: string,
@@ -111,7 +119,6 @@ export class RecordingContentGenerator implements ContentGenerator {
request: EmbedContentParameters,
): Promise<EmbedContentResponse> {
const response = await this.realGenerator.embedContent(request);
const recordedResponse: FakeResponse = {
method: 'embedContent',
response: {
@@ -122,4 +129,29 @@ export class RecordingContentGenerator implements ContentGenerator {
appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
return response;
}
async createCachedContent(
request: CreateCachedContentParameters,
): Promise<CachedContent> {
const response = await this.realGenerator.createCachedContent(request);
const recordedResponse: FakeResponse = {
method: 'createCachedContent',
response,
};
appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
return response;
}
async updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent> {
const response = await this.realGenerator.updateCachedContent(request);
const recordedResponse: FakeResponse = {
method: 'updateCachedContent',
response,
};
appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
return response;
}
}
+1
View File
@@ -146,6 +146,7 @@ export {
} from './services/memoryService.js';
export { isProjectSkillPatchTarget } from './services/memoryPatchUtils.js';
export * from './context/memoryContextManager.js';
export * from './context/contextCacheManager.js';
export * from './services/trackerService.js';
export * from './services/trackerTypes.js';
export * from './services/keychainService.js';
+49 -1
View File
@@ -42,14 +42,58 @@ import type { AgentLoopContext } from '../config/agent-loop-context.js';
*/
export class PromptProvider {
/**
* Generates the core system prompt.
* Generates the core system prompt, optionally split into stable and dynamic parts.
*/
getCoreSystemPrompt(
context: AgentLoopContext,
userMemory?: string | HierarchicalMemory,
interactiveOverride?: boolean,
topicUpdateNarrationOverride?: boolean,
splitMode: 'combined' | 'stable' | 'dynamic' = 'combined',
): string {
if (splitMode === 'dynamic') {
const today = new Date().toLocaleDateString(undefined, {
weekday: 'long',
year: 'numeric',
month: 'long',
day: 'numeric',
});
const platform = process.platform;
const tempDir = context.config.storage.getProjectTempDir();
let dynamicPrompt = `
<session_context>
This is the Gemini CLI. We are setting up the context for our chat.
Today's date is ${today} (formatted according to the user's locale).
My operating system is: ${platform}
The project's temporary directory is: ${tempDir}`;
if (context.config.getIncludeDirectoryTree()) {
const workspaceContext = context.config.getWorkspaceContext();
const workspaceDirectories = workspaceContext.getDirectories();
const dirList = workspaceDirectories
.map((dir) => ` - ${dir}`)
.join('\n');
dynamicPrompt += `\n- **Workspace Directories:**\n${dirList}\n- **Directory Structure:**\n\n[Recursive file tree provided in history]`;
}
if (
topicUpdateNarrationOverride ??
context.config.isTopicUpdateNarrationEnabled()
) {
const activeTopic = context.config.topicState.getTopic();
if (activeTopic) {
const sanitizedTopic = activeTopic
.replace(/\n/g, ' ')
.replace(/\]/g, '');
dynamicPrompt += `\n\n[Active Topic: ${sanitizedTopic}]`;
}
}
dynamicPrompt += `\n</session_context>`;
return dynamicPrompt.trim();
}
const systemMdResolution = resolvePathFromEnv(
process.env['GEMINI_SYSTEM_MD'],
);
@@ -275,6 +319,10 @@ export class PromptProvider {
// Sanitize erratic newlines from composition
let sanitizedPrompt = finalPrompt.replace(/\n{3,}/g, '\n\n');
if (splitMode === 'stable') {
return sanitizedPrompt;
}
// Context Reinjection (Active Topic)
if (isTopicUpdateNarrationEnabled) {
const activeTopic = context.config.topicState.getTopic();
@@ -90,6 +90,22 @@ describe('apiConversionUtils', () => {
expect(result['generationConfig']).toBeUndefined();
});
it('omits systemInstruction when cachedContent is present', () => {
const req: GenerateContentParameters = {
model: 'gemini-3-flash',
contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
config: {
systemInstruction: 'Original instruction',
cachedContent: 'cached-content-id',
},
};
const result = convertToRestPayload(req);
expect(result['cachedContent']).toBe('cached-content-id');
expect(result['systemInstruction']).toBeUndefined();
});
it('retains pure hyperparameters in generationConfig', () => {
const req: GenerateContentParameters = {
model: 'gemini-3-flash',
@@ -46,12 +46,16 @@ export function convertToRestPayload(
}
// Assign extracted capabilities to the root level.
if (restSystemInstruction)
// CRITICAL: systemInstruction and cachedContent are mutually exclusive in the API.
if (sdkCachedContent) {
restPayload['cachedContent'] = sdkCachedContent;
} else if (restSystemInstruction) {
restPayload['systemInstruction'] = restSystemInstruction;
}
if (sdkTools) restPayload['tools'] = sdkTools;
if (sdkToolConfig) restPayload['toolConfig'] = sdkToolConfig;
if (sdkSafetySettings) restPayload['safetySettings'] = sdkSafetySettings;
if (sdkCachedContent) restPayload['cachedContent'] = sdkCachedContent;
return restPayload;
}
@@ -92,6 +92,8 @@ describe('checkNextSpeaker', () => {
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
} as ContentGenerator,
mockConfig,
);