feat: implement explicit context caching for main agent with stable SI hashing

This commit is contained in:
Aishanee Shah
2026-05-12 20:15:52 +00:00
parent 5dda532573
commit 62e97b14a2
25 changed files with 948 additions and 49 deletions
+32 -3
View File
@@ -449,7 +449,8 @@
"version": "2.11.0",
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
"integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
"license": "(Apache-2.0 AND BSD-3-Clause)",
"peer": true
},
"node_modules/@bundled-es-modules/cookie": {
"version": "2.0.1",
@@ -1535,6 +1536,7 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.7.13",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -2212,6 +2214,7 @@
"integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@octokit/auth-token": "^6.0.0",
"@octokit/graphql": "^9.0.2",
@@ -2392,6 +2395,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"license": "Apache-2.0",
"peer": true,
"engines": {
"node": ">=8.0.0"
}
@@ -2441,6 +2445,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
"integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/semantic-conventions": "^1.29.0"
},
@@ -2815,6 +2820,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
"integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/semantic-conventions": "^1.29.0"
@@ -2848,6 +2854,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
"integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0"
@@ -2902,6 +2909,7 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
"integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0",
@@ -4139,6 +4147,7 @@
"integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"csstype": "^3.0.2"
}
@@ -4412,6 +4421,7 @@
"integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.58.2",
"@typescript-eslint/types": "8.58.2",
@@ -5187,6 +5197,7 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -7304,7 +7315,8 @@
"version": "0.0.1581282",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
"integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/dezalgo": {
"version": "1.0.4",
@@ -7889,6 +7901,7 @@
"integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -8499,6 +8512,7 @@
"resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
"integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
"license": "MIT",
"peer": true,
"dependencies": {
"accepts": "^2.0.0",
"body-parser": "^2.2.1",
@@ -9765,6 +9779,7 @@
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=16.9.0"
}
@@ -10024,6 +10039,7 @@
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz",
"integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"ansi-escapes": "^7.0.0",
"ansi-styles": "^6.2.3",
@@ -13799,6 +13815,7 @@
"resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
"integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -13809,6 +13826,7 @@
"integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"shell-quote": "^1.6.1",
"ws": "^7"
@@ -15961,6 +15979,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -16183,7 +16202,8 @@
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
"license": "0BSD",
"peer": true
},
"node_modules/tsx": {
"version": "4.20.3",
@@ -16191,6 +16211,7 @@
"integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
@@ -16356,6 +16377,7 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"devOptional": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -16423,6 +16445,7 @@
"integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.35.0",
"@typescript-eslint/types": "8.35.0",
@@ -16842,6 +16865,7 @@
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz",
"integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==",
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.27.0",
"fdir": "^6.5.0",
@@ -17412,6 +17436,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -17424,6 +17449,7 @@
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/chai": "^5.2.2",
"@vitest/expect": "3.2.4",
@@ -18062,6 +18088,7 @@
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
@@ -18498,6 +18525,7 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
"integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.8.0",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -18616,6 +18644,7 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
+47
View File
@@ -2156,6 +2156,53 @@ const SETTINGS_SCHEMA = {
},
},
},
contextCaching: {
type: 'object',
label: 'Context Caching',
category: 'Experimental',
requiresRestart: true,
default: {},
description: 'Explicit context caching for the main agent.',
showInDialog: true,
properties: {
enabled: {
type: 'boolean',
label: 'Enable Context Caching',
category: 'Experimental',
requiresRestart: true,
default: false,
description: 'Enable explicit context caching for the main agent.',
showInDialog: true,
},
thresholdTokens: {
type: 'number',
label: 'Threshold Tokens',
category: 'Experimental',
requiresRestart: true,
default: 32768,
description: 'Minimum tokens required to trigger explicit caching.',
showInDialog: true,
},
ttlMinutes: {
type: 'number',
label: 'TTL (Minutes)',
category: 'Experimental',
requiresRestart: true,
default: 60,
description: 'Time to live for a cache resource in minutes.',
showInDialog: true,
},
autoRenew: {
type: 'boolean',
label: 'Auto Renew',
category: 'Experimental',
requiresRestart: true,
default: true,
description: 'Automatically extend TTL on use.',
showInDialog: true,
},
},
},
adk: {
type: 'object',
label: 'ADK',
@@ -59,7 +59,7 @@ describe('Auto Routing Fallback Integration', () => {
return ''; // Fallback for other files
});
fakeGenerator = new FakeContentGenerator([]);
fakeGenerator = new FakeContentGenerator([], []);
});
afterEach(() => {
+8
View File
@@ -346,6 +346,14 @@ export class CodeAssistServer implements ContentGenerator {
throw Error();
}
  /**
   * Explicit context caching is not available under Code Assist auth.
   * Always rejects; declared so CodeAssistServer satisfies ContentGenerator.
   */
  async createCachedContent(): Promise<never> {
    throw new Error('Explicit caching is not supported for Code Assist auth.');
  }
  /**
   * Explicit context caching is not available under Code Assist auth.
   * Always rejects; declared so CodeAssistServer satisfies ContentGenerator.
   */
  async updateCachedContent(): Promise<never> {
    throw new Error('Explicit caching is not supported for Code Assist auth.');
  }
async listExperiments(
metadata: ClientMetadata,
): Promise<ListExperimentsResponse> {
+15 -1
View File
@@ -14,6 +14,7 @@ import type { ConversationRecord } from '../services/chatRecordingService.js';
import type {
AgentHistoryProviderConfig,
ContextManagementConfig,
ContextCachingConfig,
ToolOutputMaskingConfig,
} from '../context/types.js';
export type { ConversationRecord };
@@ -717,6 +718,7 @@ export interface ConfigParameters {
experimentalAutoMemory?: boolean;
experimentalGemma?: boolean;
experimentalContextManagementConfig?: string;
experimentalContextCaching?: Partial<ContextCachingConfig>;
experimentalAgentHistoryTruncation?: boolean;
experimentalAgentHistoryTruncationThreshold?: number;
experimentalAgentHistoryRetainedMessages?: number;
@@ -972,6 +974,7 @@ export class Config implements McpContext, AgentLoopContext {
private readonly modelSteering: boolean;
private memoryContextManager?: MemoryContextManager;
private readonly contextManagement: ContextManagementConfig;
private readonly contextCaching: ContextCachingConfig;
private terminalBackground: string | undefined = undefined;
private remoteAdminSettings: AdminControlsSettings | undefined;
private latestApiRequest: GenerateContentParameters | undefined;
@@ -1224,6 +1227,13 @@ export class Config implements McpContext, AgentLoopContext {
},
},
};
this.contextCaching = {
enabled: params.experimentalContextCaching?.enabled ?? false,
thresholdTokens:
params.experimentalContextCaching?.thresholdTokens ?? 32768,
ttlMinutes: params.experimentalContextCaching?.ttlMinutes ?? 60,
autoRenew: params.experimentalContextCaching?.autoRenew ?? true,
};
this.topicUpdateNarration = params.topicUpdateNarration ?? true;
this.modelSteering = params.modelSteering ?? false;
this.injectionService = new InjectionService(() =>
@@ -2574,7 +2584,11 @@ export class Config implements McpContext, AgentLoopContext {
return this.contextManagement;
}
get agentHistoryProviderConfig(): AgentHistoryProviderConfig {
getContextCachingConfig(): ContextCachingConfig {
return this.contextCaching;
}
getAgentHistoryProviderConfig(): AgentHistoryProviderConfig {
return {
maxTokens: this.contextManagement.historyWindow.maxTokens,
retainedTokens: this.contextManagement.historyWindow.retainedTokens,
+7
View File
@@ -87,6 +87,13 @@ export class Storage {
return path.join(Storage.getGlobalGeminiDir(), GOOGLE_ACCOUNTS_FILENAME);
}
static getContextCacheMetadataPath(): string {
return path.join(
Storage.getGlobalGeminiDir(),
'context-cache-metadata.json',
);
}
static getTrustedFoldersPath(): string {
if (process.env['GEMINI_CLI_TRUSTED_FOLDERS_PATH']) {
return process.env['GEMINI_CLI_TRUSTED_FOLDERS_PATH'];
@@ -0,0 +1,106 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ContextCacheManager } from './contextCacheManager.js';
import { Storage } from '../config/storage.js';
import * as fs from 'node:fs';
vi.mock('node:fs');
vi.mock('../config/storage.js');
describe('ContextCacheManager', () => {
  let manager: ContextCacheManager;
  // Storage is fully mocked, so this path is never touched on disk.
  const mockMetadataPath = '/test/metadata.json';
  beforeEach(() => {
    vi.clearAllMocks();
    // The manager resolves the metadata path in its constructor, so the
    // Storage mock must be configured before `new ContextCacheManager()`.
    vi.mocked(Storage.getContextCacheMetadataPath).mockReturnValue(
      mockMetadataPath,
    );
    manager = new ContextCacheManager();
  });
  it('should calculate stable SHA-256 hash', () => {
    const si = 'You are a helpful assistant.';
    const hash1 = manager.calculateHash(si);
    const hash2 = manager.calculateHash(si);
    // Same input must always yield the same digest (stable cache key).
    expect(hash1).toBe(hash2);
    // SHA-256 hex digest: exactly 64 lowercase hex characters.
    expect(hash1).toMatch(/^[a-f0-9]{64}$/);
  });
  it('should return undefined if cache not found', () => {
    // No metadata file on disk -> manager starts with an empty store.
    vi.mocked(fs.existsSync).mockReturnValue(false);
    expect(manager.getCache('nonexistent')).toBeUndefined();
  });
  it('should return entry if valid cache found', () => {
    const hash = 'testhash';
    // Expires one hour in the future, so the entry is still "hot".
    const futureDate = new Date(Date.now() + 3600000).toISOString();
    const entry = {
      cacheName: 'cachedContents/123',
      model: 'gemini-pro',
      expiresAt: futureDate,
      tokenCount: 1000,
    };
    vi.mocked(fs.existsSync).mockReturnValue(true);
    vi.mocked(fs.readFileSync).mockReturnValue(
      JSON.stringify({
        version: '1.0',
        entries: { [hash]: entry },
      }),
    );
    const result = manager.getCache(hash);
    expect(result).toEqual(entry);
  });
  it('should purge and return undefined if cache expired', () => {
    const hash = 'expiredhash';
    // Expired one hour ago, so getCache must treat the entry as stale.
    const pastDate = new Date(Date.now() - 3600000).toISOString();
    const entry = {
      cacheName: 'cachedContents/expired',
      model: 'gemini-pro',
      expiresAt: pastDate,
      tokenCount: 1000,
    };
    vi.mocked(fs.existsSync).mockReturnValue(true);
    vi.mocked(fs.readFileSync).mockReturnValue(
      JSON.stringify({
        version: '1.0',
        entries: { [hash]: entry },
      }),
    );
    const result = manager.getCache(hash);
    expect(result).toBeUndefined();
    // Purging must also persist the removal back to the metadata file.
    expect(fs.writeFileSync).toHaveBeenCalled();
    const saved = JSON.parse(
      vi.mocked(fs.writeFileSync).mock.calls[0][1] as string,
    );
    expect(saved.entries[hash]).toBeUndefined();
  });
  it('should save metadata when setCache is called', () => {
    // No pre-existing file: the store is created fresh, then written.
    vi.mocked(fs.existsSync).mockReturnValue(false);
    const hash = 'newhash';
    const entry = {
      cacheName: 'cachedContents/new',
      model: 'gemini-pro',
      expiresAt: new Date().toISOString(),
      tokenCount: 1000,
    };
    manager.setCache(hash, entry);
    expect(fs.writeFileSync).toHaveBeenCalled();
    const saved = JSON.parse(
      vi.mocked(fs.writeFileSync).mock.calls[0][1] as string,
    );
    expect(saved.entries[hash]).toEqual(entry);
  });
});
@@ -0,0 +1,144 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as crypto from 'node:crypto';
import * as fs from 'node:fs';
import { Storage } from '../config/storage.js';
import { debugLogger } from '../utils/debugLogger.js';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
/**
 * Metadata for a single Gemini Context Cache resource.
 * One entry corresponds to one remote `cachedContents/*` resource.
 */
export interface ContextCacheEntry {
  /** The full resource name, e.g., 'cachedContents/xyz123' */
  cacheName: string;
  /** The model ID this cache was created for */
  model: string;
  /** ISO 8601 expiration timestamp */
  expiresAt: string;
  /** Number of tokens in the cached content */
  tokenCount: number;
}
/**
 * Schema for the local persistent metadata storage.
 * Serialized as pretty-printed JSON on disk; `version` is a format marker
 * (the manager currently writes '1.0').
 */
export interface ContextCacheMetadata {
  version: string;
  /** Map of SHA-256(SI) -> ContextCacheEntry */
  entries: Record<string, ContextCacheEntry>;
}
/**
 * Manages the lifecycle and discovery of Gemini Context Caches.
 * Uses a local metadata file to map System Instruction hashes to remote
 * cache IDs. All persistence is best-effort: load/save failures are logged
 * and degrade to an empty store so caching never blocks the agent.
 */
export class ContextCacheManager {
  // In-memory copy of the persisted metadata; lazily loaded on first access.
  private metadata: ContextCacheMetadata | undefined;
  private readonly metadataPath: string;

  constructor() {
    this.metadataPath = Storage.getContextCacheMetadataPath();
  }

  /**
   * Returns true when `value` has the shape of ContextCacheMetadata.
   * Guards against a corrupted or hand-edited metadata file: the previous
   * blanket type assertion let e.g. `null`, `[]`, or `{}` through, which
   * later crashed getCache/setCache with a TypeError on `entries[hash]`.
   */
  private static isValidMetadata(value: unknown): value is ContextCacheMetadata {
    return (
      typeof value === 'object' &&
      value !== null &&
      typeof (value as { entries?: unknown }).entries === 'object' &&
      (value as { entries?: unknown }).entries !== null
    );
  }

  /**
   * Loads metadata from disk on first use, or initializes an empty store.
   * Parse errors and malformed content both fall back to an empty store.
   */
  private loadMetadata(): ContextCacheMetadata {
    if (this.metadata) {
      return this.metadata;
    }
    try {
      if (fs.existsSync(this.metadataPath)) {
        const content = fs.readFileSync(this.metadataPath, 'utf8');
        const parsed = JSON.parse(content) as unknown;
        this.metadata = ContextCacheManager.isValidMetadata(parsed)
          ? parsed
          : { version: '1.0', entries: {} };
      } else {
        this.metadata = { version: '1.0', entries: {} };
      }
    } catch (error) {
      debugLogger.error('Failed to load context cache metadata:', error);
      this.metadata = { version: '1.0', entries: {} };
    }
    return this.metadata;
  }

  /** Persists the in-memory metadata to disk; failures are logged, not thrown. */
  private saveMetadata(): void {
    if (!this.metadata) return;
    try {
      fs.writeFileSync(
        this.metadataPath,
        JSON.stringify(this.metadata, null, 2),
      );
    } catch (error) {
      debugLogger.error('Failed to save context cache metadata:', error);
    }
  }

  /**
   * Calculates a stable SHA-256 hash (lowercase hex) of the System Instruction.
   */
  calculateHash(systemInstruction: string): string {
    return crypto.createHash('sha256').update(systemInstruction).digest('hex');
  }

  /**
   * Estimates the token count of a system instruction string.
   */
  calculateTokenCount(systemInstruction: string): number {
    return estimateTokenCountSync([{ text: systemInstruction }]);
  }

  /**
   * Looks up a hot cache for the given SI hash.
   * Purges the entry (persisting the removal) if it has expired. A malformed
   * `expiresAt` yields an Invalid Date, which compares false and is therefore
   * treated as expired.
   */
  getCache(hash: string): ContextCacheEntry | undefined {
    const metadata = this.loadMetadata();
    const entry = metadata.entries[hash];
    if (entry) {
      if (new Date(entry.expiresAt) > new Date()) {
        return entry;
      }
      debugLogger.log(
        `[ContextCache] Purging expired cache: ${entry.cacheName}`,
      );
      delete metadata.entries[hash];
      this.saveMetadata();
    }
    return undefined;
  }

  /**
   * Saves or updates a cache entry and persists the store.
   */
  setCache(hash: string, entry: ContextCacheEntry): void {
    const metadata = this.loadMetadata();
    metadata.entries[hash] = entry;
    this.saveMetadata();
  }

  /**
   * Removes a cache entry by hash; only persists when something was removed.
   */
  removeCache(hash: string): void {
    const metadata = this.loadMetadata();
    if (metadata.entries[hash]) {
      delete metadata.entries[hash];
      this.saveMetadata();
    }
  }
}

/** Global singleton instance */
export const contextCacheManager = new ContextCacheManager();
+7
View File
@@ -18,6 +18,13 @@ export interface ToolOutputMaskingConfig {
protectLatestTurn: boolean;
}
export interface ContextCachingConfig {
enabled: boolean;
thresholdTokens: number;
ttlMinutes: number;
autoRenew: boolean;
}
export interface ContextManagementConfig {
enabled: boolean;
historyWindow: {
+2 -1
View File
@@ -112,9 +112,10 @@ export class GeminiClient {
this.loopDetector = new LoopDetectionService(this.config);
this.compressionService = new ChatCompressionService();
this.agentHistoryProvider = new AgentHistoryProvider(
this.config.agentHistoryProviderConfig,
this.config.getAgentHistoryProviderConfig(),
this.config,
);
this.toolOutputMaskingService = new ToolOutputMaskingService();
this.lastPromptId = this.config.getSessionId();
+40 -25
View File
@@ -137,7 +137,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_SURFACE', '');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
const generator = await createContentGenerator(
@@ -158,9 +159,7 @@ describe('createContentGenerator', () => {
}),
}),
});
expect(generator).toEqual(
new LoggingContentGenerator(mockGenerator.models, mockConfig),
);
expect(generator).toBeInstanceOf(LoggingContentGenerator);
});
it('should use standard User-Agent for a2a-server running outside VS Code', async () => {
@@ -179,7 +178,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_SURFACE', '');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -217,7 +217,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('TERM_PROGRAM_VERSION', '1.85.0');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -255,7 +256,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_SURFACE', '');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -288,7 +290,8 @@ describe('createContentGenerator', () => {
vi.stubEnv('GEMINI_CLI_CUSTOM_HEADERS', 'User-Agent:MyCustomUA');
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
await createContentGenerator(
@@ -348,7 +351,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv(
@@ -395,7 +399,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
@@ -433,7 +438,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GEMINI_API_KEY_AUTH_MECHANISM', 'bearer');
@@ -467,7 +473,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
// GEMINI_API_KEY_AUTH_MECHANISM is not stubbed, so it will be undefined, triggering default 'x-goog-api-key'
@@ -508,7 +515,8 @@ describe('createContentGenerator', () => {
getClientName: vi.fn().mockReturnValue(undefined),
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
const generator = await createContentGenerator(
@@ -527,9 +535,7 @@ describe('createContentGenerator', () => {
},
}),
});
expect(generator).toEqual(
new LoggingContentGenerator(mockGenerator.models, mockConfig),
);
expect(generator).toBeInstanceOf(LoggingContentGenerator);
});
it('should pass apiVersion to GoogleGenAI when GOOGLE_GENAI_API_VERSION is set', async () => {
@@ -541,7 +547,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GENAI_API_VERSION', 'v1');
@@ -575,7 +582,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
@@ -613,7 +621,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GENAI_API_VERSION', '');
@@ -652,7 +661,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GENAI_API_VERSION', 'v1alpha');
@@ -687,7 +697,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local');
@@ -719,7 +730,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local');
@@ -752,7 +764,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local');
@@ -785,7 +798,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://env.test.local');
@@ -817,7 +831,8 @@ describe('createContentGenerator', () => {
} as unknown as Config;
const mockGenerator = {
models: {},
models: { get: vi.fn() },
caches: { create: vi.fn(), update: vi.fn() },
} as unknown as GoogleGenAI;
vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never);
+96 -10
View File
@@ -4,14 +4,17 @@
* SPDX-License-Identifier: Apache-2.0
*/
import {
import type {
EmbedContentResponse,
GoogleGenAI,
type Content,
type CountTokensResponse,
type GenerateContentResponse,
type GenerateContentParameters,
type CountTokensParameters,
type EmbedContentResponse,
type EmbedContentParameters,
type CachedContent,
type CreateCachedContentParameters,
} from '@google/genai';
import * as os from 'node:os';
import { createCodeAssistContentGenerator } from '../code_assist/codeAssist.js';
@@ -49,6 +52,15 @@ export interface ContentGenerator {
embedContent(request: EmbedContentParameters): Promise<EmbedContentResponse>;
createCachedContent(
request: CreateCachedContentParameters,
): Promise<CachedContent>;
updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent>;
userTier?: UserTierId;
userTierName?: string;
@@ -65,6 +77,72 @@ export enum AuthType {
GATEWAY = 'gateway',
}
/**
 * ContentGenerator implementation backed directly by the Google GenAI SDK
 * (standard Gemini / Vertex auth). Generation, token-count, and embedding
 * requests are re-targeted at the model this generator was constructed with;
 * cache requests are forwarded to the SDK's `caches` surface unchanged.
 */
class SdkContentGenerator implements ContentGenerator {
  paidTier?: GeminiUserTier;

  constructor(
    private readonly genAI: GoogleGenAI,
    private readonly modelName: string,
    readonly history: Content[] = [],
  ) {}

  async generateContent(
    request: GenerateContentParameters,
    _userPromptId: string,
    _role: LlmRole,
  ): Promise<GenerateContentResponse> {
    // Force the configured model id onto the outgoing request.
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.generateContent(outgoing);
  }

  async generateContentStream(
    request: GenerateContentParameters,
    _userPromptId: string,
    _role: LlmRole,
  ): Promise<AsyncGenerator<GenerateContentResponse>> {
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.generateContentStream(outgoing);
  }

  async countTokens(
    request: CountTokensParameters,
  ): Promise<CountTokensResponse> {
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.countTokens(outgoing);
  }

  async embedContent(
    request: EmbedContentParameters,
  ): Promise<EmbedContentResponse> {
    const outgoing = { ...request, model: this.modelName };
    return this.genAI.models.embedContent(outgoing);
  }

  async createCachedContent(
    request: CreateCachedContentParameters,
  ): Promise<CachedContent> {
    // Passed through unchanged — no model override is applied here.
    return this.genAI.caches.create(request);
  }

  async updateCachedContent(request: {
    name: string;
    config?: { ttl?: string; expireTime?: string };
  }): Promise<CachedContent> {
    return this.genAI.caches.update(request);
  }
}
/**
* Detects the best authentication type based on environment variables.
*
@@ -197,7 +275,7 @@ export async function createContentGenerator(
const fakeGenerator = await FakeContentGenerator.fromFile(
gcConfig.fakeResponses,
);
return new LoggingContentGenerator(fakeGenerator, gcConfig);
return new LoggingContentGenerator(fakeGenerator, gcConfig, []);
}
const version = await getVersion();
const model = resolveModel(
@@ -278,6 +356,7 @@ export async function createContentGenerator(
sessionId,
),
gcConfig,
[],
);
}
@@ -330,11 +409,10 @@ export async function createContentGenerator(
const httpOptions: {
baseUrl?: string;
headers: Record<string, string>;
} = { headers };
if (baseUrl) {
httpOptions.baseUrl = baseUrl;
}
} = {
headers,
...(baseUrl ? { baseUrl } : {}),
};
const googleGenAI = new GoogleGenAI({
apiKey: config.apiKey === '' ? undefined : config.apiKey,
@@ -342,7 +420,11 @@ export async function createContentGenerator(
httpOptions,
...(apiVersionEnv && { apiVersion: apiVersionEnv }),
});
return new LoggingContentGenerator(googleGenAI.models, gcConfig);
return new LoggingContentGenerator(
new SdkContentGenerator(googleGenAI, model, []),
gcConfig,
[],
);
}
throw new Error(
`Error creating contentGenerator: Unsupported authType: ${config.authType}`,
@@ -350,7 +432,11 @@ export async function createContentGenerator(
})();
if (gcConfig.recordResponses) {
return new RecordingContentGenerator(generator, gcConfig.recordResponses);
return new RecordingContentGenerator(
generator,
gcConfig.recordResponses,
[],
);
}
return generator;
+30 -1
View File
@@ -6,11 +6,14 @@
import {
GenerateContentResponse,
type Content,
type CountTokensResponse,
type GenerateContentParameters,
type CountTokensParameters,
EmbedContentResponse,
type EmbedContentParameters,
type CachedContent,
type CreateCachedContentParameters,
} from '@google/genai';
import { promises } from 'node:fs';
import type { ContentGenerator } from './contentGenerator.js';
@@ -34,6 +37,14 @@ export type FakeResponse =
| {
method: 'embedContent';
response: EmbedContentResponse;
}
| {
method: 'createCachedContent';
response: CachedContent;
}
| {
method: 'updateCachedContent';
response: CachedContent;
};
// A ContentGenerator that responds with canned responses.
@@ -46,7 +57,10 @@ export class FakeContentGenerator implements ContentGenerator {
userTierName?: string;
paidTier?: GeminiUserTier;
constructor(private readonly responses: FakeResponse[]) {}
constructor(
// Queue of canned responses, consumed in order by getNextResponse.
private readonly responses: FakeResponse[],
// Optional shared conversation history (defaults to empty); exposed to
// callers alongside the same parameter on the other generator wrappers.
readonly history: Content[] = [],
) {}
static async fromFile(filePath: string): Promise<FakeContentGenerator> {
const fileContent = await promises.readFile(filePath, 'utf-8');
@@ -124,4 +138,19 @@ export class FakeContentGenerator implements ContentGenerator {
EmbedContentResponse.prototype,
);
}
/**
 * Serves the next canned response queued for `createCachedContent`.
 * No real cache is created — the fake replays recorded data in order,
 * like the other replay methods on this class.
 */
async createCachedContent(
  request: CreateCachedContentParameters,
): Promise<CachedContent> {
  const canned = this.getNextResponse('createCachedContent', request);
  return canned;
}
// Replays the next canned 'updateCachedContent' response; no real cache
// TTL/expiry is modified by the fake.
async updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent> {
return this.getNextResponse('updateCachedContent', request);
}
}
+151
View File
@@ -5,6 +5,7 @@
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import * as crypto from 'node:crypto';
import {
ApiError,
ThinkingLevel,
@@ -25,6 +26,7 @@ import {
CoreToolCallStatus,
} from '../scheduler/types.js';
import { MockTool } from '../test-utils/mock-tool.js';
import { Storage } from '../config/storage.js';
import type { Config } from '../config/config.js';
import { setSimulate429 } from '../utils/testUtils.js';
import { DEFAULT_THINKING_MODE } from '../config/models.js';
@@ -132,6 +134,8 @@ describe('GeminiChat', () => {
countTokens: vi.fn(),
embedContent: vi.fn(),
batchEmbedContents: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
} as unknown as ContentGenerator;
mockHandleFallback.mockClear();
@@ -186,6 +190,24 @@ describe('GeminiChat', () => {
getMaxAttempts: vi.fn().mockReturnValue(10),
getUserTier: vi.fn().mockReturnValue(undefined),
isContextManagementEnabled: vi.fn().mockReturnValue(false),
getContextCachingConfig: vi.fn().mockReturnValue({
enabled: false,
thresholdTokens: 32768,
ttlMinutes: 60,
autoRenew: true,
}),
getSystemInstructionMemory: vi.fn().mockReturnValue(undefined),
getIncludeDirectoryTree: vi.fn().mockReturnValue(true),
getWorkspaceContext: vi.fn().mockReturnValue({
getDirectories: vi.fn().mockReturnValue([]),
}),
isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false),
topicState: {
getTopic: vi.fn().mockReturnValue(undefined),
},
getSkillManager: vi.fn().mockReturnValue({
getSkills: vi.fn().mockReturnValue([]),
}),
modelConfigService: {
getResolvedConfig: vi.fn().mockImplementation((modelConfigKey) => {
const model = modelConfigKey.model ?? mockConfig.getModel();
@@ -3093,4 +3115,133 @@ describe('GeminiChat', () => {
expect(stripped[1].parts![0].functionResponse!.id).toBe('call_123');
});
});
// Exercises GeminiChat's explicit context-caching path for the MAIN role:
// cache creation when the stable SI exceeds the threshold, and cache reuse
// when a matching entry already exists in the metadata file.
describe('explicit context caching', () => {
it('should create a new cache if enabled and SI is large enough', async () => {
// Arrange: a system instruction comfortably above the configured threshold.
const si = 'Large system instruction...'.repeat(2000); // Definitely > 32k
chat = new GeminiChat(mockConfig, si);
vi.mocked(mockConfig.getContextCachingConfig).mockReturnValue({
enabled: true,
thresholdTokens: 32768,
ttlMinutes: 60,
autoRenew: true,
});
// The generator reports a freshly created cache entry.
vi.mocked(mockContentGenerator.createCachedContent).mockResolvedValue({
name: 'cachedContents/new-cache',
expireTime: new Date(Date.now() + 3600000).toISOString(),
});
vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
(async function* () {
yield {
candidates: [
{
content: { role: 'model', parts: [{ text: 'response' }] },
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
);
// Act: send one message as the MAIN role and drain the stream.
const stream = await chat.sendMessageStream(
{ model: 'gemini-pro' },
'test',
'prompt-id',
new AbortController().signal,
LlmRole.MAIN,
);
for await (const chunk of stream) {
expect(chunk).toBeDefined();
}
// Assert: the cache was created from the full SI, and the generate call
// referenced it via config.cachedContent.
expect(mockContentGenerator.createCachedContent).toHaveBeenCalledWith(
expect.objectContaining({
systemInstruction: { parts: [{ text: si }] },
}),
);
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith(
expect.objectContaining({
config: expect.objectContaining({
cachedContent: 'cachedContents/new-cache',
}),
}),
'prompt-id',
LlmRole.MAIN,
);
});
it('should reuse existing cache if present', async () => {
const si = 'Large system instruction...'.repeat(2000);
chat = new GeminiChat(mockConfig, si);
// The metadata entry is keyed by the sha256 hash of the stable SI.
const siHash = crypto.createHash('sha256').update(si).digest('hex');
const futureDate = new Date(Date.now() + 3600000).toISOString();
// Seed the metadata file via the mock fs
mockFileSystem.set(
Storage.getContextCacheMetadataPath(),
JSON.stringify({
version: '1.0',
entries: {
[siHash]: {
cacheName: 'cachedContents/existing-cache',
model: 'gemini-pro',
expiresAt: futureDate,
tokenCount: 40000,
},
},
}),
);
vi.mocked(mockConfig.getContextCachingConfig).mockReturnValue({
enabled: true,
thresholdTokens: 32768,
ttlMinutes: 60,
autoRenew: true,
});
// autoRenew is enabled, so the existing cache's TTL may be extended.
vi.mocked(mockContentGenerator.updateCachedContent).mockResolvedValue({
name: 'cachedContents/existing-cache',
expireTime: new Date(Date.now() + 7200000).toISOString(),
});
vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
(async function* () {
yield {
candidates: [
{
content: { role: 'model', parts: [{ text: 'response' }] },
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
);
const stream = await chat.sendMessageStream(
{ model: 'gemini-pro' },
'test',
'prompt-id',
new AbortController().signal,
LlmRole.MAIN,
);
for await (const chunk of stream) {
expect(chunk).toBeDefined();
}
// Assert: no new cache is created and the seeded cache name is reused.
expect(mockContentGenerator.createCachedContent).not.toHaveBeenCalled();
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith(
expect.objectContaining({
config: expect.objectContaining({
cachedContent: 'cachedContents/existing-cache',
}),
}),
'prompt-id',
LlmRole.MAIN,
);
});
});
});
+132 -2
View File
@@ -45,7 +45,7 @@ import {
ContentRetryEvent,
ContentRetryFailureEvent,
NetworkRetryAttemptEvent,
type LlmRole,
LlmRole,
} from '../telemetry/types.js';
import { handleFallback } from '../fallback/handler.js';
import { isFunctionResponse } from '../utils/messageInspectors.js';
@@ -61,6 +61,9 @@ import {
import { coreEvents } from '../utils/events.js';
import type { AgentLoopContext } from '../config/agent-loop-context.js';
import { debugLogger } from '../utils/debugLogger.js';
import { contextCacheManager } from '../context/contextCacheManager.js';
import { getCoreSystemPrompt } from './prompts.js';
import { getDirectoryContextString } from '../utils/environmentContext.js';
export enum StreamEventType {
/** A regular content chunk from the API. */
@@ -747,7 +750,134 @@ export class GeminiChat {
lastConfig = config;
lastContentsToUse = contentsToUse;
const finalContents = stripToolCallIdPrefixes(contentsToUse);
// Handle explicit context caching
const cachingConfig = this.context.config.getContextCachingConfig();
let effectiveContents = contentsToUse;
if (cachingConfig.enabled && role === LlmRole.MAIN) {
try {
const userMemory = this.context.config.getSystemInstructionMemory();
const stableSI = getCoreSystemPrompt(
this.context.config,
userMemory,
undefined,
undefined,
'stable',
);
const siHash = contextCacheManager.calculateHash(stableSI);
const existingCache = contextCacheManager.getCache(siHash);
if (existingCache && existingCache.model === modelToUse) {
config.cachedContent = existingCache.cacheName;
debugLogger.log(
`[ContextCache] Using existing cache: ${existingCache.cacheName}`,
);
// Prepend dynamic context to history
const dynamicContext = getCoreSystemPrompt(
this.context.config,
userMemory,
undefined,
undefined,
'dynamic',
);
const dirContext = await getDirectoryContextString(
this.context.config,
);
const dynamicWithTree = dynamicContext.replace(
'[Recursive file tree provided in history]',
dirContext,
);
effectiveContents = [
{ role: 'user', parts: [{ text: dynamicWithTree }] },
...contentsToUse,
];
// Asynchronously renew TTL if enabled
if (cachingConfig.autoRenew) {
this.context.config
.getContentGenerator()
.updateCachedContent({
name: existingCache.cacheName,
config: { ttl: `${cachingConfig.ttlMinutes * 60}s` },
})
.then((updated) => {
if (updated.expireTime) {
existingCache.expiresAt = updated.expireTime;
contextCacheManager.setCache(siHash, existingCache);
debugLogger.log(
`[ContextCache] Renewed TTL for ${existingCache.cacheName}`,
);
}
})
.catch((e) =>
debugLogger.error(`[ContextCache] Failed to renew TTL:`, e),
);
}
} else {
// Check if we should create a new cache
const siTokens = contextCacheManager.calculateTokenCount(stableSI);
if (siTokens >= cachingConfig.thresholdTokens) {
debugLogger.log(
`[ContextCache] Creating new cache for stable SI (${siTokens} tokens)`,
);
const newCache = await this.context.config
.getContentGenerator()
.createCachedContent({
model: modelToUse,
config: {
systemInstruction: { parts: [{ text: stableSI }] },
ttl: `${cachingConfig.ttlMinutes * 60}s`,
},
});
if (newCache.name && newCache.expireTime) {
const entry = {
cacheName: newCache.name,
model: modelToUse,
expiresAt: newCache.expireTime,
tokenCount: siTokens,
};
contextCacheManager.setCache(siHash, entry);
config.cachedContent = newCache.name;
debugLogger.log(
`[ContextCache] Created and using new cache: ${newCache.name}`,
);
// Prepend dynamic context to history for this initial call
const dynamicContext = getCoreSystemPrompt(
this.context.config,
userMemory,
undefined,
undefined,
'dynamic',
);
const dirContext = await getDirectoryContextString(
this.context.config,
);
const dynamicWithTree = dynamicContext.replace(
'[Recursive file tree provided in history]',
dirContext,
);
effectiveContents = [
{ role: 'user', parts: [{ text: dynamicWithTree }] },
...contentsToUse,
];
}
}
}
} catch (error) {
// Fall back to standard request on cache failure
debugLogger.error(
'[ContextCache] Error managing context cache:',
error,
);
}
}
const finalContents = stripToolCallIdPrefixes(effectiveContents);
return this.context.config.getContentGenerator().generateContentStream(
{
@@ -65,6 +65,8 @@ describe('LoggingContentGenerator', () => {
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
};
config = {
getGoogleAIConfig: vi.fn(),
@@ -15,6 +15,8 @@ import type {
GenerateContentParameters,
GenerateContentResponseUsageMetadata,
GenerateContentResponse,
CachedContent,
CreateCachedContentParameters,
} from '@google/genai';
import {
ApiRequestEvent,
@@ -150,6 +152,7 @@ export class LoggingContentGenerator implements ContentGenerator {
constructor(
// Underlying generator that performs the actual API calls.
private readonly wrapped: ContentGenerator,
private readonly config: Config,
// Optional conversation history (defaults to empty), kept consistent with
// the same parameter on the other generator wrappers.
readonly history: Content[] = [],
) {}
getWrapped(): ContentGenerator {
@@ -623,4 +626,17 @@ export class LoggingContentGenerator implements ContentGenerator {
},
);
}
/**
 * Creates an explicit content cache by delegating to the wrapped generator.
 * NOTE(review): unlike the generate/stream paths on this class, this call is
 * not routed through API logging — confirm that is intentional.
 */
async createCachedContent(
  request: CreateCachedContentParameters,
): Promise<CachedContent> {
  const result = await this.wrapped.createCachedContent(request);
  return result;
}
// Forwards a cache TTL/expiry update straight to the wrapped generator.
// NOTE(review): this pass-through is not logged like the generate calls —
// confirm cache mutations should bypass telemetry.
async updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent> {
return this.wrapped.updateCachedContent(request);
}
}
+2
View File
@@ -25,12 +25,14 @@ export function getCoreSystemPrompt(
userMemory?: string | HierarchicalMemory,
interactiveOverride?: boolean,
topicUpdateNarrationOverride?: boolean,
splitMode: 'combined' | 'stable' | 'dynamic' = 'combined',
): string {
return new PromptProvider().getCoreSystemPrompt(
config,
userMemory,
interactiveOverride,
topicUpdateNarrationOverride,
splitMode,
);
}
@@ -40,6 +40,8 @@ describe('RecordingContentGenerator', () => {
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
};
recorder = new RecordingContentGenerator(mockRealGenerator, filePath);
vi.clearAllMocks();
@@ -5,17 +5,20 @@
*/
import type {
Content,
CountTokensResponse,
GenerateContentParameters,
GenerateContentResponse,
CountTokensParameters,
EmbedContentResponse,
EmbedContentParameters,
CachedContent,
CreateCachedContentParameters,
} from '@google/genai';
import { appendFileSync } from 'node:fs';
import type { ContentGenerator } from './contentGenerator.js';
import type { FakeResponse } from './fakeContentGenerator.js';
import type { UserTierId } from '../code_assist/types.js';
import type { UserTierId, GeminiUserTier } from '../code_assist/types.js';
import { safeJsonStringify } from '../utils/safeJsonStringify.js';
import type { LlmRole } from '../telemetry/types.js';
@@ -29,6 +32,7 @@ export class RecordingContentGenerator implements ContentGenerator {
constructor(
// Generator whose real responses are recorded for later replay.
private readonly realGenerator: ContentGenerator,
// Path of the file that recorded responses are appended to.
private readonly filePath: string,
// Optional conversation history (defaults to empty), matching the same
// parameter on the other generator wrappers.
readonly history: Content[] = [],
) {}
get userTier(): UserTierId | undefined {
@@ -39,6 +43,10 @@ export class RecordingContentGenerator implements ContentGenerator {
return this.realGenerator.userTierName;
}
// Pass-through of the real generator's paid-tier information.
get paidTier(): GeminiUserTier | undefined {
return this.realGenerator.paidTier;
}
async generateContent(
request: GenerateContentParameters,
userPromptId: string,
@@ -111,7 +119,6 @@ export class RecordingContentGenerator implements ContentGenerator {
request: EmbedContentParameters,
): Promise<EmbedContentResponse> {
const response = await this.realGenerator.embedContent(request);
const recordedResponse: FakeResponse = {
method: 'embedContent',
response: {
@@ -122,4 +129,29 @@ export class RecordingContentGenerator implements ContentGenerator {
appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
return response;
}
async createCachedContent(
request: CreateCachedContentParameters,
): Promise<CachedContent> {
const response = await this.realGenerator.createCachedContent(request);
const recordedResponse: FakeResponse = {
method: 'createCachedContent',
response,
};
appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
return response;
}
async updateCachedContent(request: {
name: string;
config?: { ttl?: string; expireTime?: string };
}): Promise<CachedContent> {
const response = await this.realGenerator.updateCachedContent(request);
const recordedResponse: FakeResponse = {
method: 'updateCachedContent',
response,
};
appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
return response;
}
}
+1
View File
@@ -146,6 +146,7 @@ export {
} from './services/memoryService.js';
export { isProjectSkillPatchTarget } from './services/memoryPatchUtils.js';
export * from './context/memoryContextManager.js';
export * from './context/contextCacheManager.js';
export * from './services/trackerService.js';
export * from './services/trackerTypes.js';
export * from './services/keychainService.js';
+49 -1
View File
@@ -42,14 +42,58 @@ import type { AgentLoopContext } from '../config/agent-loop-context.js';
*/
export class PromptProvider {
/**
* Generates the core system prompt.
* Generates the core system prompt, optionally split into stable and dynamic parts.
*/
getCoreSystemPrompt(
context: AgentLoopContext,
userMemory?: string | HierarchicalMemory,
interactiveOverride?: boolean,
topicUpdateNarrationOverride?: boolean,
splitMode: 'combined' | 'stable' | 'dynamic' = 'combined',
): string {
if (splitMode === 'dynamic') {
const today = new Date().toLocaleDateString(undefined, {
weekday: 'long',
year: 'numeric',
month: 'long',
day: 'numeric',
});
const platform = process.platform;
const tempDir = context.config.storage.getProjectTempDir();
let dynamicPrompt = `
<session_context>
This is the Gemini CLI. We are setting up the context for our chat.
Today's date is ${today} (formatted according to the user's locale).
My operating system is: ${platform}
The project's temporary directory is: ${tempDir}`;
if (context.config.getIncludeDirectoryTree()) {
const workspaceContext = context.config.getWorkspaceContext();
const workspaceDirectories = workspaceContext.getDirectories();
const dirList = workspaceDirectories
.map((dir) => ` - ${dir}`)
.join('\n');
dynamicPrompt += `\n- **Workspace Directories:**\n${dirList}\n- **Directory Structure:**\n\n[Recursive file tree provided in history]`;
}
if (
topicUpdateNarrationOverride ??
context.config.isTopicUpdateNarrationEnabled()
) {
const activeTopic = context.config.topicState.getTopic();
if (activeTopic) {
const sanitizedTopic = activeTopic
.replace(/\n/g, ' ')
.replace(/\]/g, '');
dynamicPrompt += `\n\n[Active Topic: ${sanitizedTopic}]`;
}
}
dynamicPrompt += `\n</session_context>`;
return dynamicPrompt.trim();
}
const systemMdResolution = resolvePathFromEnv(
process.env['GEMINI_SYSTEM_MD'],
);
@@ -275,6 +319,10 @@ export class PromptProvider {
// Sanitize erratic newlines from composition
let sanitizedPrompt = finalPrompt.replace(/\n{3,}/g, '\n\n');
if (splitMode === 'stable') {
return sanitizedPrompt;
}
// Context Reinjection (Active Topic)
if (isTopicUpdateNarrationEnabled) {
const activeTopic = context.config.topicState.getTopic();
@@ -90,6 +90,22 @@ describe('apiConversionUtils', () => {
expect(result['generationConfig']).toBeUndefined();
});
it('omits systemInstruction when cachedContent is present', () => {
const req: GenerateContentParameters = {
model: 'gemini-3-flash',
contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
config: {
systemInstruction: 'Original instruction',
cachedContent: 'cached-content-id',
},
};
const result = convertToRestPayload(req);
expect(result['cachedContent']).toBe('cached-content-id');
expect(result['systemInstruction']).toBeUndefined();
});
it('retains pure hyperparameters in generationConfig', () => {
const req: GenerateContentParameters = {
model: 'gemini-3-flash',
@@ -46,12 +46,16 @@ export function convertToRestPayload(
}
// Assign extracted capabilities to the root level.
if (restSystemInstruction)
// CRITICAL: systemInstruction and cachedContent are mutually exclusive in the API.
if (sdkCachedContent) {
restPayload['cachedContent'] = sdkCachedContent;
} else if (restSystemInstruction) {
restPayload['systemInstruction'] = restSystemInstruction;
}
if (sdkTools) restPayload['tools'] = sdkTools;
if (sdkToolConfig) restPayload['toolConfig'] = sdkToolConfig;
if (sdkSafetySettings) restPayload['safetySettings'] = sdkSafetySettings;
if (sdkCachedContent) restPayload['cachedContent'] = sdkCachedContent;
return restPayload;
}
@@ -92,6 +92,8 @@ describe('checkNextSpeaker', () => {
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
createCachedContent: vi.fn(),
updateCachedContent: vi.fn(),
} as ContentGenerator,
mockConfig,
);