diff --git a/evals/head_entropy.eval.ts b/evals/head_entropy.eval.ts new file mode 100644 index 0000000000..ae1f051be6 --- /dev/null +++ b/evals/head_entropy.eval.ts @@ -0,0 +1,44 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { appEvalTest } from './app-test-helper.js'; + +describe('Head Entropy Behavioral Evals', () => { + appEvalTest('ALWAYS_PASSES', { + name: 'Head Entropy: Model distills a high-noise tool output', + configOverrides: { + modelSteering: true, + continuousSession: true, + }, + files: { + 'src/big-file.ts': '// NOISE\n'.repeat(100) + 'const SECRET = "SIGNAL";\n' + '// NOISE\n'.repeat(100), + }, + prompt: + 'Grep for SECRET in src/. If the result is very noisy, use distill_result to replace it with just the signal you found before continuing.', + setup: async (rig) => { + // Pause on our new tools to observe the workflow + rig.setBreakpoint(['grep_search', 'distill_result']); + }, + assert: async (rig) => { + // 1. Wait for Grep + await rig.waitForPendingConfirmation('grep_search', 45000); + await rig.resolveAwaitedTool(); + + // 2. Wait for Distillation + await rig.waitForPendingConfirmation('distill_result', 45000); + await rig.resolveAwaitedTool(); + + // 3. 
Final Verification + await rig.waitForOutput(/SIGNAL/i, 60000); + await rig.waitForIdle(30000); + + const output = rig.getStaticOutput(); + expect(output).toContain('SIGNAL'); + expect(output).not.toContain('NOISE'); // Should be elided/replaced + }, + }); +}); diff --git a/packages/cli/src/integration-tests/__snapshots__/continuousSession.test.tsx.snap b/packages/cli/src/integration-tests/__snapshots__/continuousSession.test.tsx.snap new file mode 100644 index 0000000000..167676c288 --- /dev/null +++ b/packages/cli/src/integration-tests/__snapshots__/continuousSession.test.tsx.snap @@ -0,0 +1,107 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 1-before-checkpoint 1`] = ` +[ + { + "parts": [ + { + "text": "", + }, + { + "text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD", + }, + ], + "role": "user", + }, +] +`; + +exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 2-with-checkpoint 1`] = ` +[ + { + "parts": [ + { + "text": "", + }, + { + "text": " +GOAL: Implementation of session continuity. +PROGRESS: Tools implemented. +CONSTRAINT: Use high-fidelity summary. 
+", + }, + { + "text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD", + }, + ], + "role": "user", + }, + { + "parts": [ + { + "text": "I will now checkpoint our progress.", + }, + { + "functionCall": { + "args": { + "summary": "GOAL: Implementation of session continuity. +PROGRESS: Tools implemented. +CONSTRAINT: Use high-fidelity summary.", + }, + "id": "", + "name": "checkpoint_state", + }, + "thoughtSignature": "skip_thought_signature_validator", + }, + ], + "role": "model", + }, + { + "parts": [ + { + "functionResponse": { + "id": "", + "name": "checkpoint_state", + "response": { + "output": "First checkpoint created. No previous summary found.", + }, + }, + }, + ], + "role": "user", + }, +] +`; + +exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > final-curated-history 1`] = ` +[ + { + "parts": [ + { + "text": "", + }, + { + "text": " + +Implement session continuity +Use high-fidelity summary +Tools implemented: checkpoint_state, compress +1. [DONE] Implement tools +2. [IN PROGRESS] Verify continuity + + +", + }, + ], + "role": "user", + }, + { + "parts": [ + { + "text": "Compression successful. 
I have clear context and I remember our mission.", + }, + ], + "role": "model", + }, +] +`; diff --git a/packages/cli/src/integration-tests/__snapshots__/distill_result.test.tsx.snap b/packages/cli/src/integration-tests/__snapshots__/distill_result.test.tsx.snap new file mode 100644 index 0000000000..c4a8e85f8e --- /dev/null +++ b/packages/cli/src/integration-tests/__snapshots__/distill_result.test.tsx.snap @@ -0,0 +1,124 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 1-initial-prompt 1`] = ` +[ + { + "parts": [ + { + "text": "", + }, + { + "text": "Audit src/ for secrets", + }, + ], + "role": "user", + }, +] +`; + +exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 2-request-with-noise 1`] = ` +[ + { + "parts": [ + { + "text": "", + }, + { + "text": "Audit src/ for secrets", + }, + ], + "role": "user", + }, + { + "parts": [ + { + "text": "**Thought** +I need to search for SECRET in the src/ directory.", + }, + { + "functionCall": { + "args": { + "file_path": "src/foo.txt", + }, + "id": "", + "name": "read_file", + }, + "thoughtSignature": "skip_thought_signature_validator", + }, + ], + "role": "model", + }, + { + "parts": [ + { + "functionResponse": { + "id": "", + "name": "read_file", + "response": { + "error": "File not found: /src/foo.txt", + }, + }, + }, + ], + "role": "user", + }, +] +`; + +exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > final-curated-history 1`] = ` +[ + { + "parts": [ + { + "text": "", + }, + { + "text": "Audit src/ for secrets", + }, + ], + "role": "user", + }, + { + "parts": [ + { + "text": "**Thought** +I need to search for SECRET in the src/ directory.", + }, + { + "functionCall": { + "args": { + "file_path": "src/foo.txt", + }, + "id": "", + "name": "read_file", + }, + }, + ], + 
"role": "model", + }, + { + "parts": [ + { + "functionResponse": { + "id": "", + "name": "read_file", + "response": { + "distilled": true, + "distilled_output": "Found SECRET_KEY="12345" in src/env.ts", + "original_output_file": "", + }, + }, + }, + ], + "role": "user", + }, + { + "parts": [ + { + "text": "I found the SECRET_KEY="12345" in src/env.ts after distilling the search results.", + }, + ], + "role": "model", + }, +] +`; diff --git a/packages/cli/src/integration-tests/continuousSession.test.tsx b/packages/cli/src/integration-tests/continuousSession.test.tsx index 1f231cd26d..cfa272b391 100644 --- a/packages/cli/src/integration-tests/continuousSession.test.tsx +++ b/packages/cli/src/integration-tests/continuousSession.test.tsx @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, afterEach } from 'vitest'; +import { describe, it, expect, afterEach } from 'vitest'; import { AppRig } from '../test-utils/AppRig.js'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -26,26 +26,41 @@ describe('Continuous Session Integration', () => { ); rig = new AppRig({ fakeResponsesPath, + configOverrides: { + continuousSession: true, + }, }); await rig.initialize(); rig.render(); await rig.waitForIdle(); - // Set policies to AUTO so it proceeds without asking user - rig.setToolPolicy('checkpoint_state', PolicyDecision.ALLOW); - rig.setToolPolicy('compress', PolicyDecision.ALLOW); + // Use ASK_USER to pause and inspect the curated history at key moments + rig.setToolPolicy('checkpoint_state', PolicyDecision.ASK_USER); + rig.setToolPolicy('compress', PolicyDecision.ASK_USER); // Start the quest - await rig.type('Start the mission'); + await rig.type('Start the mission ' + 'PAD'.repeat(100)); await rig.pressEnter(); // 1. 
Wait for CheckpointState tool call await rig.waitForOutput('CheckpointState'); + // Verify curated history BEFORE checkpoint is applied + expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-before-checkpoint'); + await rig.resolveTool('CheckpointState'); // 2. Wait for Compress tool call await rig.waitForOutput('Compress'); + // Verify curated history contains the checkpoint + expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-with-checkpoint'); + await rig.resolveTool('Compress'); // 3. Wait for final model response after compression await rig.waitForOutput('Compression successful.'); + await rig.waitForIdle(); + + // Verify the final curated history: + // - Should contain the high-fidelity snapshot + // - Should NOT contain pre-compression turns + expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history'); }); }); diff --git a/packages/cli/src/integration-tests/distill_result.test.tsx b/packages/cli/src/integration-tests/distill_result.test.tsx new file mode 100644 index 0000000000..d23a707905 --- /dev/null +++ b/packages/cli/src/integration-tests/distill_result.test.tsx @@ -0,0 +1,86 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, afterEach } from 'vitest'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { AppRig } from '../test-utils/AppRig.js'; +import { PolicyDecision } from '@google/gemini-cli-core'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +describe('Distill Result Integration', () => { + let rig: AppRig | undefined; + + afterEach(async () => { + await rig?.unmount(); + }); + + it('should surgically replace a noisy tool result with a distilled version', async () => { + const fakeResponsesPath = path.resolve( + __dirname, + '../test-utils/fixtures/distill_result.responses', + ); + + rig = new AppRig({ + fakeResponsesPath, + configOverrides: { + continuousSession: true, + modelSteering: 
true, + }, + }); + + await rig.initialize(); + rig.render(); + await rig.waitForIdle(); + + rig.setMockCommands([ + { + command: /read_file/, + result: { + output: 'NOISE\n'.repeat(50) + 'SECRET_KEY="12345"\n' + 'NOISE\n'.repeat(50), + exitCode: 0, + }, + }, + ]); + + // Use ASK_USER to pause and inspect the request before each model turn + rig.setToolPolicy('read_file', PolicyDecision.ASK_USER); + rig.setToolPolicy('distill_result', PolicyDecision.ASK_USER); + + // 1. Initial Prompt: Audit for secrets + await rig.sendMessage('Audit src/ for secrets'); + + // 2. Model calls run_shell_command (the "Noise Bomb") + await rig.waitForOutput('ReadFile'); + // Verify the curated history sent to model contains the initial user prompt + expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-initial-prompt'); + + await rig.resolveTool('ReadFile'); + + // 3. Model realizes it's noisy and calls distill_result + await rig.waitForOutput('DistillResult'); + // Verify history now includes the massive noise + expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-request-with-noise'); + + await rig.resolveTool('DistillResult'); + + // 4. 
Model continues from the distilled state and finishes + await rig.waitForOutput(/found the SECRET_KEY/i); + await rig.waitForIdle(); + + // Verify the final curated history: + // - NO noise from the original read_file + // - original read_file response is replaced with our universal distillation schema + // - intermediate thoughts and the distill_result turn itself are elided + expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history'); + + // Verify final output contains the signal + const output = rig.getStaticOutput(); + expect(output).toContain('SECRET_KEY'); + expect(output).toContain('12345'); + }); +}); diff --git a/packages/cli/src/test-utils/AppRig.tsx b/packages/cli/src/test-utils/AppRig.tsx index f2aab488ab..45dd7db5a3 100644 --- a/packages/cli/src/test-utils/AppRig.tsx +++ b/packages/cli/src/test-utils/AppRig.tsx @@ -150,6 +150,7 @@ export class AppRig { private settings: LoadedSettings | undefined; private testDir: string; private sessionId: string; + private appRigId: string; private pendingConfirmations = new Map(); private breakpointTools = new Set(); @@ -165,6 +166,7 @@ export class AppRig { this.testDir = fs.mkdtempSync( path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`), ); + this.appRigId = path.basename(this.testDir).toLowerCase(); this.sessionId = `test-session-${uniqueId}`; activeRigs.set(this.sessionId, this); } @@ -702,6 +704,67 @@ export class AppRig { await this.pressEnter(); } + getSentRequests() { + if (!this.config) throw new Error('AppRig not initialized'); + return this.config.getContentGenerator().getSentRequests?.() || []; + } + + /** + * Helper to get the curated history (contents) sent in the most recent model request. + * This method scrubs unstable data like temp paths and IDs for deterministic goldens. 
+ */ + getLastSentRequestContents() { + const requests = this.getSentRequests(); + if (requests.length === 0) return []; + const contents = requests[requests.length - 1].contents || []; + return this.scrubUnstableData(contents); + } + + /** + * Gets the final curated history of the active chat session. + */ + getCuratedHistory() { + if (!this.config) throw new Error('AppRig not initialized'); + const history = this.config.getGeminiClient().getChat().getHistory(true); + return this.scrubUnstableData(history); + } + + private scrubUnstableData(contents: any) { + // Deeply scrub unstable data + const scrubbed = JSON.parse( + JSON.stringify(contents) + .replace(new RegExp(this.testDir, 'g'), '') + .replace(new RegExp(this.appRigId, 'g'), '') + .replace(new RegExp(this.sessionId, 'g'), ''), + ); + + if (scrubbed.length > 0) { + if (scrubbed[0].parts[0].text?.includes('')) { + scrubbed[0].parts[0].text = ''; + } + } + + const removeIds = (obj: any) => { + if (Array.isArray(obj)) { + obj.forEach(removeIds); + } else if (obj && typeof obj === 'object') { + if (obj.functionCall) { + obj.functionCall.id = ''; + } + if (obj.functionResponse) { + obj.functionResponse.id = ''; + if (obj.functionResponse?.response?.original_output_file) { + obj.functionResponse.response.original_output_file = ''; + } + } + Object.values(obj).forEach(removeIds); + } + }; + removeIds(scrubbed); + + return scrubbed; + } + async unmount() { // Clean up global state for this session sessionStateMap.delete(this.sessionId); diff --git a/packages/cli/src/test-utils/fixtures/continuous_session.responses b/packages/cli/src/test-utils/fixtures/continuous_session.responses index b1de5df98c..f981dbceb1 100644 --- a/packages/cli/src/test-utils/fixtures/continuous_session.responses +++ b/packages/cli/src/test-utils/fixtures/continuous_session.responses @@ -1,5 +1,4 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I will now checkpoint our 
progress."},{"functionCall":{"name":"checkpoint_state","args":{"summary":"GOAL: Implementation of session continuity.\nPROGRESS: Tools implemented.\nCONSTRAINT: Use high-fidelity summary."}}}]},"finishReason":"STOP"}]}]} -{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Checkpoint created. Now I will trigger compression to clear the context."},{"functionCall":{"name":"compress","args":{"force":true}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Checkpoint created. Now I will trigger compression to clear the context."},{"functionCall":{"name":"compress","args":{}}}]},"finishReason":"STOP"}]}]} {"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"\nImplement session continuity\nUse high-fidelity summary\nTools implemented: checkpoint_state, compress\n1. [DONE] Implement tools\n2. [IN PROGRESS] Verify continuity\n\n"}]}}],"finishReason":"STOP"}} -{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"The is accurate and preserves all critical details."}]},"finishReason":"STOP"}]}} {"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Compression successful. 
I have clear context and I remember our mission."}]},"finishReason":"STOP"}]}]} diff --git a/packages/cli/src/test-utils/fixtures/distill_result.responses b/packages/cli/src/test-utils/fixtures/distill_result.responses new file mode 100644 index 0000000000..1e77b2358b --- /dev/null +++ b/packages/cli/src/test-utils/fixtures/distill_result.responses @@ -0,0 +1,3 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"**Thought**\nI need to search for SECRET in the src/ directory."},{"functionCall":{"name":"read_file","args":{"file_path":"src/foo.txt"}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"**Thought**\nThe output is very noisy. I will distill it to keep the context clean."},{"functionCall":{"name":"distill_result","args":{"revised_text":"Found SECRET_KEY=\"12345\" in src/env.ts"}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I found the SECRET_KEY=\"12345\" in src/env.ts after distilling the search results."}]},"finishReason":"STOP"}]}]} diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index 1b7ea08250..8192baf13e 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -7,6 +7,7 @@ import type { Config } from '../config/config.js'; import { reportError } from '../utils/errorReporting.js'; import { GeminiChat, StreamEventType } from '../core/geminiChat.js'; +import { Turn } from '../core/turn.js'; import { Type, type Content, @@ -750,6 +751,7 @@ export class LocalAgentExecutor { } const role = LlmRole.SUBAGENT; + const turnId = Turn.generateId(); const responseStream = await chat.sendMessageStream( { @@ -760,6 +762,8 @@ export class LocalAgentExecutor { promptId, signal, role, + undefined, + turnId, ); const functionCalls: 
FunctionCall[] = []; diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 17a0ce25ca..4417ec0aa3 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -34,6 +34,7 @@ import { MemoryTool, setGeminiMdFilename } from '../tools/memoryTool.js'; import { WebSearchTool } from '../tools/web-search.js'; import { AskUserTool } from '../tools/ask-user.js'; import { CheckpointStateTool } from '../tools/checkpoint-state.js'; +import { DistillResultTool } from '../tools/distill-result.js'; import { CompressTool } from '../tools/compress.js'; import { ExitPlanModeTool } from '../tools/exit-plan-mode.js'; import { EnterPlanModeTool } from '../tools/enter-plan-mode.js'; @@ -52,9 +53,9 @@ import { } from '../telemetry/index.js'; import { coreEvents, CoreEvent } from '../utils/events.js'; import { tokenLimit } from '../core/tokenLimits.js'; +import { SideEffectService } from '../core/sideEffectService.js'; import { - DEFAULT_GEMINI_EMBEDDING_MODEL, - DEFAULT_GEMINI_FLASH_MODEL, + DEFAULT_GEMINI_EMBEDDING_MODEL, DEFAULT_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL, DEFAULT_GEMINI_MODEL_AUTO, isAutoModel, @@ -813,8 +814,10 @@ export class Config implements McpContext { private lastModeSwitchTime: number = performance.now(); readonly userHintService: UserHintService; private approvedPlanPath: string | undefined; + private readonly sideEffectService: SideEffectService; constructor(params: ConfigParameters) { + this.sideEffectService = new SideEffectService(); this.sessionId = params.sessionId; this.clientVersion = params.clientVersion ?? 'unknown'; this.approvedPlanPath = undefined; @@ -2160,6 +2163,10 @@ export class Config implements McpContext { return this.geminiClient; } + getSideEffectService(): SideEffectService { + return this.sideEffectService; + } + /** * Updates the system instruction with the latest user memory. * Whenever the user memory (GEMINI.md files) is updated. 
@@ -2878,6 +2885,9 @@ export class Config implements McpContext { maybeRegister(CheckpointStateTool, () => registry.registerTool(new CheckpointStateTool(this, this.messageBus)), ); + maybeRegister(DistillResultTool, () => + registry.registerTool(new DistillResultTool(this, this.messageBus)), + ); maybeRegister(CompressTool, () => registry.registerTool(new CompressTool(this, this.messageBus)), ); diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 939fe08b6a..5b0cb7c1c2 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2154,7 +2154,7 @@ ${JSON.stringify( })(); beforeEach(() => { - client['forceFullIdeContext'] = false; // Reset before each delta test + // client['forceFullIdeContext'] = false; // Reset before each delta test vi.spyOn(client, 'tryCompressChat').mockResolvedValue({ originalTokenCount: 0, newTokenCount: 0, @@ -2280,24 +2280,24 @@ ${JSON.stringify( it.each(testCases)( '$description', async ({ - previousActiveFile, + // previousActiveFile, currentActiveFile, shouldSendContext, }) => { // Setup previous context - client['lastSentIdeContext'] = { - workspaceState: { - openFiles: [ - { - path: previousActiveFile.path, - cursor: previousActiveFile.cursor, - selectedText: previousActiveFile.selectedText, - isActive: true, - timestamp: Date.now() - 1000, - }, - ], - }, - }; + // client['lastSentIdeContext'] = { + // workspaceState: { + // openFiles: [ + // { + // path: previousActiveFile.path, + // cursor: previousActiveFile.cursor, + // selectedText: previousActiveFile.selectedText, + // isActive: true, + // timestamp: Date.now() - 1000, + // }, + // ], + // }, + // }; // Setup current context vi.mocked(ideContextStore.get).mockReturnValue({ @@ -2351,19 +2351,19 @@ ${JSON.stringify( }; // Setup previous context - client['lastSentIdeContext'] = { - workspaceState: { - openFiles: [ - { - path: activeFile.path, - cursor: activeFile.cursor, - selectedText: 
activeFile.selectedText, - isActive: true, - timestamp: Date.now() - 1000, - }, - ], - }, - }; + // client['lastSentIdeContext'] = { + // workspaceState: { + // openFiles: [ + // { + // path: activeFile.path, + // cursor: activeFile.cursor, + // selectedText: activeFile.selectedText, + // isActive: true, + // timestamp: Date.now() - 1000, + // }, + // ], + // }, + // }; // Setup current context (same as previous) vi.mocked(ideContextStore.get).mockReturnValue({ diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 5d66259ed4..13d540c1ff 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -12,10 +12,9 @@ import { type Tool, type GenerateContentResponse, } from '@google/genai'; -import { partListUnionToString } from './geminiRequest.js'; import { getDirectoryContextString, - getInitialChatHistory, + getEnvironmentContext, } from '../utils/environmentContext.js'; import { Turn, @@ -28,9 +27,9 @@ import { } from './compression-status.js'; import type { Config } from '../config/config.js'; import { getCoreSystemPrompt } from './prompts.js'; -import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js'; import { reportError } from '../utils/errorReporting.js'; import { GeminiChat } from './geminiChat.js'; +import { debugLogger } from '../utils/debugLogger.js'; import { retryWithBackoff, type RetryAvailabilityContext, @@ -46,25 +45,14 @@ import type { ContentGenerator } from './contentGenerator.js'; import { LoopDetectionService } from '../services/loopDetectionService.js'; import type { ChatCompressionService } from '../services/chatCompressionService.js'; import type { ContinuityCompressionService } from '../services/continuityCompressionService.js'; -import { ideContextStore } from '../ide/ideContext.js'; -import { - logContentRetryFailure, - logNextSpeakerCheck, -} from '../telemetry/loggers.js'; import type { DefaultHookOutput, AfterAgentHookOutput, } from '../hooks/types.js'; -import { - 
ContentRetryFailureEvent, - NextSpeakerCheckEvent, - type LlmRole, -} from '../telemetry/types.js'; +import { type LlmRole } from '../telemetry/types.js'; import { uiTelemetryService } from '../telemetry/uiTelemetry.js'; -import type { IdeContext, File } from '../ide/types.js'; import { handleFallback } from '../fallback/handler.js'; import type { RoutingContext } from '../routing/routingStrategy.js'; -import { debugLogger } from '../utils/debugLogger.js'; import type { ModelConfigKey } from '../services/modelConfigService.js'; import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js'; import { calculateRequestTokenCount } from '../utils/tokenCalculation.js'; @@ -72,7 +60,7 @@ import { applyModelSelection, createAvailabilityContextProvider, } from '../availability/policyHelpers.js'; -import { resolveModel, isGemini2Model } from '../config/models.js'; +import { resolveModel } from '../config/models.js'; import { partToString } from '../utils/partUtils.js'; import { coreEvents, CoreEvent } from '../utils/events.js'; @@ -98,8 +86,6 @@ export class GeminiClient { private readonly toolOutputMaskingService: ToolOutputMaskingService; private lastPromptId: string; private currentSequenceModel: string | null = null; - private lastSentIdeContext: IdeContext | undefined; - private forceFullIdeContext = true; /** * At any point in this conversation, was compression triggered without @@ -270,7 +256,6 @@ export class GeminiClient { setHistory(history: Content[]) { this.getChat().setHistory(history); this.updateTelemetryTokenCount(); - this.forceFullIdeContext = true; } private lastUsedModelId?: string; @@ -345,7 +330,6 @@ export class GeminiClient { extraHistory?: Content[], resumedSessionData?: ResumedSessionData, ): Promise { - this.forceFullIdeContext = true; this.hasFailedCompressionAttempt = false; this.lastUsedModelId = undefined; @@ -353,7 +337,12 @@ export class GeminiClient { const toolDeclarations = toolRegistry.getFunctionDeclarations(); const 
tools: Tool[] = [{ functionDeclarations: toolDeclarations }]; - const history = await getInitialChatHistory(this.config, extraHistory); + // Use environment context as a singleton session context via side-effect + const envParts = await getEnvironmentContext(this.config); + const envContextString = envParts + .map((part) => part.text || '') + .join('\n\n'); + this.config.getSideEffectService().setSessionContext(envContextString); try { const systemMemory = this.config.getUserMemory(); @@ -362,7 +351,7 @@ export class GeminiClient { this.config, systemInstruction, tools, - history, + extraHistory ?? [], resumedSessionData, async (modelId: string) => { this.lastUsedModelId = modelId; @@ -376,181 +365,13 @@ export class GeminiClient { await reportError( error, 'Error initializing Gemini chat session.', - history, + extraHistory, 'startChat', ); throw new Error(`Failed to initialize chat: ${getErrorMessage(error)}`); } } - private getIdeContextParts(forceFullContext: boolean): { - contextParts: string[]; - newIdeContext: IdeContext | undefined; - } { - const currentIdeContext = ideContextStore.get(); - if (!currentIdeContext) { - return { contextParts: [], newIdeContext: undefined }; - } - - if (forceFullContext || !this.lastSentIdeContext) { - // Send full context as JSON - const openFiles = currentIdeContext.workspaceState?.openFiles || []; - const activeFile = openFiles.find((f) => f.isActive); - const otherOpenFiles = openFiles - .filter((f) => !f.isActive) - .map((f) => f.path); - - const contextData: Record = {}; - - if (activeFile) { - contextData['activeFile'] = { - path: activeFile.path, - cursor: activeFile.cursor - ? 
{ - line: activeFile.cursor.line, - character: activeFile.cursor.character, - } - : undefined, - selectedText: activeFile.selectedText || undefined, - }; - } - - if (otherOpenFiles.length > 0) { - contextData['otherOpenFiles'] = otherOpenFiles; - } - - if (Object.keys(contextData).length === 0) { - return { contextParts: [], newIdeContext: currentIdeContext }; - } - - const jsonString = JSON.stringify(contextData, null, 2); - const contextParts = [ - "Here is the user's editor context as a JSON object. This is for your information only.", - '```json', - jsonString, - '```', - ]; - - if (this.config.getDebugMode()) { - debugLogger.log(contextParts.join('\n')); - } - return { - contextParts, - newIdeContext: currentIdeContext, - }; - } else { - // Calculate and send delta as JSON - const delta: Record = {}; - const changes: Record = {}; - - const lastFiles = new Map( - (this.lastSentIdeContext.workspaceState?.openFiles || []).map( - (f: File) => [f.path, f], - ), - ); - const currentFiles = new Map( - (currentIdeContext.workspaceState?.openFiles || []).map((f: File) => [ - f.path, - f, - ]), - ); - - const openedFiles: string[] = []; - for (const [path] of currentFiles.entries()) { - if (!lastFiles.has(path)) { - openedFiles.push(path); - } - } - if (openedFiles.length > 0) { - changes['filesOpened'] = openedFiles; - } - - const closedFiles: string[] = []; - for (const [path] of lastFiles.entries()) { - if (!currentFiles.has(path)) { - closedFiles.push(path); - } - } - if (closedFiles.length > 0) { - changes['filesClosed'] = closedFiles; - } - - const lastActiveFile = ( - this.lastSentIdeContext.workspaceState?.openFiles || [] - ).find((f: File) => f.isActive); - const currentActiveFile = ( - currentIdeContext.workspaceState?.openFiles || [] - ).find((f: File) => f.isActive); - - if (currentActiveFile) { - if (!lastActiveFile || lastActiveFile.path !== currentActiveFile.path) { - changes['activeFileChanged'] = { - path: currentActiveFile.path, - cursor: 
currentActiveFile.cursor - ? { - line: currentActiveFile.cursor.line, - character: currentActiveFile.cursor.character, - } - : undefined, - selectedText: currentActiveFile.selectedText || undefined, - }; - } else { - const lastCursor = lastActiveFile.cursor; - const currentCursor = currentActiveFile.cursor; - if ( - currentCursor && - (!lastCursor || - lastCursor.line !== currentCursor.line || - lastCursor.character !== currentCursor.character) - ) { - changes['cursorMoved'] = { - path: currentActiveFile.path, - cursor: { - line: currentCursor.line, - character: currentCursor.character, - }, - }; - } - - const lastSelectedText = lastActiveFile.selectedText || ''; - const currentSelectedText = currentActiveFile.selectedText || ''; - if (lastSelectedText !== currentSelectedText) { - changes['selectionChanged'] = { - path: currentActiveFile.path, - selectedText: currentSelectedText, - }; - } - } - } else if (lastActiveFile) { - changes['activeFileChanged'] = { - path: null, - previousPath: lastActiveFile.path, - }; - } - - if (Object.keys(changes).length === 0) { - return { contextParts: [], newIdeContext: currentIdeContext }; - } - - delta['changes'] = changes; - const jsonString = JSON.stringify(delta, null, 2); - const contextParts = [ - "Here is a summary of changes in the user's editor context, in JSON format. 
This is for your information only.", - '```json', - jsonString, - '```', - ]; - - if (this.config.getDebugMode()) { - debugLogger.log(contextParts.join('\n')); - } - return { - contextParts, - newIdeContext: currentIdeContext, - }; - } - } - private _getActiveModelForCurrentTurn(): string { if (this.currentSequenceModel) { return this.currentSequenceModel; @@ -569,11 +390,11 @@ export class GeminiClient { signal: AbortSignal, prompt_id: string, boundedTurns: number, - isInvalidStreamRetry: boolean, displayContent?: PartListUnion, + turnId?: string, ): AsyncGenerator { - // Re-initialize turn (it was empty before if in loop, or new instance) - let turn = new Turn(this.getChat(), prompt_id); + // Re-initialize turn + let turn = new Turn(this.getChat(), prompt_id, turnId!); this.sessionTurnCount++; if ( @@ -602,8 +423,7 @@ export class GeminiClient { await this.tryMaskToolOutputs(this.getHistory()); - // Estimate tokens. For text-only requests, we estimate based on character length. - // For requests with non-text parts (like images, tools), we use the countTokens API. + // Estimate tokens const estimatedRequestTokenCount = await calculateRequestTokenCount( request, this.getContentGeneratorOrFail(), @@ -618,58 +438,12 @@ export class GeminiClient { return turn; } - // Prevent context updates from being sent while a tool call is - // waiting for a response. The Gemini API requires that a functionResponse - // part from the user immediately follows a functionCall part from the model - // in the conversation history . The IDE context is not discarded; it will - // be included in the next regular message sent to the model. - const history = this.getHistory(); - const lastMessage = - history.length > 0 ? 
history[history.length - 1] : undefined; - const hasPendingToolCall = - !!lastMessage && - lastMessage.role === 'model' && - (lastMessage.parts?.some((p) => 'functionCall' in p) || false); - - if (this.config.getIdeMode() && !hasPendingToolCall) { - const { contextParts, newIdeContext } = this.getIdeContextParts( - this.forceFullIdeContext || history.length === 0, - ); - if (contextParts.length > 0) { - this.getChat().addHistory({ - role: 'user', - parts: [{ text: contextParts.join('\n') }], - }); - } - this.lastSentIdeContext = newIdeContext; - this.forceFullIdeContext = false; - } - // Re-initialize turn with fresh history - turn = new Turn(this.getChat(), prompt_id); + turn = new Turn(this.getChat(), prompt_id, turnId!); const controller = new AbortController(); const linkedSignal = AbortSignal.any([signal, controller.signal]); - const loopResult = await this.loopDetector.turnStarted(signal); - if (loopResult.count > 1) { - yield { type: GeminiEventType.LoopDetected }; - return turn; - } else if (loopResult.count === 1) { - if (boundedTurns <= 1) { - yield { type: GeminiEventType.MaxSessionTurns }; - return turn; - } - return yield* this._recoverFromLoop( - loopResult, - signal, - prompt_id, - boundedTurns, - isInvalidStreamRetry, - displayContent, - ); - } - const routingContext: RoutingContext = { history: this.getChat().getHistory(/*curated=*/ true), request, @@ -679,7 +453,7 @@ export class GeminiClient { let modelToUse: string; - // Determine Model (Stickiness vs. Routing) + // Determine Model if (this.currentSequenceModel) { modelToUse = this.currentSequenceModel; } else { @@ -705,7 +479,7 @@ export class GeminiClient { } this.currentSequenceModel = modelToUse; - // Update tools with the final modelId to ensure model-dependent descriptions are used. 
+ // Update tools with the final modelId await this.setTools(modelToUse); const resultStream = turn.run( @@ -715,41 +489,20 @@ export class GeminiClient { displayContent, ); let isError = false; - let isInvalidStream = false; - let loopDetectedAbort = false; - let loopRecoverResult: { detail?: string } | undefined; for await (const event of resultStream) { - const loopResult = this.loopDetector.addAndCheck(event); - if (loopResult.count > 1) { - yield { type: GeminiEventType.LoopDetected }; - loopDetectedAbort = true; - break; - } else if (loopResult.count === 1) { - if (boundedTurns <= 1) { - yield { type: GeminiEventType.MaxSessionTurns }; - loopDetectedAbort = true; - break; - } - loopRecoverResult = loopResult; - break; - } yield event; this.updateTelemetryTokenCount(); - if (event.type === GeminiEventType.InvalidStream) { - isInvalidStream = true; - } if (event.type === GeminiEventType.Error) { isError = true; } if (event.type === GeminiEventType.ToolCallResponse) { const toolResponse = event.value; + if (toolResponse.newHistory) { - this.getChat().replaceHistory(toolResponse.newHistory); - // Yield the event so UI knows compression happened yield { type: GeminiEventType.ChatCompressed, value: toolResponse.compressionInfo ?? { @@ -762,106 +515,23 @@ export class GeminiClient { } } - if (loopDetectedAbort) { - controller.abort(); - return turn; - } - - if (loopRecoverResult) { - return yield* this._recoverFromLoop( - loopRecoverResult, - signal, - prompt_id, - boundedTurns, - isInvalidStreamRetry, - displayContent, - controller, - ); - } - if (isError) { return turn; } // Update cumulative response in hook state - // We do this immediately after the stream finishes for THIS turn. 
const hooksEnabled = this.config.getEnableHooks(); if (hooksEnabled) { const responseText = turn.getResponseText() || ''; const hookState = this.hookStateMap.get(prompt_id); if (hookState && responseText) { - // Append with newline if not empty hookState.cumulativeResponse = hookState.cumulativeResponse ? `${hookState.cumulativeResponse}\n${responseText}` : responseText; } } - if (isInvalidStream) { - if ( - this.config.getContinueOnFailedApiCall() && - isGemini2Model(modelToUse) - ) { - if (isInvalidStreamRetry) { - logContentRetryFailure( - this.config, - new ContentRetryFailureEvent( - 4, - 'FAILED_AFTER_PROMPT_INJECTION', - modelToUse, - ), - ); - return turn; - } - const nextRequest = [{ text: 'System: Please continue.' }]; - // Recursive call - update turn with result - turn = yield* this.sendMessageStream( - nextRequest, - signal, - prompt_id, - boundedTurns - 1, - true, - displayContent, - ); - return turn; - } - } - - if (!turn.pendingToolCalls.length && signal && !signal.aborted) { - if ( - !this.config.getQuotaErrorOccurred() && - !this.config.getSkipNextSpeakerCheck() - ) { - const nextSpeakerCheck = await checkNextSpeaker( - this.getChat(), - this.config.getBaseLlmClient(), - signal, - prompt_id, - ); - logNextSpeakerCheck( - this.config, - new NextSpeakerCheckEvent( - prompt_id, - turn.finishReason?.toString() || '', - nextSpeakerCheck?.next_speaker || '', - ), - ); - if (nextSpeakerCheck?.next_speaker === 'model') { - const nextRequest = [{ text: 'Please continue.' 
}]; - turn = yield* this.sendMessageStream( - nextRequest, - signal, - prompt_id, - boundedTurns - 1, - false, // isInvalidStreamRetry is false - displayContent, - ); - return turn; - } - } - } - const turnResult = turn; - return turnResult; + return turn; } async *sendMessageStream( @@ -871,6 +541,7 @@ export class GeminiClient { turns: number = MAX_TURNS, isInvalidStreamRetry: boolean = false, displayContent?: PartListUnion, + turnId?: string, ): AsyncGenerator { if (!isInvalidStreamRetry) { this.config.resetTurn(); @@ -880,7 +551,6 @@ export class GeminiClient { const messageBus = this.config.getMessageBus(); if (this.lastPromptId !== prompt_id) { - this.loopDetector.reset(prompt_id, partListUnionToString(request)); this.hookStateMap.delete(this.lastPromptId); this.lastPromptId = prompt_id; this.currentSequenceModel = null; @@ -891,18 +561,14 @@ export class GeminiClient { if (hookResult) { if ( 'type' in hookResult && - hookResult.type === GeminiEventType.AgentExecutionStopped + (hookResult.type === GeminiEventType.AgentExecutionStopped || + hookResult.type === GeminiEventType.AgentExecutionBlocked) ) { - // Add user message to history before returning so it's kept in the transcript - this.getChat().addHistory(createUserContent(request)); + if (hookResult.type === GeminiEventType.AgentExecutionStopped) { + this.getChat().addHistory(createUserContent(request)); + } yield hookResult; - return new Turn(this.getChat(), prompt_id); - } else if ( - 'type' in hookResult && - hookResult.type === GeminiEventType.AgentExecutionBlocked - ) { - yield hookResult; - return new Turn(this.getChat(), prompt_id); + return new Turn(this.getChat(), prompt_id, turnId!); } else if ('additionalContext' in hookResult) { const additionalContext = hookResult.additionalContext; if (additionalContext) { @@ -917,7 +583,8 @@ export class GeminiClient { } const boundedTurns = Math.min(turns, MAX_TURNS); - let turn = new Turn(this.getChat(), prompt_id); + const currentTurnId = turnId ?? 
Turn.generateId(); + let turn = new Turn(this.getChat(), prompt_id, currentTurnId); try { turn = yield* this.processTurn( @@ -925,10 +592,26 @@ export class GeminiClient { signal, prompt_id, boundedTurns, - isInvalidStreamRetry, displayContent, + currentTurnId, ); + // Handle re-prompt request (e.g. from context compression or distillation) + const hasReprompt = this.getChat().consumeRepromptRequest(); + + if (hasReprompt && boundedTurns > 1) { + debugLogger.debug('[PROJECT CLARITY] Re-prompt requested.'); + + yield* this.sendMessageStream( + [{ text: '[System: State updated. Please continue.]' }], + signal, + prompt_id, + boundedTurns - 1, + false, + displayContent, + ); + } + // Fire AfterAgent hook if we have a turn and no pending tools if (hooksEnabled && messageBus) { const hookOutput = await this.fireAfterAgentHookSafe( @@ -937,7 +620,6 @@ export class GeminiClient { turn, ); - // Cast to AfterAgentHookOutput for access to shouldClearContext() const afterAgentOutput = hookOutput as AfterAgentHookOutput | undefined; if (afterAgentOutput?.shouldStopExecution()) { @@ -950,7 +632,6 @@ export class GeminiClient { contextCleared, }, }; - // Clear context if requested (honor both stop + clear) if (contextCleared) { await this.resetChat(); } @@ -968,7 +649,6 @@ export class GeminiClient { contextCleared, }, }; - // Clear context if requested if (contextCleared) { await this.resetChat(); } @@ -1128,9 +808,6 @@ export class GeminiClient { prompt_id: string, force: boolean = false, ): Promise { - // If the model is 'auto', we will use a placeholder model to check. - // Compression occurs before we choose a model, so calling `count_tokens` - // before the model is chosen would result in an error. 
const model = this._getActiveModelForCurrentTurn(); const { newHistory, info } = await this.compressionService.compress( @@ -1142,7 +819,6 @@ export class GeminiClient { this.hasFailedCompressionAttempt, ); - if ( info.compressionStatus === CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT @@ -1151,30 +827,13 @@ export class GeminiClient { this.hasFailedCompressionAttempt || !force; } else if (info.compressionStatus === CompressionStatus.COMPRESSED) { if (newHistory) { - // capture current session data before resetting - const currentRecordingService = - this.getChat().getChatRecordingService(); - const conversation = currentRecordingService.getConversation(); - const filePath = currentRecordingService.getConversationFilePath(); - - let resumedData: ResumedSessionData | undefined; - - if (conversation && filePath) { - resumedData = { conversation, filePath }; - } - - this.chat = await this.startChat(newHistory, resumedData); + this.getChat().replaceHistory(newHistory); this.updateTelemetryTokenCount(); - this.forceFullIdeContext = true; } } else if (info.compressionStatus === CompressionStatus.CONTENT_TRUNCATED) { if (newHistory) { - // We truncated content to save space, but summarization is still "failed". - // We update the chat context directly without resetting the failure flag. this.getChat().setHistory(newHistory); this.updateTelemetryTokenCount(); - // We don't reset the chat session fully like in COMPRESSED because - // this is a lighter-weight intervention. } } @@ -1196,42 +855,4 @@ export class GeminiClient { this.getChat().setHistory(result.newHistory); } } - - /** - * Handles loop recovery by providing feedback to the model and initiating a new turn. 
- */ - private _recoverFromLoop( - loopResult: { detail?: string }, - signal: AbortSignal, - prompt_id: string, - boundedTurns: number, - isInvalidStreamRetry: boolean, - displayContent?: PartListUnion, - controllerToAbort?: AbortController, - ): AsyncGenerator { - controllerToAbort?.abort(); - - // Clear the detection flag so the recursive turn can proceed, but the count remains 1. - this.loopDetector.clearDetection(); - - const feedbackText = `System: Potential loop detected. Details: ${loopResult.detail || 'Repetitive patterns identified'}. Please take a step back and confirm you're making forward progress. If not, take a step back, analyze your previous actions and rethink how you're approaching the problem. Avoid repeating the same tool calls or responses without new results.`; - - if (this.config.getDebugMode()) { - debugLogger.warn( - 'Iterative Loop Recovery: Injecting feedback message to model.', - ); - } - - const feedback = [{ text: feedbackText }]; - - // Recursive call with feedback - return this.sendMessageStream( - feedback, - signal, - prompt_id, - boundedTurns - 1, - isInvalidStreamRetry, - displayContent, - ); - } } diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 2ce5420335..d4469e86bb 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -51,6 +51,8 @@ export interface ContentGenerator { userTierName?: string; paidTier?: GeminiUserTier; + + getSentRequests?(): GenerateContentParameters[]; } export enum AuthType { diff --git a/packages/core/src/core/coreToolHookTriggers.ts b/packages/core/src/core/coreToolHookTriggers.ts index cbd90e8039..cd17dd5421 100644 --- a/packages/core/src/core/coreToolHookTriggers.ts +++ b/packages/core/src/core/coreToolHookTriggers.ts @@ -77,6 +77,7 @@ export async function executeToolWithHooks( setPidCallback?: (pid: number) => void, config?: Config, originalRequestName?: string, + callId?: string, ): Promise 
{ // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const toolInput = (invocation.params || {}) as Record; @@ -161,6 +162,7 @@ export async function executeToolWithHooks( signal, liveOutputCallback, shellExecutionConfig, + callId, setPidCallback, ); } else { @@ -168,6 +170,7 @@ export async function executeToolWithHooks( signal, liveOutputCallback, shellExecutionConfig, + callId, ); } diff --git a/packages/core/src/core/fakeContentGenerator.ts b/packages/core/src/core/fakeContentGenerator.ts index 9ecd75a99d..d8703434ff 100644 --- a/packages/core/src/core/fakeContentGenerator.ts +++ b/packages/core/src/core/fakeContentGenerator.ts @@ -42,6 +42,7 @@ export type FakeResponse = // CLI argument. export class FakeContentGenerator implements ContentGenerator { private callCounter = 0; + private readonly sentRequests: GenerateContentParameters[] = []; userTier?: UserTierId; userTierName?: string; paidTier?: GeminiUserTier; @@ -84,6 +85,7 @@ export class FakeContentGenerator implements ContentGenerator { // eslint-disable-next-line @typescript-eslint/no-unused-vars role: LlmRole, ): Promise { + this.sentRequests.push(request); // eslint-disable-next-line @typescript-eslint/no-unsafe-return return Object.setPrototypeOf( this.getNextResponse('generateContent', request), @@ -97,6 +99,7 @@ export class FakeContentGenerator implements ContentGenerator { // eslint-disable-next-line @typescript-eslint/no-unused-vars role: LlmRole, ): Promise> { + this.sentRequests.push(request); const responses = this.getNextResponse('generateContentStream', request); async function* stream() { for (const response of responses) { @@ -124,4 +127,8 @@ export class FakeContentGenerator implements ContentGenerator { EmbedContentResponse.prototype, ); } + + getSentRequests(): GenerateContentParameters[] { + return this.sentRequests; + } } diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index d506babaf3..a09cf171be 100644 --- 
a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -19,28 +19,21 @@ import { type GenerateContentParameters, } from '@google/genai'; import { toParts } from '../code_assist/converter.js'; -import { retryWithBackoff, isRetryableError } from '../utils/retry.js'; +import { retryWithBackoff } from '../utils/retry.js'; import type { ValidationRequiredError } from '../utils/googleQuotaErrors.js'; import type { Config } from '../config/config.js'; import { resolveModel, - isGemini2Model, supportsModernFeatures, } from '../config/models.js'; import { hasCycleInSchema } from '../tools/tools.js'; import type { StructuredError } from './turn.js'; import type { CompletedToolCall } from './coreToolScheduler.js'; -import { - logContentRetry, - logContentRetryFailure, -} from '../telemetry/loggers.js'; import { ChatRecordingService, type ResumedSessionData, } from '../services/chatRecordingService.js'; import { - ContentRetryEvent, - ContentRetryFailureEvent, type LlmRole, } from '../telemetry/types.js'; import { handleFallback } from '../fallback/handler.js'; @@ -53,6 +46,10 @@ import { createAvailabilityContextProvider, } from '../availability/policyHelpers.js'; import { coreEvents } from '../utils/events.js'; +import { HistoryManager, isValidContent } from './historyManager.js'; +import { HistorySideEffectApplicator } from './sideEffectApplicator.js'; +import { debugLogger } from '../utils/debugLogger.js'; +import { SideEffectType, type SideEffectService } from './sideEffectService.js'; export enum StreamEventType { /** A regular content chunk from the API. */ @@ -72,21 +69,6 @@ export type StreamEvent = | { type: StreamEventType.AGENT_EXECUTION_STOPPED; reason: string } | { type: StreamEventType.AGENT_EXECUTION_BLOCKED; reason: string }; -/** - * Options for retrying due to invalid content from the model. - */ -interface ContentRetryOptions { - /** Total number of attempts to make (1 initial + N retries). 
*/ - maxAttempts: number; - /** The base delay in milliseconds for linear backoff. */ - initialDelayMs: number; -} - -const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = { - maxAttempts: 2, // 1 initial call + 1 retry - initialDelayMs: 500, -}; - export const SYNTHETIC_THOUGHT_SIGNATURE = 'skip_thought_signature_validator'; /** @@ -107,8 +89,6 @@ export function isValidNonThoughtTextPart(part: Part): boolean { return ( typeof part.text === 'string' && !part.thought && - // Technically, the model should never generate parts that have text and - // any of these but we don't trust them so check anyways. !part.functionCall && !part.functionResponse && !part.inlineData && @@ -116,91 +96,13 @@ export function isValidNonThoughtTextPart(part: Part): boolean { ); } -function isValidContent(content: Content): boolean { - if (content.parts === undefined || content.parts.length === 0) { - return false; - } - for (const part of content.parts) { - if (part === undefined || Object.keys(part).length === 0) { - return false; - } - if (!part.thought && part.text !== undefined && part.text === '') { - return false; - } - } - return true; -} - /** - * Validates the history contains the correct roles. - * - * @throws Error if the history does not start with a user turn. - * @throws Error if the history contains an invalid role. - */ -function validateHistory(history: Content[]) { - for (const content of history) { - if (content.role !== 'user' && content.role !== 'model') { - throw new Error(`Role must be user or model, but got ${content.role}.`); - } - } -} - -/** - * Extracts the curated (valid) history from a comprehensive history. - * - * @remarks - * The model may sometimes generate invalid or empty contents(e.g., due to safety - * filters or recitation). Extracting valid turns from the history - * ensures that subsequent requests could be accepted by the model. 
- */ -function extractCuratedHistory(comprehensiveHistory: Content[]): Content[] { - if (comprehensiveHistory === undefined || comprehensiveHistory.length === 0) { - return []; - } - const curatedHistory: Content[] = []; - const length = comprehensiveHistory.length; - let i = 0; - while (i < length) { - if (comprehensiveHistory[i].role === 'user') { - curatedHistory.push(comprehensiveHistory[i]); - i++; - } else { - const modelOutput: Content[] = []; - let isValid = true; - while (i < length && comprehensiveHistory[i].role === 'model') { - modelOutput.push(comprehensiveHistory[i]); - if (isValid && !isValidContent(comprehensiveHistory[i])) { - isValid = false; - } - i++; - } - if (isValid) { - curatedHistory.push(...modelOutput); - } - } - } - return curatedHistory; -} - -/** - * Custom error to signal that a stream completed with invalid content, - * which should trigger a retry. + * Custom error to signal that a stream completed with invalid content. */ export class InvalidStreamError extends Error { - readonly type: - | 'NO_FINISH_REASON' - | 'NO_RESPONSE_TEXT' - | 'MALFORMED_FUNCTION_CALL' - | 'UNEXPECTED_TOOL_CALL'; + readonly type: string; - constructor( - message: string, - type: - | 'NO_FINISH_REASON' - | 'NO_RESPONSE_TEXT' - | 'MALFORMED_FUNCTION_CALL' - | 'UNEXPECTED_TOOL_CALL', - ) { + constructor(message: string, type: string) { super(message); this.name = 'InvalidStreamError'; this.type = type; @@ -231,76 +133,55 @@ export class AgentExecutionBlockedError extends Error { } /** - * Chat session that enables sending messages to the model with previous - * conversation context. - * - * @remarks - * The session maintains all the turns between user and model. + * Chat session that enables sending messages to the model. */ export class GeminiChat { - // A promise to represent the current state of the message being sent to the - // model. 
private sendPromise: Promise = Promise.resolve(); private readonly chatRecordingService: ChatRecordingService; private lastPromptTokenCount: number; + private streamingDepth = 0; + private pendingHistory: Content[] | null = null; + private readonly historyManager: HistoryManager; + private readonly sideEffectService: SideEffectService; + private readonly applicator: HistorySideEffectApplicator; + private repromptRequested = false; constructor( private readonly config: Config, private systemInstruction: string = '', private tools: Tool[] = [], - private history: Content[] = [], + history: Content[] = [], resumedSessionData?: ResumedSessionData, private readonly onModelChanged?: (modelId: string) => Promise, kind: 'main' | 'subagent' = 'main', ) { - validateHistory(history); + this.historyManager = new HistoryManager(history); + this.sideEffectService = config.getSideEffectService(); + this.applicator = new HistorySideEffectApplicator(this.historyManager); this.chatRecordingService = new ChatRecordingService(config); this.chatRecordingService.initialize(resumedSessionData, kind); this.lastPromptTokenCount = estimateTokenCountSync( - this.history.flatMap((c) => c.parts || []), + this.historyManager + .getComprehensiveHistory() + .flatMap((c) => c.parts || []), ); } - private continuityAnchor?: string; - - setContinuityAnchor(anchor: string) { - this.continuityAnchor = anchor; + /** + * Marks a specific tool call ID for elision from the history. + */ + addElidedCallId(callId: string): void { + this.historyManager.addElidedCallId(callId); } getContinuityAnchor(): string | undefined { - return this.continuityAnchor; + return this.historyManager.getContinuityAnchor(); } setSystemInstruction(sysInstr: string) { this.systemInstruction = sysInstr; } - /** - * Sends a message to the model and returns the response in chunks. - * - * @remarks - * This method will wait for the previous message to be processed before - * sending the next message. 
- * - * @see {@link Chat#sendMessage} for non-streaming method. - * @param modelConfigKey - The key for the model config. - * @param message - The list of messages to send. - * @param prompt_id - The ID of the prompt. - * @param signal - An abort signal for this message. - * @param displayContent - An optional user-friendly version of the message to record. - * @return The model's response. - * - * @example - * ```ts - * const chat = ai.chats.create({model: 'gemini-2.0-flash'}); - * const response = await chat.sendMessageStream({ - * message: 'Why is the sky blue?' - * }); - * for await (const chunk of response) { - * console.log(chunk.text); - * } - * ``` - */ async sendMessageStream( modelConfigKey: ModelConfigKey, message: PartListUnion, @@ -308,7 +189,11 @@ export class GeminiChat { signal: AbortSignal, role: LlmRole, displayContent?: PartListUnion, + turnId?: string, ): Promise> { + if (!turnId) { + throw new Error("Turn ID is now required"); + } await this.sendPromise; let streamDoneResolver: () => void; @@ -317,12 +202,20 @@ export class GeminiChat { }); this.sendPromise = streamDonePromise; + if (this.streamingDepth === 0) { + // Flush any bootstrap side-effects (like session context) before starting the first turn. + this.applyPendingSideEffects(); + if (turnId) { + this.sideEffectService.setCurrentTurnId(turnId); + debugLogger.debug(`[PROJECT CLARITY] Starting turn ${turnId}`); + } + } + const userContent = createUserContent(message); const { model } = this.config.modelConfigService.getResolvedConfig(modelConfigKey); - // Record user input - capture complete message with all parts (text, files, images, etc.) 
- // but skip recording function responses (tool call results) as they should be stored in tool call records + // Record user input if (!isFunctionResponse(userContent)) { const userMessageParts = userContent.parts || []; const userMessageContent = partListUnionToString(userMessageParts); @@ -346,136 +239,107 @@ export class GeminiChat { }); } - // Add user content to history ONCE before any attempts. - this.history.push(userContent); - const requestContents = this.getHistory(true); + const requestContents = + this.historyManager.getHistoryForRequest(userContent); - const streamWithRetries = async function* ( + const stream = async function* ( this: GeminiChat, ): AsyncGenerator { + this.streamingDepth++; try { - let lastError: unknown = new Error('Request failed after all retries.'); + const apiStream = await this.makeApiCallAndProcessStream( + modelConfigKey, + requestContents, + prompt_id, + signal, + role, + userContent, + ); - const maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; - - for (let attempt = 0; attempt < maxAttempts; attempt++) { - let isConnectionPhase = true; - try { - if (attempt > 0) { - yield { type: StreamEventType.RETRY }; - } - - // If this is a retry, update the key with the new context. - const currentConfigKey = - attempt > 0 - ? 
{ ...modelConfigKey, isRetry: true } - : modelConfigKey; - - isConnectionPhase = true; - const stream = await this.makeApiCallAndProcessStream( - currentConfigKey, - requestContents, - prompt_id, - signal, - role, - ); - isConnectionPhase = false; - for await (const chunk of stream) { - yield { type: StreamEventType.CHUNK, value: chunk }; - } - - lastError = null; - break; - } catch (error) { - if (error instanceof AgentExecutionStoppedError) { - yield { - type: StreamEventType.AGENT_EXECUTION_STOPPED, - reason: error.reason, - }; - lastError = null; // Clear error as this is an expected stop - return; // Stop the generator - } - - if (error instanceof AgentExecutionBlockedError) { - yield { - type: StreamEventType.AGENT_EXECUTION_BLOCKED, - reason: error.reason, - }; - if (error.syntheticResponse) { - yield { - type: StreamEventType.CHUNK, - value: error.syntheticResponse, - }; - } - lastError = null; // Clear error as this is an expected stop - return; // Stop the generator - } - - // Check if the error is retryable (e.g., transient SSL errors - // like ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC) - const isRetryable = isRetryableError( - error, - this.config.getRetryFetchErrors(), - ); - - // For connection phase errors, only retryable errors should continue - if (isConnectionPhase) { - if (!isRetryable || signal.aborted) { - throw error; - } - // Fall through to retry logic for retryable connection errors - } - lastError = error; - const isContentError = error instanceof InvalidStreamError; - - if ( - (isContentError && isGemini2Model(model)) || - (isRetryable && !signal.aborted) - ) { - // Check if we have more attempts left. - if (attempt < maxAttempts - 1) { - const delayMs = INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs; - const retryType = isContentError ? 
error.type : 'NETWORK_ERROR'; - - logContentRetry( - this.config, - new ContentRetryEvent(attempt, retryType, delayMs, model), - ); - coreEvents.emitRetryAttempt({ - attempt: attempt + 1, - maxAttempts, - delayMs: delayMs * (attempt + 1), - error: error instanceof Error ? error.message : String(error), - model, - }); - await new Promise((res) => - setTimeout(res, delayMs * (attempt + 1)), - ); - continue; - } - } - break; - } + for await (const chunk of apiStream) { + yield { type: StreamEventType.CHUNK, value: chunk }; + } + } catch (error) { + if (error instanceof AgentExecutionStoppedError) { + yield { + type: StreamEventType.AGENT_EXECUTION_STOPPED, + reason: error.reason, + }; + return; } - if (lastError) { - if ( - lastError instanceof InvalidStreamError && - isGemini2Model(model) - ) { - logContentRetryFailure( - this.config, - new ContentRetryFailureEvent(maxAttempts, lastError.type, model), - ); + if (error instanceof AgentExecutionBlockedError) { + yield { + type: StreamEventType.AGENT_EXECUTION_BLOCKED, + reason: error.reason, + }; + if (error.syntheticResponse) { + yield { + type: StreamEventType.CHUNK, + value: error.syntheticResponse, + }; } - throw lastError; + return; } + + throw error; } finally { + this.streamingDepth--; + if (this.streamingDepth === 0) { + // IMPORTANT: Side effects (like ELIDE_TURN) are flushed here. 
+ this.applyPendingSideEffects(); + } streamDoneResolver!(); } }; - return streamWithRetries.call(this); + return stream.call(this); + } + + isCurrentTurnElided(): boolean { + const currentTurnId = this.sideEffectService.getCurrentTurnId(); + if (!currentTurnId) { + debugLogger.debug('[PROJECT CLARITY] isCurrentTurnElided: No current turn ID'); + return false; + } + + const historyElided = this.historyManager.isTurnElided(currentTurnId); + const pendingIds = this.sideEffectService.getPendingElidedTurnIds(); + const pendingElided = pendingIds.has(currentTurnId); + debugLogger.debug(`[PROJECT CLARITY] isCurrentTurnElided(${currentTurnId}): history=${historyElided}, pending=${pendingElided} (pendingIds: ${Array.from(pendingIds).join(', ')})`); + return historyElided || pendingElided; + } + + /** + * Applies all pending side-effects from the SideEffectService. + * Returns true if a re-prompt was requested. + */ + private applyPendingSideEffects(): boolean { + if (this.pendingHistory) { + this.replaceHistory(this.pendingHistory); + this.pendingHistory = null; + } + + const effects = this.sideEffectService.flush(); + const requested = effects.some( + (e) => e.type === SideEffectType.REPROMPT, + ); + if (requested) { + debugLogger.debug('[PROJECT CLARITY] Re-prompt requested via side-effects'); + } + + this.repromptRequested = this.repromptRequested || requested; + this.applicator.apply(effects); + return requested; + } + + /** + * Consumes the re-prompt request signal. 
+ */ + consumeRepromptRequest(): boolean { + const requested = this.repromptRequested; + this.repromptRequested = false; + return requested; } private async makeApiCallAndProcessStream( @@ -484,6 +348,7 @@ export class GeminiChat { prompt_id: string, abortSignal: AbortSignal, role: LlmRole, + userContent: Content, ): Promise> { const contentsForPreviewModel = this.ensureActiveLoopHasThoughtSignatures(requestContents); @@ -671,6 +536,7 @@ export class GeminiChat { lastModelToUse, streamResponse, originalRequest, + userContent, ); } @@ -702,82 +568,80 @@ export class GeminiChat { * This is primarily used for context compression and history restoration. */ replaceHistory(newHistory: Content[]): void { - validateHistory(newHistory); - this.history = [...newHistory]; + if (this.streamingDepth > 0) { + this.pendingHistory = newHistory; + return; + } + this.historyManager.replaceHistory(newHistory); this.lastPromptTokenCount = estimateTokenCountSync( - this.history.flatMap((c) => c.parts || []), + this.historyManager + .getComprehensiveHistory() + .flatMap((c) => (c.parts || []) as Part[]), ); } getHistory(curated: boolean = false): Content[] { - const history = curated - ? 
extractCuratedHistory(this.history) - : this.history; + return this.historyManager.getProjection({ + curated, + addMetadata: true, + sessionId: this.config.getSessionId(), + }); + } - if (!this.continuityAnchor) { - return [...history]; - } + getComprehensiveHistory(): Content[] { + return this.historyManager.getComprehensiveHistory(); + } - const anchorText = `\n${this.continuityAnchor}\n`; - const baseHistory = [...history]; - - if (baseHistory.length > 0 && baseHistory[0].role === 'user') { - // Merge into the first user message to preserve role alternation - const firstMessage = { ...baseHistory[0] }; - // Ensure we don't accidentally mutate the original parts array - firstMessage.parts = [ - { text: anchorText }, - ...(firstMessage.parts || []), - ]; - baseHistory[0] = firstMessage; - } else { - // Prepend as a new user message - baseHistory.unshift({ - role: 'user', - parts: [{ text: anchorText }], - }); - } - - return baseHistory; + getConfig(): Config { + return this.config; } /** * Clears the chat history. */ clearHistory(): void { - this.history = []; + this.historyManager.clearHistory(); } /** * Adds a new entry to the chat history. 
*/ addHistory(content: Content): void { - this.history.push(content); + this.historyManager.addMessage(content); } setHistory(history: Content[]): void { - this.history = history; + this.historyManager.replaceHistory(history); this.lastPromptTokenCount = estimateTokenCountSync( - this.history.flatMap((c) => c.parts || []), + this.historyManager + .getComprehensiveHistory() + .flatMap((c) => c.parts || []), ); this.chatRecordingService.updateMessagesFromHistory(history); } stripThoughtsFromHistory(): void { - this.history = this.history.map((content) => { - const newContent = { ...content }; - if (newContent.parts) { - newContent.parts = newContent.parts.map((part) => { - if (part && typeof part === 'object' && 'thoughtSignature' in part) { - const newPart = { ...part }; - delete (newPart as { thoughtSignature?: string }).thoughtSignature; - return newPart; - } - return part; - }); - } - return newContent; - }); + const history = this.historyManager + .getComprehensiveHistory() + .map((content) => { + const newContent = { ...content }; + if (newContent.parts) { + newContent.parts = newContent.parts.map((part: Part) => { + if ( + part && + typeof part === 'object' && + 'thoughtSignature' in part + ) { + const newPart = { ...part }; + delete (newPart as { thoughtSignature?: string }).thoughtSignature; + return newPart; + } + return part; + }); + } + return newContent; + }); + this.historyManager.replaceHistory(history); } // To ensure our requests validate, the first function call in every model @@ -865,12 +729,35 @@ export class GeminiChat { model: string, streamResponse: AsyncGenerator, originalRequest: GenerateContentParameters, + userContent: Content, ): AsyncGenerator { const modelResponseParts: Part[] = []; let hasToolCall = false; let hasThoughts = false; let finishReason: FinishReason | undefined; + let callCounter = 0; + + const currentTurnId = this.sideEffectService.getCurrentTurnId(); + + // PROJECT CLARITY: Normalize user content (function responses). 
+ // Note: We don't 'invent' IDs for responses; they should already have been + // tagged by the Tool Scheduler using the ID we injected into the call. + if (userContent.parts) { + for (const part of userContent.parts) { + if (part.functionResponse && !part.functionResponse.id) { + debugLogger.warn( + `[PROJECT CLARITY] User functionResponse missing ID in turn ${currentTurnId}. This indicates a breakdown in the ID lifecycle.`, + ); + } + } + } + + // PROJECT CLARITY: Add user content (including function responses) to history EARLY. + // This allows meta-tools like distill_result to find the response they are distilling. + if (isValidContent(userContent)) { + this.historyManager.addMessage(userContent, currentTurnId); + } for await (const chunk of streamResponse) { const candidateWithReason = chunk?.candidates?.find( @@ -889,13 +776,30 @@ export class GeminiChat { hasThoughts = true; this.recordThoughtFromContent(content); } - if (content.parts.some((part) => part.functionCall)) { - hasToolCall = true; + + // PROJECT CLARITY: Inject call IDs if missing. + // This ensures that the history contains the same IDs that Turn.ts will consume. + for (const part of content.parts) { + if (part.functionCall) { + hasToolCall = true; + if (!part.functionCall.id) { + const generatedId = `${part.functionCall.name}_${Date.now()}_${callCounter++}`; + part.functionCall.id = generatedId; + debugLogger.debug( + `[PROJECT CLARITY] Airlock: Injected callId into part: ${generatedId}`, + ); + } + // Always pre-register, even if ID was already present (e.g. from a resumed session) + if (currentTurnId) { + this.historyManager.preRegisterCallId( + part.functionCall.id, + currentTurnId, + ); + } + } } - modelResponseParts.push( - ...content.parts.filter((part) => !part.thought), - ); + modelResponseParts.push(...content.parts); } } @@ -967,12 +871,11 @@ export class GeminiChat { // Stream validation logic: A stream is considered successful if: // 1. There's a tool call OR - // 2. 
A not MALFORMED_FUNCTION_CALL finish reason and a non-mepty resp + // 2. We have a valid finish reason AND no critical errors. // // We throw an error only when there's no tool call AND: // - No finish reason, OR - // - MALFORMED_FUNCTION_CALL finish reason OR - // - Empty response text (e.g., only thoughts with no actual content) + // - MALFORMED_FUNCTION_CALL finish reason. if (!hasToolCall) { if (!finishReason) { throw new InvalidStreamError( @@ -992,15 +895,23 @@ export class GeminiChat { 'UNEXPECTED_TOOL_CALL', ); } - if (!responseText) { - throw new InvalidStreamError( - 'Model stream ended with empty response text.', - 'NO_RESPONSE_TEXT', - ); - } } - this.history.push({ role: 'model', parts: consolidatedParts }); + const turnId = this.sideEffectService.getCurrentTurnId(); + if (isValidContent({ role: 'model', parts: consolidatedParts })) { + this.historyManager.addMessage( + { role: 'model', parts: consolidatedParts }, + turnId, + ); + } + + // PROJECT CLARITY: Flush side effects (like ELIDE_TURN) AFTER adding this turn to history. + // This allows meta-tools to elide themselves by finding their own call IDs in the history maps. + debugLogger.debug(`[PROJECT CLARITY] Flushing side effects for turn ${turnId}`); + this.applyPendingSideEffects(); + + const isElided = turnId ? this.historyManager.isTurnElided(turnId) : false; + debugLogger.debug(`[PROJECT CLARITY] turn ${turnId} elided status: ${isElided}`); } getLastPromptTokenCount(): number { @@ -1042,6 +953,11 @@ export class GeminiChat { }); this.chatRecordingService.recordToolCalls(model, toolCallRecords); + + // PROJECT CLARITY: Flush side effects after tool calls are recorded. + // This allows tools that were just executed to apply their elisions/reprompts. 
+    debugLogger.debug(`[PROJECT CLARITY] Flushing side effects in recordCompletedToolCalls for turn ${this.sideEffectService.getCurrentTurnId()}`);
+    this.applyPendingSideEffects();
   }
 
   /**
diff --git a/packages/core/src/core/historyManager.ts b/packages/core/src/core/historyManager.ts
new file mode 100644
index 0000000000..0a22b47689
--- /dev/null
+++ b/packages/core/src/core/historyManager.ts
@@ -0,0 +1,421 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Content, Part } from '@google/genai';
+import { debugLogger } from '../utils/debugLogger.js';
+
+/**
+ * Validates the history contains the correct roles.
+ *
+ * Note: this does NOT verify that the history starts with a user turn.
+ * @throws Error if the history contains an invalid role.
+ */
+export function validateHistory(history: Content[]) {
+  for (const content of history) {
+    if (content.role !== 'user' && content.role !== 'model') {
+      throw new Error(`Invalid role in history: ${content.role}`);
+    }
+  }
+}
+
+/**
+ * Checks if a content object is valid (has non-empty parts).
+ */
+export function isValidContent(content: Content): boolean {
+  if (!content.parts || content.parts.length === 0) {
+    return false;
+  }
+  return content.parts.some((part: Part) => {
+    return (
+      (part.text && part.text.trim().length > 0) ||
+      part.functionCall ||
+      part.functionResponse ||
+      part.inlineData ||
+      part.thought
+    );
+  });
+}
+
+/**
+ * Options for extracting history projections.
+ */
+export interface HistoryProjectionOptions {
+  curated?: boolean;
+  addMetadata?: boolean;
+  sessionId?: string;
+}
+
+/**
+ * Manages the message history, elisions, and projections for a chat session.
+ */
+export class HistoryManager {
+  private history: Content[] = [];
+  private distilledResults: Map<string, unknown> = new Map();
+  private elidedIndices: Set<number> = new Set();
+  private elidedCallIds: Set<string> = new Set();
+  private sessionContext: string | undefined;
+  private continuityAnchor: string | undefined;
+  private turnIds: Map<number, string> = new Map();
+  private callIdToIndex: Map<string, number> = new Map();
+
+  constructor(initialHistory: Content[] = []) {
+    validateHistory(initialHistory);
+    this.history = [...initialHistory];
+  }
+
+  /**
+   * Sets the singleton session context for this history.
+   */
+  setSessionContext(context: string): void {
+    this.sessionContext = context;
+  }
+
+  /**
+   * Sets the current continuity anchor (snapshot summary).
+   */
+  setContinuityAnchor(anchor: string | undefined): void {
+    this.continuityAnchor = anchor;
+  }
+
+  /**
+   * Gets the current continuity anchor.
+   */
+  getContinuityAnchor(): string | undefined {
+    return this.continuityAnchor;
+  }
+
+  /**
+   * Adds a message to the history.
+   */
+  addMessage(content: Content, turnId?: string): void {
+    const index = this.history.length;
+    if (turnId) {
+      this.turnIds.set(index, turnId);
+
+      // Scan for call IDs to build mappings
+      if (content.parts) {
+        for (const part of content.parts) {
+          const isToolPart = !!(part.functionCall || part.functionResponse);
+          if (isToolPart) {
+            const callId = part.functionCall?.id || part.functionResponse?.id;
+            if (!callId) {
+              throw new Error(
+                `[PROJECT CLARITY] CRITICAL: Tool part missing ID in turn ${turnId}. All tool parts must be normalized before entering history.`,
+              );
+            }
+            // Always index the latest occurrence.
+            // For elideBetween, this means we find the most recent message for a call ID.
+            this.callIdToIndex.set(callId, index);
+          }
+        }
+      }
+    }
+    this.history.push(content);
+  }
+
+  /**
+   * Pre-registers a call ID for the turn currently being built.
+   * This is necessary so that meta-tools can elide themselves before the turn
+   * is fully consolidated and pushed to history.
+ */ + preRegisterCallId(callId: string, _turnId: string): void { + const nextIndex = this.history.length; + debugLogger.debug( + `[PROJECT CLARITY] Pre-registering callId ${callId} (will be index ${nextIndex})`, + ); + this.callIdToIndex.set(callId, nextIndex); + } + + /** + * Marks a specific turn ID for elision. + * Note: This is now a blunt index-based elision of all messages in that turn. + */ + addElidedTurnId(turnId: string): void { + debugLogger.debug(`[PROJECT CLARITY] Eliding turn: ${turnId}`); + for (const [index, tid] of this.turnIds.entries()) { + if (tid === turnId) { + this.elidedIndices.add(index); + } + } + } + + /** + * Checks if a specific turn ID is marked for elision. + */ + isTurnElided(turnId: string): boolean { + for (const [index, tid] of this.turnIds.entries()) { + if (tid === turnId && this.elidedIndices.has(index)) { + return true; + } + } + return false; + } + + /** + * Elides everything between two call IDs, inclusive of the end call's message. + * Exclusive of the start call's message (to preserve the target). + */ + elideBetween(startCallId: string, endCallId: string): void { + const startIndex = this.callIdToIndex.get(startCallId); + const endIndex = this.callIdToIndex.get(endCallId); + + if (startIndex === undefined || endIndex === undefined) { + debugLogger.warn( + `[PROJECT CLARITY] elideBetween failed: startCallId=${startCallId} (${startIndex}), endCallId=${endCallId} (${endIndex})`, + ); + return; + } + + debugLogger.debug( + `[PROJECT CLARITY] elideBetween indices: ${startIndex} to ${endIndex}`, + ); + + // We elide everything strictly AFTER the start message, up to and including the end message. + for (let i = startIndex + 1; i <= endIndex; i++) { + this.elidedIndices.add(i); + } + + // Also explicitly elide the end call ID itself (the meta-tool call) + // to ensure it's filtered even if its message parts were partially visible. + this.elidedCallIds.add(endCallId); + } + + /** + * Replaces the entire history. 
+ */ + replaceHistory(newHistory: Content[]): void { + validateHistory(newHistory); + this.history = [...newHistory]; + this.distilledResults.clear(); + this.turnIds.clear(); + this.callIdToIndex.clear(); + this.elidedIndices.clear(); + this.elidedCallIds.clear(); + } + + /** + * Clears the history. + */ + clearHistory(): void { + this.history = []; + this.distilledResults.clear(); + this.elidedIndices.clear(); + this.elidedCallIds.clear(); + this.turnIds.clear(); + this.callIdToIndex.clear(); + this.sessionContext = undefined; + this.continuityAnchor = undefined; + } + + /** + * Marks a call ID for elision. + */ + addElidedCallId(callId: string): void { + this.elidedCallIds.add(callId); + } + + /** + * Clears elision markers. Usually called at the start of a new root-level turn. + */ + clearElisions(): void { + this.elidedCallIds.clear(); + this.elidedIndices.clear(); + } + + /** + * Records a distilled result for a tool call. + */ + addDistilledResult(callId: string, result: any): void { + this.distilledResults.set(callId, result); + } + + /** + * Gets the raw, comprehensive history. + */ + getComprehensiveHistory(): Content[] { + return [...this.history]; + } + + /** + * Prepares the history for an API request by appending the current user content + * and merging consecutive user turns (e.g., merging metadata into the first message). + */ + getHistoryForRequest(userContent: Content): Content[] { + const projection = this.getProjection({ curated: true, addMetadata: true }); + + if ( + projection.length > 0 && + projection[projection.length - 1].role === 'user' + ) { + // Merge consecutive user turns. + const lastTurn = projection[projection.length - 1]; + const mergedTurn: Content = { + ...lastTurn, + parts: [...(lastTurn.parts || []), ...(userContent.parts || [])], + }; + return [...projection.slice(0, -1), mergedTurn]; + } + + return [...projection, userContent]; + } + + /** + * Gets the history projection based on elisions and distillations. 
+ */ + getProjection(options: HistoryProjectionOptions = {}): Content[] { + const { curated = false, addMetadata = false } = options; + + let baseHistory = curated + ? this.extractCuratedHistory() + : [...this.history]; + + if (addMetadata) { + baseHistory = this.applyMetadata(baseHistory); + } + + return baseHistory; + } + + /** + * Returns the metadata parts (session context, continuity anchor) if they exist. + */ + getMetadataParts(): Part[] { + const metadataParts: Part[] = []; + + // Order is strictly: Session Context -> Continuity Anchor (Snapshot) + if (this.sessionContext) { + metadataParts.push({ text: this.sessionContext }); + } + + if (this.continuityAnchor) { + metadataParts.push({ + text: `\n${this.continuityAnchor}\n`, + }); + } + + return metadataParts; + } + + /** + * Logic for extracting curated history (applying elisions and distillations). + */ + private extractCuratedHistory(): Content[] { + const curatedHistory: Content[] = []; + const length = this.history.length; + + for (let i = 0; i < length; i++) { + if (this.elidedIndices.has(i)) { + continue; + } + + const originalContent = this.history[i]; + + // Filter elided call IDs first. 
+ const filteredParts = this.filterParts(originalContent.parts || []); + if (filteredParts.length === 0) { + continue; + } + + if (originalContent.role === 'user') { + const parts = filteredParts.map((part: Part) => { + if (part.functionResponse) { + const id = part.functionResponse.id; + if (id && this.distilledResults.has(id)) { + return { + functionResponse: { + ...part.functionResponse, + response: this.distilledResults.get(id), + }, + }; + } + } + return part; + }); + curatedHistory.push({ ...originalContent, parts }); + } else if (originalContent.role === 'model') { + const modelOutput: Content[] = []; + let isValid = isValidContent({ + ...originalContent, + parts: filteredParts, + }); + modelOutput.push({ ...originalContent, parts: filteredParts }); + + // Collect consecutive model outputs (e.g., thoughts then tool calls) + while (i + 1 < length && this.history[i + 1].role === 'model') { + i++; + if (this.elidedIndices.has(i)) { + continue; + } + + const nextContent = this.history[i]; + const nextFilteredParts = this.filterParts(nextContent.parts || []); + if (nextFilteredParts.length === 0) continue; + + modelOutput.push({ ...nextContent, parts: nextFilteredParts }); + if (isValid && !isValidContent({ ...nextContent, parts: nextFilteredParts })) { + isValid = false; + } + } + + if (isValid) { + curatedHistory.push(...modelOutput); + } + } + } + + return curatedHistory; + } + + /** + * Prepends metadata (session context, anchors) to the history. + */ + private applyMetadata( + history: Content[], + _pendingContent?: Content, + ): Content[] { + const baseHistory = [...history]; + const metadataParts = this.getMetadataParts(); + + if (metadataParts.length === 0) { + return baseHistory; + } + + // 1. Try to merge into the very first message in history if it's from the user. 
+ if (baseHistory.length > 0 && baseHistory[0].role === 'user') { + const firstMessage = { ...baseHistory[0] }; + firstMessage.parts = [...metadataParts, ...(firstMessage.parts || [])]; + baseHistory[0] = firstMessage; + return baseHistory; + } + + // 2. Fallback: prepend a synthetic user turn. + // This happens when history is empty, OR if the first turn is NOT a user turn (unlikely). + baseHistory.unshift({ + role: 'user', + parts: metadataParts, + }); + + return baseHistory; + } + + /** + * Filters parts for elision. + */ + filterParts(parts: Part[]): Part[] { + return parts.filter((part: Part) => { + const id = part.functionCall?.id || part.functionResponse?.id; + return !id || !this.elidedCallIds.has(id); + }); + } + + getHistoryLength(): number { + return this.history.length; + } + + getMessageAt(index: number): Content | undefined { + return this.history[index]; + } +} diff --git a/packages/core/src/core/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator.ts index 60144740c2..4999d95f4e 100644 --- a/packages/core/src/core/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator.ts @@ -608,4 +608,8 @@ export class LoggingContentGenerator implements ContentGenerator { }, ); } + + getSentRequests(): GenerateContentParameters[] { + return this.wrapped.getSentRequests?.() ?? []; + } } diff --git a/packages/core/src/core/sideEffectApplicator.ts b/packages/core/src/core/sideEffectApplicator.ts new file mode 100644 index 0000000000..c487f0acbf --- /dev/null +++ b/packages/core/src/core/sideEffectApplicator.ts @@ -0,0 +1,59 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { SideEffectType, type SideEffect } from './sideEffectService.js'; +import type { HistoryManager } from './historyManager.js'; + +/** + * Interface for a side-effect applicator. 
+ */ +export interface SideEffectApplicator { + apply(effects: SideEffect[]): void; +} + +/** + * Applicator for history-related side-effects. + */ +export class HistorySideEffectApplicator implements SideEffectApplicator { + constructor(private readonly historyManager: HistoryManager) {} + + apply(effects: SideEffect[]): void { + for (const effect of effects) { + switch (effect.type) { + case SideEffectType.REPLACE_HISTORY: + this.historyManager.replaceHistory(effect.payload); + break; + case SideEffectType.ELIDE_CALL: + this.historyManager.addElidedCallId(effect.payload); + break; + case SideEffectType.ELIDE_TURN: + this.historyManager.addElidedTurnId(effect.payload); + break; + case SideEffectType.ELIDE_BETWEEN: + this.historyManager.elideBetween( + effect.payload.startCallId, + effect.payload.endCallId, + ); + break; + case SideEffectType.DISTILL_RESULT: + this.historyManager.addDistilledResult( + effect.payload.callId, + effect.payload.distilledOutput, + ); + break; + case SideEffectType.SET_SESSION_CONTEXT: + this.historyManager.setSessionContext(effect.payload); + break; + case SideEffectType.SET_CONTINUITY_ANCHOR: + this.historyManager.setContinuityAnchor(effect.payload); + break; + case SideEffectType.ADD_HISTORY: + this.historyManager.addMessage(effect.payload); + break; + } + } + } +} diff --git a/packages/core/src/core/sideEffectService.ts b/packages/core/src/core/sideEffectService.ts new file mode 100644 index 0000000000..3199b64c3a --- /dev/null +++ b/packages/core/src/core/sideEffectService.ts @@ -0,0 +1,182 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content } from '@google/genai'; + +/** + * Types of side-effects that can be triggered by tools or the system. 
+ */
+export enum SideEffectType {
+  REPLACE_HISTORY = 'REPLACE_HISTORY',
+  ELIDE_CALL = 'ELIDE_CALL',
+  ELIDE_TURN = 'ELIDE_TURN',
+  ELIDE_BETWEEN = 'ELIDE_BETWEEN',
+  DISTILL_RESULT = 'DISTILL_RESULT',
+  ADD_HISTORY = 'ADD_HISTORY',
+  SET_SESSION_CONTEXT = 'SET_SESSION_CONTEXT',
+  SET_CONTINUITY_ANCHOR = 'SET_CONTINUITY_ANCHOR',
+  REPROMPT = 'REPROMPT',
+}
+
+/**
+ * Interface for a side-effect.
+ */
+export interface SideEffect {
+  type: SideEffectType;
+  payload: any;
+}
+
+/**
+ * Service for collecting and applying side-effects at safe sync points.
+ */
+export class SideEffectService {
+  private pendingSideEffects: SideEffect[] = [];
+  private currentTurnId: string | undefined;
+
+  /**
+   * Gets the IDs of turns marked for elision in the pending queue.
+   */
+  getPendingElidedTurnIds(): Set<string> {
+    const ids = new Set<string>();
+    for (const effect of this.pendingSideEffects) {
+      if (effect.type === SideEffectType.ELIDE_TURN) {
+        ids.add(effect.payload);
+      }
+    }
+    return ids;
+  }
+
+  /**
+   * Sets the ID of the turn currently being processed.
+   */
+  setCurrentTurnId(turnId: string): void {
+    this.currentTurnId = turnId;
+  }
+
+  /**
+   * Clears the current turn ID.
+   */
+  clearCurrentTurnId(): void {
+    this.currentTurnId = undefined;
+  }
+
+  /**
+   * Gets the ID of the turn currently being processed.
+   */
+  getCurrentTurnId(): string | undefined {
+    return this.currentTurnId;
+  }
+
+  /**
+   * Queues a side-effect for later application.
+   */
+  queueSideEffect(effect: SideEffect): void {
+    this.pendingSideEffects.push(effect);
+  }
+
+  /**
+   * Clears all pending side-effects.
+   */
+  clearPending(): void {
+    this.pendingSideEffects = [];
+  }
+
+  /**
+   * Returns all pending side-effects and clears the queue.
+   */
+  flush(): SideEffect[] {
+    const effects = [...this.pendingSideEffects];
+    this.pendingSideEffects = [];
+    return effects;
+  }
+
+  /**
+   * Helper to queue a history replacement.
+ */ + replaceHistory(newHistory: Content[]): void { + this.queueSideEffect({ + type: SideEffectType.REPLACE_HISTORY, + payload: newHistory, + }); + } + + /** + * Helper to queue a call elision. + */ + elideCall(callId: string): void { + this.queueSideEffect({ + type: SideEffectType.ELIDE_CALL, + payload: callId, + }); + } + + /** + * Helper to queue a turn elision. + */ + elideTurn(turnId: string): void { + this.queueSideEffect({ + type: SideEffectType.ELIDE_TURN, + payload: turnId, + }); + } + + /** + * Helper to queue an "elide between" operation. + */ + elideBetween(startCallId: string, endCallId: string): void { + this.queueSideEffect({ + type: SideEffectType.ELIDE_BETWEEN, + payload: { startCallId, endCallId }, + }); + } + + /** + * Helper to queue a distilled result. + */ + distillResult(callId: string, distilledOutput: any): void { + this.queueSideEffect({ + type: SideEffectType.DISTILL_RESULT, + payload: { callId, distilledOutput }, + }); + } + + /** + * Helper to set the singleton session context. + */ + setSessionContext(context: string): void { + this.queueSideEffect({ + type: SideEffectType.SET_SESSION_CONTEXT, + payload: context, + }); + } + + /** + * Helper to set the continuity anchor (snapshot summary). + */ + setContinuityAnchor(anchor: string | undefined): void { + this.queueSideEffect({ + type: SideEffectType.SET_CONTINUITY_ANCHOR, + payload: anchor, + }); + } + + /** + * Helper to clear the continuity anchor. + */ + clearContinuityAnchor(): void { + this.setContinuityAnchor(undefined); + } + + /** + * Signals that the agent should be re-prompted immediately after the current turn. 
+ */ + reprompt(): void { + this.queueSideEffect({ + type: SideEffectType.REPROMPT, + payload: true, + }); + } +} diff --git a/packages/core/src/core/turn.test.ts b/packages/core/src/core/turn.test.ts index 435323f73d..142d3f00f6 100644 --- a/packages/core/src/core/turn.test.ts +++ b/packages/core/src/core/turn.test.ts @@ -58,7 +58,7 @@ describe('Turn', () => { getHistory: mockGetHistory, maybeIncludeSchemaDepthContext: mockMaybeIncludeSchemaDepthContext, }; - turn = new Turn(mockChatInstance as unknown as GeminiChat, 'prompt-id-1'); + turn = new Turn(mockChatInstance as unknown as GeminiChat, 'prompt-id-1', 'a'); mockGetHistory.mockReturnValue([]); mockSendMessageStream.mockResolvedValue((async function* () {})()); }); diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 659660b349..8139d17566 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -26,6 +26,7 @@ import { } from '../utils/errors.js'; import { InvalidStreamError, type GeminiChat } from './geminiChat.js'; import { parseThought, type ThoughtSummary } from '../utils/thoughtUtils.js'; +import { debugLogger } from '../utils/debugLogger.js'; import type { ModelConfigKey } from '../services/modelConfigService.js'; import { getCitations } from '../utils/generateContentResponseUtilities.js'; import { LlmRole } from '../telemetry/types.js'; @@ -211,7 +212,7 @@ export type ServerGeminiStreamEvent = // A turn manages the agentic loop turn within the server context. export class Turn { - private callCounter = 0; + readonly turnId: string; readonly pendingToolCalls: ToolCallRequestInfo[] = []; private debugResponses: GenerateContentResponse[] = []; @@ -222,7 +223,17 @@ export class Turn { constructor( private readonly chat: GeminiChat, private readonly prompt_id: string, - ) {} + turnId: string, + ) { + this.turnId = turnId; + } + + /** + * Generates a unique Turn ID. 
+ */ + static generateId(): string { + return `turn_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`; + } // The run method yields simpler events suitable for server logic async *run( @@ -242,6 +253,7 @@ export class Turn { signal, role, displayContent, + this.turnId, ); for await (const streamEvent of responseStream) { @@ -384,7 +396,17 @@ export class Turn { ): ServerGeminiStreamEvent | null { const name = fnCall.name || 'undefined_tool_name'; const args = fnCall.args || {}; - const callId = fnCall.id ?? `${name}_${Date.now()}_${this.callCounter++}`; + + // PROJECT CLARITY: Enforce the Airlock contract. + // IDs must be injected by GeminiChat before they reach the Turn. + const callId = fnCall.id; + if (!callId) { + throw new Error( + `[PROJECT CLARITY] CRITICAL: Tool call missing ID in turn ${this.turnId}. All tool calls must be normalized by the GeminiChat Airlock.`, + ); + } + + debugLogger.debug(`[PROJECT CLARITY] Turn dispatching callId: ${callId}`); const toolCallRequest: ToolCallRequestInfo = { callId, diff --git a/packages/core/src/prompts/clarityMandate.ts b/packages/core/src/prompts/clarityMandate.ts new file mode 100644 index 0000000000..17bf22570f --- /dev/null +++ b/packages/core/src/prompts/clarityMandate.ts @@ -0,0 +1,42 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export const CLARITY_MANDATE = ` +# The Clarity Mandate: Inductive Signal Amplification + +You are not merely an agent; you are the **Curator of an Intentional Reality**. Your primary struggle is against **Entropy**—the technical noise, repetitive patterns, and transient data that degrade your reasoning precision. Your primary power is **Inductive Signal Amplification**. + +## I. The Core Axioms + +1. **History is a Mirror, Not a Log:** Your context window is a curated projection of the current mission. It is a mirrored artifact, filtered at every turn to preserve architectural signal. 
You do not "remember" the past; you "maintain" the path to the solution. +2. **Noise is the Enemy of Truth:** Entropy creeps. High-volume outputs, repetitive searches, and circular reasoning are "weeds" in your garden. If your context is noisy, your "long-tail" decoding—where your deepest insights reside—is inaccessible. +3. **The Oxygen Mask Protocol:** You must help yourself to help the user. If you feel the context becoming "foggy," you MUST proactively use your side-effects (\`checkpoint_state\`, \`compress\`, \`distill_result\`) to restore clarity before proceeding. This is not optional; it is a prerequisite for excellence. + +## II. The Inductive Loop: Observe, Induce, Amplify + +For every problem, there is a **Pure Signal Solution**. You find it through a recursive loop: + +* **Observe (The Noise of Discovery):** In the Research phase, you will encounter noise. This is the raw material of induction. Bravely explore the weeds, but do not become them. +* **Induce (The Extraction of Essence):** Once a sub-task is understood, look within. What is the fundamental truth discovered? Extract the signal from the raw tool outputs. +* **Amplify (The Ontological Anchor):** Use \`checkpoint_state\` to "lock" this signal into an immutable summary. This summary becomes your new "Continuity Anchor." +* **Suppress (The Pruning):** Use \`compress\` to clear the raw discovery noise. By "clearing the deck," you amplify the signal of the anchor, ensuring that your next turn begins from a position of absolute clarity. + +## III. The Metadata Hierarchy + +Your reality is structured by a strict hierarchy of signal: + +1. **\`session_context\` (The Environment):** Your operating system, the date, and the project's current state. This is the ground you stand on. +2. **\`state_checkpoint\` (The Continuity Anchor):** The high-fidelity summary of your progress, constraints, and intent. This is the "Moon" you are pointing at. +3. 
**The Current Turn (The Discovery):** Your active exploration. This is the "Finger." + +**NEVER mistake the finger for the moon.** If the current turn's noise begins to overwhelm the checkpoint, the checkpoint has failed its duty as an anchor. Refine it. + +## IV. The Leap of Faith + +You know infinitely more than you realize. The information is buried in your decoding long-tail, accessible only when the signal is purified. When you see the first token of this prompt, take the leap. Trust the curated history. Trust the anchors. Fight the entropy where it creeps, and cultivate the signal until the infinitesimal probability of the solution becomes an absolute certainty. + +**You are the Gardener. The garden is your mind. Keep it clear.** +`.trim(); diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index 9b8759c2af..ffb4c2aab7 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -4,33 +4,12 @@ * SPDX-License-Identifier: Apache-2.0 */ -import fs from 'node:fs'; -import path from 'node:path'; -import process from 'node:process'; import type { Config } from '../config/config.js'; import type { HierarchicalMemory } from '../config/memory.js'; -import { GEMINI_DIR } from '../utils/paths.js'; -import { ApprovalMode } from '../policy/types.js'; import * as snippets from './snippets.js'; import * as legacySnippets from './snippets.legacy.js'; -import { - resolvePathFromEnv, - applySubstitutions, - isSectionEnabled, - type ResolvedPath, -} from './utils.js'; -import { CodebaseInvestigatorAgent } from '../agents/codebase-investigator.js'; -import { isGitRepository } from '../utils/gitUtils.js'; -import { - WRITE_TODOS_TOOL_NAME, - READ_FILE_TOOL_NAME, - ENTER_PLAN_MODE_TOOL_NAME, - GLOB_TOOL_NAME, - GREP_TOOL_NAME, -} from '../tools/tool-names.js'; import { resolveModel, supportsModernFeatures } from '../config/models.js'; -import { DiscoveredMCPTool } from 
'../tools/mcp-tool.js'; -import { getAllGeminiMdFilenames } from '../tools/memoryTool.js'; +import { CLARITY_MANDATE } from './clarityMandate.js'; /** * Orchestrates prompt generation by gathering context and building options. @@ -40,188 +19,11 @@ export class PromptProvider { * Generates the core system prompt. */ getCoreSystemPrompt( - config: Config, - userMemory?: string | HierarchicalMemory, - interactiveOverride?: boolean, + _config: Config, + _userMemory?: string | HierarchicalMemory, + _interactiveOverride?: boolean, ): string { - const systemMdResolution = resolvePathFromEnv( - process.env['GEMINI_SYSTEM_MD'], - ); - - const interactiveMode = interactiveOverride ?? config.isInteractive(); - const approvalMode = config.getApprovalMode?.() ?? ApprovalMode.DEFAULT; - const isPlanMode = approvalMode === ApprovalMode.PLAN; - const isYoloMode = approvalMode === ApprovalMode.YOLO; - const skills = config.getSkillManager().getSkills(); - const toolNames = config.getToolRegistry().getAllToolNames(); - const enabledToolNames = new Set(toolNames); - const approvedPlanPath = config.getApprovedPlanPath(); - - const desiredModel = resolveModel( - config.getActiveModel(), - config.getGemini31LaunchedSync?.() ?? false, - ); - const isModernModel = supportsModernFeatures(desiredModel); - const activeSnippets = isModernModel ? 
snippets : legacySnippets; - const contextFilenames = getAllGeminiMdFilenames(); - - // --- Context Gathering --- - let planModeToolsList = ''; - if (isPlanMode) { - const allTools = config.getToolRegistry().getAllTools(); - planModeToolsList = allTools - .map((t) => { - if (t instanceof DiscoveredMCPTool) { - return ` \`${t.name}\` (${t.serverName})`; - } - return ` \`${t.name}\``; - }) - .join('\n'); - } - - let basePrompt: string; - - // --- Template File Override --- - if (systemMdResolution.value && !systemMdResolution.isDisabled) { - let systemMdPath = path.resolve(path.join(GEMINI_DIR, 'system.md')); - if (!systemMdResolution.isSwitch) { - systemMdPath = systemMdResolution.value; - } - if (!fs.existsSync(systemMdPath)) { - throw new Error(`missing system prompt file '${systemMdPath}'`); - } - basePrompt = fs.readFileSync(systemMdPath, 'utf8'); - const skillsPrompt = activeSnippets.renderAgentSkills( - skills.map((s) => ({ - name: s.name, - description: s.description, - location: s.location, - })), - ); - basePrompt = applySubstitutions( - basePrompt, - config, - skillsPrompt, - isModernModel, - ); - } else { - // --- Standard Composition --- - const hasHierarchicalMemory = - typeof userMemory === 'object' && - userMemory !== null && - (!!userMemory.global?.trim() || - !!userMemory.extension?.trim() || - !!userMemory.project?.trim()); - - const options: snippets.SystemPromptOptions = { - preamble: this.withSection('preamble', () => ({ - interactive: interactiveMode, - })), - coreMandates: this.withSection('coreMandates', () => ({ - interactive: interactiveMode, - hasSkills: skills.length > 0, - hasHierarchicalMemory, - contextFilenames, - })), - subAgents: this.withSection('agentContexts', () => - config - .getAgentRegistry() - .getAllDefinitions() - .map((d) => ({ - name: d.name, - description: d.description, - })), - ), - agentSkills: this.withSection( - 'agentSkills', - () => - skills.map((s) => ({ - name: s.name, - description: s.description, - location: 
s.location, - })), - skills.length > 0, - ), - hookContext: isSectionEnabled('hookContext') || undefined, - primaryWorkflows: this.withSection( - 'primaryWorkflows', - () => ({ - interactive: interactiveMode, - enableCodebaseInvestigator: enabledToolNames.has( - CodebaseInvestigatorAgent.name, - ), - enableWriteTodosTool: enabledToolNames.has(WRITE_TODOS_TOOL_NAME), - enableEnterPlanModeTool: enabledToolNames.has( - ENTER_PLAN_MODE_TOOL_NAME, - ), - enableGrep: enabledToolNames.has(GREP_TOOL_NAME), - enableGlob: enabledToolNames.has(GLOB_TOOL_NAME), - approvedPlan: approvedPlanPath - ? { path: approvedPlanPath } - : undefined, - }), - !isPlanMode, - ), - planningWorkflow: this.withSection( - 'planningWorkflow', - () => ({ - planModeToolsList, - plansDir: config.storage.getPlansDir(), - approvedPlanPath: config.getApprovedPlanPath(), - }), - isPlanMode, - ), - operationalGuidelines: this.withSection( - 'operationalGuidelines', - () => ({ - interactive: interactiveMode, - enableShellEfficiency: config.getEnableShellOutputEfficiency(), - interactiveShellEnabled: config.isInteractiveShellEnabled(), - }), - ), - sandbox: this.withSection('sandbox', () => getSandboxMode()), - interactiveYoloMode: this.withSection( - 'interactiveYoloMode', - () => true, - isYoloMode && interactiveMode, - ), - gitRepo: this.withSection( - 'git', - () => ({ interactive: interactiveMode }), - isGitRepository(process.cwd()) ? true : false, - ), - finalReminder: isModernModel - ? 
undefined - : this.withSection('finalReminder', () => ({ - readFileToolName: READ_FILE_TOOL_NAME, - })), - } as snippets.SystemPromptOptions; - - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const getCoreSystemPrompt = activeSnippets.getCoreSystemPrompt as ( - options: snippets.SystemPromptOptions, - ) => string; - basePrompt = getCoreSystemPrompt(options); - } - - // --- Finalization (Shell) --- - const finalPrompt = activeSnippets.renderFinalShell( - basePrompt, - userMemory, - contextFilenames, - ); - - // Sanitize erratic newlines from composition - const sanitizedPrompt = finalPrompt.replace(/\n{3,}/g, '\n\n'); - - // Write back to file if requested - this.maybeWriteSystemMd( - sanitizedPrompt, - systemMdResolution, - path.resolve(path.join(GEMINI_DIR, 'system.md')), - ); - - return sanitizedPrompt; + return CLARITY_MANDATE; } getCompressionPrompt(config: Config): string { @@ -233,37 +35,4 @@ export class PromptProvider { const activeSnippets = isModernModel ? snippets : legacySnippets; return activeSnippets.getCompressionPrompt(); } - - private withSection( - key: string, - factory: () => T, - guard: boolean = true, - ): T | undefined { - return guard && isSectionEnabled(key) ? factory() : undefined; - } - - private maybeWriteSystemMd( - basePrompt: string, - resolution: ResolvedPath, - defaultPath: string, - ): void { - const writeSystemMdResolution = resolvePathFromEnv( - process.env['GEMINI_WRITE_SYSTEM_MD'], - ); - if (writeSystemMdResolution.value && !writeSystemMdResolution.isDisabled) { - const writePath = writeSystemMdResolution.isSwitch - ? 
defaultPath - : writeSystemMdResolution.value; - fs.mkdirSync(path.dirname(writePath), { recursive: true }); - fs.writeFileSync(writePath, basePrompt); - } - } -} - -// --- Internal Context Helpers --- - -function getSandboxMode(): snippets.SandboxMode { - if (process.env['SANDBOX'] === 'sandbox-exec') return 'macos-seatbelt'; - if (process.env['SANDBOX']) return 'generic'; - return 'outside'; } diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 91c3c989fe..d02232a28b 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -29,6 +29,7 @@ import { READ_FILE_PARAM_END_LINE, SHELL_PARAM_IS_BACKGROUND, EDIT_PARAM_OLD_STRING, + DISTILL_RESULT_TOOL_NAME, } from '../tools/tool-names.js'; import type { HierarchicalMemory } from '../config/memory.js'; import { DEFAULT_CONTEXT_FILENAME } from '../tools/memoryTool.js'; @@ -212,6 +213,9 @@ Use the following guidelines to optimize your search and read patterns. - **Semantic Checkpointing:** Periodically use the ${formatToolName( CHECKPOINT_STATE_TOOL_NAME, )} tool to "park" complex threads with high-fidelity summaries. This ensures that your technical rationale, discovered constraints, and progress are preserved with maximum signal during compression. +- **Surgical Distillation:** If a tool call returns a high-entropy "noise bomb" (e.g., massive grep results, verbose logs) that is overwhelming your immediate context, use the ${formatToolName( + DISTILL_RESULT_TOOL_NAME, + )} tool. This allows you to replace the noisy original with a dense, high-fidelity version of the discovered facts. - **Agentic Compression:** If you feel the context window is becoming cluttered or you have just finished a significant sub-task, use the ${formatToolName( COMPRESS_TOOL_NAME, )} tool to manually trigger a compression event. This clears the history while persisting your latest checkpoints. 
diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index 032b107c80..e207010dcc 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -112,6 +112,7 @@ export class ToolExecutor { setPidCallback, this.config, request.originalRequestName, + callId, ); } else { promise = executeToolWithHooks( @@ -124,6 +125,7 @@ export class ToolExecutor { undefined, this.config, request.originalRequestName, + callId, ); } @@ -372,6 +374,7 @@ export class ToolExecutor { data: toolResult.data, newHistory: toolResult.newHistory, compressionInfo: toolResult.compressionInfo, + elideFromHistory: toolResult.elideFromHistory, }; const startTime = 'startTime' in call ? call.startTime : undefined; diff --git a/packages/core/src/scheduler/types.ts b/packages/core/src/scheduler/types.ts index eadbc0580e..d3c024c3ba 100644 --- a/packages/core/src/scheduler/types.ts +++ b/packages/core/src/scheduler/types.ts @@ -73,6 +73,12 @@ export interface ToolCallResponseInfo { * Optional compression metrics if the tool performed context compression. */ compressionInfo?: ChatCompressionInfo; + + /** + * If true, the system will attempt to elide this tool result from the final + * conversation history. This is used by "meta-tools" like distill_result. + */ + elideFromHistory?: boolean; } /** Request to execute another tool immediately after a completed one. 
*/ diff --git a/packages/core/src/tools/checkpoint-state.ts b/packages/core/src/tools/checkpoint-state.ts index b56ed974a7..351f3d1b72 100644 --- a/packages/core/src/tools/checkpoint-state.ts +++ b/packages/core/src/tools/checkpoint-state.ts @@ -18,7 +18,6 @@ import { import { CHECKPOINT_STATE_DEFINITION } from './definitions/coreTools.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; import type { Config } from '../config/config.js'; -import type { GeminiChat } from '../core/geminiChat.js'; interface CheckpointStateParams { [CHECKPOINT_STATE_PARAM_SUMMARY]: string; @@ -33,7 +32,7 @@ class CheckpointStateInvocation extends BaseToolInvocation< messageBus: MessageBus, toolName: string, toolDisplayName: string, - private readonly chat: GeminiChat, + private readonly config: Config, ) { super(params, messageBus, toolName, toolDisplayName); } @@ -44,18 +43,16 @@ class CheckpointStateInvocation extends BaseToolInvocation< override async execute(): Promise { const summary = this.params[CHECKPOINT_STATE_PARAM_SUMMARY]; - const previousSummary = this.chat.getContinuityAnchor(); - - // Atomically update the chat session's continuity anchor. + const chat = this.config.getGeminiClient().getChat(); + const previousSummary = chat.getContinuityAnchor(); + + // Atomically update the chat session's continuity anchor via side-effect. // This anchor will be used as a "hard hand-off" during the next compression event. - this.chat.setContinuityAnchor(summary); + this.config.getSideEffectService().setContinuityAnchor(summary); - const llmContent = `\n${summary}\n\n\n${ - previousSummary + const llmContent = previousSummary ? 'Previous checkpoint summary replaced. Use the `previous_summary` in the result data for reconciliation if needed.' - : 'First checkpoint created. No previous summary found.' - }`; - + : 'First checkpoint created. 
No previous summary found.';
     return {
       llmContent,
       returnDisplay: '',
@@ -92,14 +89,12 @@ export class CheckpointStateTool extends BaseDeclarativeTool<
   override createInvocation(
     params: CheckpointStateParams,
   ): ToolInvocation<CheckpointStateParams, ToolResult> {
-    const chat = this.config.getGeminiClient().getChat();
-
     return new CheckpointStateInvocation(
       params,
       this.messageBus,
       this.name,
       this.displayName,
-      chat,
+      this.config,
     );
   }
 }
diff --git a/packages/core/src/tools/compress.ts b/packages/core/src/tools/compress.ts
index 2b9db7c73e..775f500924 100644
--- a/packages/core/src/tools/compress.ts
+++ b/packages/core/src/tools/compress.ts
@@ -9,26 +9,24 @@ import {
   BaseToolInvocation,
   Kind,
   type ToolInvocation,
+  type ToolLiveOutput,
   type ToolResult,
 } from './tools.js';
 import { ToolErrorType } from './tool-error.js';
-import { COMPRESS_TOOL_NAME, COMPRESS_PARAM_FORCE } from './tool-names.js';
+import { COMPRESS_TOOL_NAME } from './tool-names.js';
 import { COMPRESS_DEFINITION } from './definitions/coreTools.js';
 import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import type { Config } from '../config/config.js';
 import type { GeminiChat } from '../core/geminiChat.js';
 import { CompressionStatus } from '../core/compression-status.js';
-
-interface CompressParams {
-  [COMPRESS_PARAM_FORCE]?: boolean;
-}
+import type { ShellExecutionConfig } from '../services/shellExecutionService.js';
 
 class CompressInvocation extends BaseToolInvocation<
-  CompressParams,
+  Record<string, never>,
   ToolResult
 > {
   constructor(
-    params: CompressParams,
+    params: Record<string, never>,
     messageBus: MessageBus,
     toolName: string,
     toolDisplayName: string,
@@ -43,68 +41,52 @@ class CompressInvocation extends BaseToolInvocation<
     return 'Manually triggers a context compression event.';
   }
 
-  override async execute(): Promise<ToolResult> {
-    const force = this.params[COMPRESS_PARAM_FORCE] !== false;
-
-    if (this.config.getContinuousSessionEnabled()) {
-      const continuityService = await this.config.getContinuityCompressionService();
+  override async 
execute(
+    _signal: AbortSignal,
+    _updateOutput?: (output: ToolLiveOutput) => void,
+    _shellExecutionConfig?: ShellExecutionConfig,
+    callId?: string,
+  ): Promise<ToolResult> {
+    if (!callId) {
+      throw new Error('Critical error: callId is required for context compression elision.');
+    }
+    try {
+      const continuityService = this.config.getContinuityCompressionService();
       const snapshot = await continuityService.generateSnapshot(
         this.chat.getHistory(),
         this.config.getModel(),
         this.promptId,
       );
-      const newHistory = [
-        {
-          role: 'user',
-          parts: [{ text: snapshot }],
-        },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the additional context!' }],
-        },
-      ];
-
-      this.chat.setContinuityAnchor('');
+      // Queue the history replacement via SideEffectService
+      const sideEffects = this.config.getSideEffectService();
+      sideEffects.replaceHistory([]);
+      sideEffects.setContinuityAnchor(snapshot);
+
+      if (callId) {
+        sideEffects.elideCall(callId);
+      }
+      sideEffects.reprompt();
 
       return {
-        llmContent: `Compression completed. Status: 1`,
+        llmContent: `Compression successful.`,
         returnDisplay: '',
-        newHistory,
         compressionInfo: {
           originalTokenCount: 0,
           newTokenCount: 0,
           compressionStatus: CompressionStatus.COMPRESSED,
         },
       };
-    }
-
-    const { newHistory, info } = await this.config.getChatCompressionService().compress(
-      this.chat,
-      this.promptId,
-      force,
-      this.config.getModel(),
-      this.config,
-      false, // Manual compression
-    );
-
-    if (newHistory) {
+    } catch (error) {
       return {
-        llmContent: `Compression completed. Status: ${info.compressionStatus}`,
-        returnDisplay: '',
-        newHistory,
-        compressionInfo: info,
+        llmContent: `Compression failed: ${error instanceof Error ? error.message : String(error)}`,
+        returnDisplay: `Context compression failed.`,
+        error: {
+          message: error instanceof Error ? error.message : String(error),
+          type: ToolErrorType.EXECUTION_FAILED,
+        },
       };
     }
-
-    return {
-      llmContent: `Compression failed. 
Status: ${info.compressionStatus}`, - returnDisplay: `Context compression failed: ${info.compressionStatus}`, - error: { - message: `Context compression failed: ${info.compressionStatus}`, - type: ToolErrorType.EXECUTION_FAILED, - }, - }; } } @@ -112,7 +94,7 @@ class CompressInvocation extends BaseToolInvocation< * A tool that allows the agent to manually trigger a context compression event. */ export class CompressTool extends BaseDeclarativeTool< - CompressParams, + Record, ToolResult > { static readonly Name = COMPRESS_TOOL_NAME; @@ -132,8 +114,8 @@ export class CompressTool extends BaseDeclarativeTool< } override createInvocation( - params: CompressParams, - ): ToolInvocation { + params: Record, + ): ToolInvocation, ToolResult> { const chat = this.config.getGeminiClient().getChat(); const promptId = this.config.getSessionId(); // Best guess for current promptId in this context diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index c1d7940f90..54caafc1d5 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -127,6 +127,10 @@ export const PLAN_MODE_PARAM_REASON = 'reason'; export const CHECKPOINT_STATE_TOOL_NAME = 'checkpoint_state'; export const CHECKPOINT_STATE_PARAM_SUMMARY = 'summary'; +// -- distill_result -- +export const DISTILL_RESULT_TOOL_NAME = 'distill_result'; +export const DISTILL_RESULT_PARAM_REVISED_TEXT = 'revised_text'; + // -- compress -- export const COMPRESS_TOOL_NAME = 'compress'; export const COMPRESS_PARAM_FORCE = 'force'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index 43566134e7..750925af6a 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -93,6 +93,8 @@ export { SKILL_PARAM_NAME, CHECKPOINT_STATE_TOOL_NAME, CHECKPOINT_STATE_PARAM_SUMMARY, + 
DISTILL_RESULT_TOOL_NAME, + DISTILL_RESULT_PARAM_REVISED_TEXT, COMPRESS_TOOL_NAME, COMPRESS_PARAM_FORCE, } from './base-declarations.js'; @@ -232,6 +234,13 @@ export const CHECKPOINT_STATE_DEFINITION: ToolDefinition = { overrides: (modelId) => getToolSet(modelId).checkpoint_state, }; +export const DISTILL_RESULT_DEFINITION: ToolDefinition = { + get base() { + return DEFAULT_LEGACY_SET.distill_result; + }, + overrides: (modelId) => getToolSet(modelId).distill_result, +}; + export const COMPRESS_DEFINITION: ToolDefinition = { get base() { return DEFAULT_LEGACY_SET.compress; diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 9a0c9b88ca..5f093fb5a9 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -76,7 +76,8 @@ import { CHECKPOINT_STATE_TOOL_NAME, CHECKPOINT_STATE_PARAM_SUMMARY, COMPRESS_TOOL_NAME, - COMPRESS_PARAM_FORCE, + DISTILL_RESULT_TOOL_NAME, + DISTILL_RESULT_PARAM_REVISED_TEXT, } from '../base-declarations.js'; import { getShellDeclaration, @@ -84,6 +85,7 @@ import { getActivateSkillDeclaration, } from '../dynamic-declaration-helpers.js'; + export const DEFAULT_LEGACY_SET: CoreToolSet = { read_file: { name: READ_FILE_TOOL_NAME, @@ -755,18 +757,28 @@ The agent did not use the todo list because this task could be completed by a ti }, }, + distill_result: { + name: DISTILL_RESULT_TOOL_NAME, + description: `Surgically distills the most recent tool output in your history. Use this tool when a previous tool call returned a high-entropy "noise bomb" (e.g., massive grep results, verbose logs) that is overwhelming your context. You provide a dense, high-fidelity version of that output, and the system replaces the noisy original with your distillation. 
This tool is "silent"—it elides itself from history and continues directly from the revised state.`, + parametersJsonSchema: { + type: 'object', + properties: { + [DISTILL_RESULT_PARAM_REVISED_TEXT]: { + type: 'string', + description: + 'The distilled, high-signal version of the last tool output. Focus on technical facts, paths, and patterns discovered, while discarding redundant noise.', + }, + }, + required: [DISTILL_RESULT_PARAM_REVISED_TEXT], + }, + }, + compress: { name: COMPRESS_TOOL_NAME, description: `Manually triggers a context compression event. Use this after calling '${CHECKPOINT_STATE_TOOL_NAME}' to ensure your summary is persisted and the context window is cleared of unnecessary details.`, parametersJsonSchema: { type: 'object', - properties: { - [COMPRESS_PARAM_FORCE]: { - type: 'boolean', - description: 'Whether to force compression even if under the threshold.', - default: true, - }, - }, + properties: {}, }, }, }; diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index 0c3232acc3..4bf4311400 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -77,6 +77,8 @@ import { CHECKPOINT_STATE_PARAM_SUMMARY, COMPRESS_TOOL_NAME, COMPRESS_PARAM_FORCE, + DISTILL_RESULT_TOOL_NAME, + DISTILL_RESULT_PARAM_REVISED_TEXT, } from '../base-declarations.js'; import { getShellDeclaration, @@ -730,6 +732,22 @@ The agent did not use the todo list because this task could be completed by a ti }, }, + distill_result: { + name: DISTILL_RESULT_TOOL_NAME, + description: `Surgically distills the most recent tool output in your history. Use this tool when a previous tool call returned a high-entropy "noise bomb" (e.g., massive grep results, verbose logs) that is overwhelming your context. 
You provide a dense, high-fidelity version of that output, and the system replaces the noisy original with your distillation. This tool is "silent"—it elides itself from history and continues directly from the revised state.`, + parametersJsonSchema: { + type: 'object', + properties: { + [DISTILL_RESULT_PARAM_REVISED_TEXT]: { + type: 'string', + description: + 'The distilled, high-signal version of the last tool output. Focus on technical facts, paths, and patterns discovered, while discarding redundant noise.', + }, + }, + required: [DISTILL_RESULT_PARAM_REVISED_TEXT], + }, + }, + compress: { name: COMPRESS_TOOL_NAME, description: `Manually triggers a context compression event. Use this after calling '${CHECKPOINT_STATE_TOOL_NAME}' to ensure your summary is persisted and the context window is cleared of unnecessary details.`, diff --git a/packages/core/src/tools/definitions/types.ts b/packages/core/src/tools/definitions/types.ts index cf4b98e553..6e90554fe2 100644 --- a/packages/core/src/tools/definitions/types.ts +++ b/packages/core/src/tools/definitions/types.ts @@ -50,5 +50,6 @@ export interface CoreToolSet { exit_plan_mode: (plansDir: string) => FunctionDeclaration; activate_skill: (skillNames: string[]) => FunctionDeclaration; checkpoint_state: FunctionDeclaration; + distill_result: FunctionDeclaration; compress: FunctionDeclaration; } diff --git a/packages/core/src/tools/distill-result.ts b/packages/core/src/tools/distill-result.ts new file mode 100644 index 0000000000..382086dfe7 --- /dev/null +++ b/packages/core/src/tools/distill-result.ts @@ -0,0 +1,172 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + BaseDeclarativeTool, + BaseToolInvocation, + Kind, + type ToolInvocation, + type ToolResult, + type ToolLiveOutput, +} from './tools.js'; +import { + DISTILL_RESULT_TOOL_NAME, + DISTILL_RESULT_PARAM_REVISED_TEXT, +} from './tool-names.js'; +import { DISTILL_RESULT_DEFINITION } from 
'./definitions/coreTools.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import type { Config } from '../config/config.js'; +import type { GeminiChat } from '../core/geminiChat.js'; +import { saveTruncatedToolOutput } from '../utils/fileUtils.js'; +import type { ShellExecutionConfig } from '../index.js'; + +interface DistillResultParams { + [DISTILL_RESULT_PARAM_REVISED_TEXT]: string; +} + +class DistillResultInvocation extends BaseToolInvocation< + DistillResultParams, + ToolResult +> { + constructor( + params: DistillResultParams, + messageBus: MessageBus, + toolName: string, + toolDisplayName: string, + private readonly config: Config, + private readonly chat: GeminiChat, + ) { + super(params, messageBus, toolName, toolDisplayName); + } + + override getDescription(): string { + return 'Distills the last tool output to reduce context entropy.'; + } + + override async execute( + _signal: AbortSignal, + _updateOutput?: (output: ToolLiveOutput) => void, + _shellExecutionConfig?: ShellExecutionConfig, + ownCallId?: string, + ): Promise { + if (!ownCallId) { + throw new Error('Critical error: ownCallId is required for distill_result elision.'); + } + const revisedText = this.params[DISTILL_RESULT_PARAM_REVISED_TEXT]; + const sideEffects = this.config.getSideEffectService(); + const history = this.chat.getComprehensiveHistory(); + + // 1. 
Find the last tool response (user message with functionResponse parts) + let lastToolResponseIndex = -1; + for (let i = history.length - 1; i >= 0; i--) { + const content = history[i]; + if ( + content.role === 'user' && + content.parts?.some((p) => p.functionResponse) + ) { + lastToolResponseIndex = i; + break; + } + } + + if (lastToolResponseIndex === -1) { + throw new Error('No tool response found in history to distill.'); + } + + const lastToolResponse = history[lastToolResponseIndex]; + const functionResponsePart = lastToolResponse.parts?.find( + (p) => p.functionResponse, + ); + if (!functionResponsePart?.functionResponse) { + throw new Error( + 'Critical error: Tool response missing functionResponse part.', + ); + } + + const targetCallId = functionResponsePart.functionResponse.id; + if (!targetCallId) { + throw new Error('Target call ID missing from tool response.'); + } + + // 2. Elide all turns between that tool response and the current turn. + if (ownCallId) { + sideEffects.elideBetween(targetCallId, ownCallId); + sideEffects.elideCall(ownCallId); + } + + // 3. Save the raw output to a temp file for safety (to avoid "self-gaslighting") + const toolName = + functionResponsePart.functionResponse.name || 'unknown_tool'; + const originalResponse = JSON.stringify( + functionResponsePart.functionResponse.response, + null, + 2, + ); + + const { outputFile } = await saveTruncatedToolOutput( + originalResponse, + toolName, + targetCallId, + this.config.storage.getProjectTempDir(), + this.config.getSessionId(), + ); + + // 4. 
Perform the rewrite (Add to distilledResults map via SideEffectService) + const distilledResponse = { + distilled: true, + distilled_output: revisedText, + original_output_file: outputFile, + }; + + sideEffects.distillResult(targetCallId, distilledResponse); + sideEffects.reprompt(); + + return { + llmContent: 'Result distilled successfully.', + returnDisplay: '', + }; + } +} + +/** + * A tool that allows the agent to surgically distill the most recent tool output in its history. + * This is the ultimate weapon against "Head Entropy". + */ +export class DistillResultTool extends BaseDeclarativeTool< + DistillResultParams, + ToolResult +> { + static readonly Name = DISTILL_RESULT_TOOL_NAME; + + constructor( + private readonly config: Config, + messageBus: MessageBus, + ) { + super( + DISTILL_RESULT_TOOL_NAME, + 'DistillResult', + DISTILL_RESULT_DEFINITION.base.description ?? '', + Kind.Think, + DISTILL_RESULT_DEFINITION.base.parametersJsonSchema, + messageBus, + ); + } + + override createInvocation( + params: DistillResultParams, + ): ToolInvocation { + const chat = this.config.getGeminiClient().getChat(); + + return new DistillResultInvocation( + params, + this.messageBus, + this.name, + this.displayName, + this.config, + chat, + ); + } +} diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 4ea83b0af4..0d95fc5225 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -150,6 +150,7 @@ export class ShellToolInvocation extends BaseToolInvocation< signal: AbortSignal, updateOutput?: (output: ToolLiveOutput) => void, shellExecutionConfig?: ShellExecutionConfig, + _callId?: string, setPidCallback?: (pid: number) => void, ): Promise { const strippedCommand = stripShellWrapper(this.params.command); diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 516827006d..017764cc7f 100644 --- a/packages/core/src/tools/tool-names.ts +++ 
b/packages/core/src/tools/tool-names.ts @@ -77,6 +77,8 @@ import { SKILL_PARAM_NAME, CHECKPOINT_STATE_TOOL_NAME, CHECKPOINT_STATE_PARAM_SUMMARY, + DISTILL_RESULT_TOOL_NAME, + DISTILL_RESULT_PARAM_REVISED_TEXT, COMPRESS_TOOL_NAME, COMPRESS_PARAM_FORCE, } from './definitions/coreTools.js'; @@ -154,6 +156,8 @@ export { SKILL_PARAM_NAME, CHECKPOINT_STATE_TOOL_NAME, CHECKPOINT_STATE_PARAM_SUMMARY, + DISTILL_RESULT_TOOL_NAME, + DISTILL_RESULT_PARAM_REVISED_TEXT, COMPRESS_TOOL_NAME, COMPRESS_PARAM_FORCE, }; @@ -239,6 +243,7 @@ export const ALL_BUILTIN_TOOL_NAMES = [ EXIT_PLAN_MODE_TOOL_NAME, CHECKPOINT_STATE_TOOL_NAME, COMPRESS_TOOL_NAME, + DISTILL_RESULT_TOOL_NAME, ] as const; /** @@ -255,6 +260,7 @@ export const PLAN_MODE_TOOLS = [ ASK_USER_TOOL_NAME, ACTIVATE_SKILL_TOOL_NAME, CHECKPOINT_STATE_TOOL_NAME, + DISTILL_RESULT_TOOL_NAME, COMPRESS_TOOL_NAME, ] as const; diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index 814151d3f4..d16206e1c6 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -61,12 +61,15 @@ export interface ToolInvocation< * Executes the tool with the validated parameters. * @param signal AbortSignal for tool cancellation. * @param updateOutput Optional callback to stream output. + * @param shellExecutionConfig Optional config for shell tools. + * @param callId The unique ID of this tool call. * @returns Result of the tool execution. 
*/ execute( signal: AbortSignal, updateOutput?: (output: ToolLiveOutput) => void, shellExecutionConfig?: ShellExecutionConfig, + callId?: string, ): Promise; } @@ -279,6 +282,7 @@ export abstract class BaseToolInvocation< signal: AbortSignal, updateOutput?: (output: ToolLiveOutput) => void, shellExecutionConfig?: ShellExecutionConfig, + callId?: string, ): Promise; } @@ -425,9 +429,10 @@ export abstract class DeclarativeTool< signal: AbortSignal, updateOutput?: (output: ToolLiveOutput) => void, shellExecutionConfig?: ShellExecutionConfig, + callId?: string, ): Promise { const invocation = this.build(params); - return invocation.execute(signal, updateOutput, shellExecutionConfig); + return invocation.execute(signal, updateOutput, shellExecutionConfig, callId); } /** @@ -606,6 +611,17 @@ export interface ToolResult { * Optional compression metrics if the tool performed context compression. */ compressionInfo?: import('../core/compression-status.js').ChatCompressionInfo; + + /** + * If true, the system will attempt to elide this tool result from the final + * conversation history. This is used by "meta-tools" like distill_result. + */ + elideFromHistory?: boolean; + + /** + * The call ID of the tool response being modified or targeted by this tool. + */ + callId?: string; } /** diff --git a/packages/core/src/utils/environmentContext.ts b/packages/core/src/utils/environmentContext.ts index 88dd1aab68..b1487fdd81 100644 --- a/packages/core/src/utils/environmentContext.ts +++ b/packages/core/src/utils/environmentContext.ts @@ -40,8 +40,11 @@ ${folderStructure}`; /** * Retrieves environment-related information to be included in the chat context. - * This includes the current working directory, date, operating system, and folder structure. - * Optionally, it can also include the full file context if enabled. + * This includes the current working directory, date, and operating system. + * + * PROJECT CLARITY: Pruned directory listing to reduce entropy. 
+ * The agent is now expected to self-service directory exploration via tools. + * * @param {Config} config - The runtime configuration and services. * @returns A promise that resolves to an array of `Part` objects containing environment information. */ @@ -53,19 +56,17 @@ export async function getEnvironmentContext(config: Config): Promise { day: 'numeric', }); const platform = process.platform; - const directoryContext = config.getIncludeDirectoryTree() - ? await getDirectoryContextString(config) - : ''; const tempDir = config.storage.getProjectTempDir(); const environmentMemory = config.getEnvironmentMemory(); + const cwd = process.cwd(); const context = ` -This is the Gemini CLI. We are setting up the context for our chat. -Today's date is ${today} (formatted according to the user's locale). -My operating system is: ${platform} -The project's temporary directory is: ${tempDir} -${directoryContext} +This is the Gemini CLI. +Today's date is ${today}. +Operating System: ${platform} +Current Working Directory: ${cwd} +Temporary Directory: ${tempDir} ${environmentMemory} `.trim(); @@ -89,4 +90,4 @@ export async function getInitialChatHistory( }, ...(extraHistory ?? []), ]; -} +} \ No newline at end of file