feat: implement adaptive thinking budget

2026-05-14 22:02:59 -07:00 · 2026-01-06 15:54:07 -08:00
parent 6f4b2ad0b9
commit 2404e4fae8
10 changed files with 450 additions and 1 deletions
@@ -0,0 +1,88 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import { describe, it, expect, vi } from 'vitest';
+import {
+  AdaptiveBudgetService,
+  ComplexityLevel,
+} from './adaptiveBudgetService.js';
+import type { Config } from '../config/config.js';
+import { ThinkingLevel } from '@google/genai';
+
+describe('AdaptiveBudgetService', () => {
+  /**
+   * Builds a minimal Config stub whose classifier client resolves with
+   * `classifierText` as the single text part of the first candidate.
+   * Returns the stub together with the `generateContent` mock so tests can
+   * assert on the classifier call.
+   */
+  const createMockConfig = (classifierText: string) => {
+    const generateContent = vi.fn().mockResolvedValue({
+      candidates: [{ content: { parts: [{ text: classifierText }] } }],
+    });
+    const config = {
+      getBaseLlmClient: () => ({ generateContent }),
+      getAdaptiveThinkingConfig: () => ({
+        enabled: true,
+        classifierModel: 'gemini-2.0-flash',
+      }),
+    } as unknown as Config;
+    return { config, generateContent };
+  };
+
+  it('should map complexity levels to correct V2 budgets', () => {
+    const service = new AdaptiveBudgetService({} as Config);
+    expect(service.getThinkingBudgetV2(ComplexityLevel.SIMPLE)).toBe(1024);
+    expect(service.getThinkingBudgetV2(ComplexityLevel.MODERATE)).toBe(4096);
+    expect(service.getThinkingBudgetV2(ComplexityLevel.HIGH)).toBe(16384);
+    expect(service.getThinkingBudgetV2(ComplexityLevel.EXTREME)).toBe(32768);
+  });
+
+  it('should map complexity levels to correct V3 levels', () => {
+    const service = new AdaptiveBudgetService({} as Config);
+    expect(service.getThinkingLevelV3(ComplexityLevel.SIMPLE)).toBe(
+      ThinkingLevel.LOW,
+    );
+    expect(service.getThinkingLevelV3(ComplexityLevel.MODERATE)).toBe(
+      ThinkingLevel.LOW,
+    );
+    expect(service.getThinkingLevelV3(ComplexityLevel.HIGH)).toBe(
+      ThinkingLevel.HIGH,
+    );
+    expect(service.getThinkingLevelV3(ComplexityLevel.EXTREME)).toBe(
+      ThinkingLevel.HIGH,
+    );
+  });
+
+  it('should determine adaptive config based on LLM response', async () => {
+    const { config, generateContent } = createMockConfig('3');
+
+    const service = new AdaptiveBudgetService(config);
+    const result = await service.determineAdaptiveConfig(
+      'Complex task',
+      'gemini-2.5-pro',
+    );
+
+    expect(result?.complexity).toBe(ComplexityLevel.HIGH);
+    expect(result?.thinkingBudget).toBe(16384);
+    expect(generateContent).toHaveBeenCalledTimes(1);
+  });
+
+  it('should handle Gemini 3 models with thinkingLevel', async () => {
+    const { config } = createMockConfig('1');
+
+    const service = new AdaptiveBudgetService(config);
+    const result = await service.determineAdaptiveConfig(
+      'Hi',
+      'gemini-3-pro-preview',
+    );
+
+    expect(result?.complexity).toBe(ComplexityLevel.SIMPLE);
+    expect(result?.thinkingLevel).toBe(ThinkingLevel.LOW);
+    expect(result?.thinkingBudget).toBeUndefined();
+  });
+
+  // The classifier is an LLM, so its output is not guaranteed to follow the
+  // "single integer from 1 to 4" instruction; the service must reject anything
+  // else instead of producing a bogus configuration.
+  it.each(['', 'not-a-number', '0', '5'])(
+    'should return undefined for unusable classifier output "%s"',
+    async (classifierText) => {
+      const { config, generateContent } = createMockConfig(classifierText);
+
+      const service = new AdaptiveBudgetService(config);
+      const result = await service.determineAdaptiveConfig(
+        'Anything',
+        'gemini-2.5-pro',
+      );
+
+      expect(result).toBeUndefined();
+      expect(generateContent).toHaveBeenCalledTimes(1);
+    },
+  );
+});
@@ -0,0 +1,132 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import type { Config } from '../config/config.js';
+import { debugLogger } from '../utils/debugLogger.js';
+import { isGemini2Model, isPreviewModel } from '../config/models.js';
+import { ThinkingLevel } from '@google/genai';
+
+/**
+ * Task complexity as reported by the classifier model.
+ *
+ * The numeric values are significant: the classifier prompt asks for a single
+ * integer from 1 to 4 and the parsed integer is used directly as the enum
+ * value, so these numbers must stay aligned with the scale in that prompt.
+ */
+export enum ComplexityLevel {
+  // Quick fixes, syntax questions, simple explanations, greetings.
+  SIMPLE = 1,
+  // Function-level logic, small scripts, standard debugging.
+  MODERATE = 2,
+  // Module-level refactoring, complex features, multi-file changes.
+  HIGH = 3,
+  // Architecture design, deep root-cause analysis, large-scale migrations.
+  EXTREME = 4,
+}
+
+/**
+ * Numeric `thinkingBudget` assigned to each complexity level.
+ *
+ * Applied by AdaptiveBudgetService when the target model satisfies
+ * `isGemini2Model`. Each step up in complexity increases the budget, from
+ * 1024 at SIMPLE to 32768 at EXTREME.
+ */
+export const BUDGET_MAPPING_V2: Record<ComplexityLevel, number> = {
+  [ComplexityLevel.SIMPLE]: 1024,
+  [ComplexityLevel.MODERATE]: 4096,
+  [ComplexityLevel.HIGH]: 16384,
+  [ComplexityLevel.EXTREME]: 32768,
+};
+
+/**
+ * `ThinkingLevel` assigned to each complexity level.
+ *
+ * Applied by AdaptiveBudgetService when the target model satisfies
+ * `isPreviewModel`. Only two levels are used, so the four complexity levels
+ * collapse pairwise: SIMPLE/MODERATE -> LOW, HIGH/EXTREME -> HIGH.
+ */
+export const LEVEL_MAPPING_V3: Record<ComplexityLevel, ThinkingLevel> = {
+  [ComplexityLevel.SIMPLE]: ThinkingLevel.LOW,
+  [ComplexityLevel.MODERATE]: ThinkingLevel.LOW,
+  [ComplexityLevel.HIGH]: ThinkingLevel.HIGH,
+  [ComplexityLevel.EXTREME]: ThinkingLevel.HIGH,
+};
+
+/**
+ * Outcome of a complexity classification.
+ *
+ * `thinkingBudget` and `thinkingLevel` are each populated for a different
+ * model family; both are left unset when the target model matches neither
+ * family.
+ */
+export interface AdaptiveBudgetResult {
+  // Complexity level parsed from the classifier response.
+  complexity: ComplexityLevel;
+  // Set only when the target model satisfies `isGemini2Model`.
+  thinkingBudget?: number;
+  // Set only when the target model satisfies `isPreviewModel`.
+  thinkingLevel?: ThinkingLevel;
+  // Free-text hint, currently set only for EXTREME complexity.
+  strategyNote?: string;
+}
+
+/**
+ * Classifies the complexity of a user prompt with a classifier model and maps
+ * the result to a thinking configuration for the target model.
+ */
+export class AdaptiveBudgetService {
+  constructor(private readonly config: Config) {}
+
+  /**
+   * Narrows a parsed number to a valid ComplexityLevel.
+   * NaN fails both comparisons and is therefore rejected as well.
+   */
+  private static isComplexityLevel(value: number): value is ComplexityLevel {
+    return (
+      Number.isInteger(value) &&
+      value >= ComplexityLevel.SIMPLE &&
+      value <= ComplexityLevel.EXTREME
+    );
+  }
+
+  /**
+   * Analyzes the user prompt and determines the optimal thinking configuration.
+   *
+   * This is a best-effort operation: any failure (missing configuration,
+   * classifier error, empty or unparseable classifier output) is logged and
+   * reported as `undefined` rather than thrown.
+   *
+   * Note on future scaling (per arXiv:2512.19585):
+   * At Complexity 4 (Extreme), we should consider:
+   * 1. Best-of-N: Generate multiple solutions.
+   * 2. LLM-as-a-Judge: Use a strong model to evaluate candidates.
+   * 3. Compiler Verification: Check code correctness via environment tools.
+   *
+   * @param userPrompt The raw user request to classify.
+   * @param model The model the request will be sent to; selects whether a
+   *   `thinkingLevel` or a `thinkingBudget` is produced.
+   * @param abortSignal Optional signal used to cancel the classifier call.
+   *   When omitted, a signal that is never aborted is used.
+   * @returns The classification result, or `undefined` if no valid complexity
+   *   level could be determined.
+   */
+  async determineAdaptiveConfig(
+    userPrompt: string,
+    model: string,
+    abortSignal?: AbortSignal,
+  ): Promise<AdaptiveBudgetResult | undefined> {
+    try {
+      // Read inside the try block so that a missing or throwing config
+      // accessor degrades to `undefined` like every other failure here.
+      const { classifierModel } = this.config.getAdaptiveThinkingConfig();
+      const llm = this.config.getBaseLlmClient();
+      debugLogger.debug(
+        `AdaptiveBudgetService: Classifying prompt complexity using ${classifierModel}...`,
+      );
+      const systemPrompt = `You are a complexity classifier for a coding assistant. 
+Analyze the user's request and determine the complexity of the task.
+Output ONLY a single integer from 1 to 4 based on the following scale:
+
+1 (Simple): Quick fixes, syntax questions, simple explanations, greetings.
+2 (Moderate): Function-level logic, writing small scripts, standard debugging.
+3 (High): Module-level refactoring, complex feature implementation, multi-file changes.
+4 (Extreme): Architecture design, deep root-cause analysis of obscure bugs, large-scale migrations.
+
+Request: ${userPrompt}
+Complexity Level:`;
+
+      const response = await llm.generateContent({
+        modelConfigKey: { model: classifierModel },
+        contents: [{ role: 'user', parts: [{ text: systemPrompt }] }],
+        promptId: 'adaptive-budget-classifier',
+        abortSignal: abortSignal ?? new AbortController().signal,
+      });
+
+      const text = response.candidates?.[0]?.content?.parts?.[0]?.text?.trim();
+      if (!text) {
+        debugLogger.debug(
+          'AdaptiveBudgetService: No response from classifier.',
+        );
+        return undefined;
+      }
+
+      // parseInt tolerates trailing text such as "3 (High)"; the guard below
+      // rejects NaN and anything outside the 1-4 scale before the value is
+      // treated as a ComplexityLevel.
+      const level = Number.parseInt(text, 10);
+      if (!AdaptiveBudgetService.isComplexityLevel(level)) {
+        debugLogger.debug(
+          `AdaptiveBudgetService: Invalid complexity level returned: ${text}`,
+        );
+        return undefined;
+      }
+
+      const result: AdaptiveBudgetResult = { complexity: level };
+
+      // Determine mapping based on model version
+      // Gemini 3 uses ThinkingLevel, Gemini 2.x uses thinkingBudget
+      if (isPreviewModel(model)) {
+        result.thinkingLevel = this.getThinkingLevelV3(level);
+      } else if (isGemini2Model(model)) {
+        result.thinkingBudget = this.getThinkingBudgetV2(level);
+      }
+
+      if (level === ComplexityLevel.EXTREME) {
+        result.strategyNote =
+          'EXTREME complexity detected. Future implementations should use Best-of-N + Verification.';
+      }
+
+      // `??` keeps a legitimate falsy value visible; 'none' marks models that
+      // matched neither family and therefore received no thinking parameter.
+      debugLogger.debug(
+        `AdaptiveBudgetService: Complexity ${level} -> Thinking Param: ${result.thinkingLevel ?? result.thinkingBudget ?? 'none'}`,
+      );
+      return result;
+    } catch (error) {
+      debugLogger.error(
+        'AdaptiveBudgetService: Error classifying complexity',
+        error,
+      );
+      return undefined;
+    }
+  }
+
+  /**
+   * Returns the numeric thinking budget mapped to `level`.
+   */
+  getThinkingBudgetV2(level: ComplexityLevel): number {
+    return BUDGET_MAPPING_V2[level];
+  }
+
+  /**
+   * Returns the ThinkingLevel mapped to `level`, falling back to
+   * `ThinkingLevel.HIGH` if the mapping has no entry for it.
+   */
+  getThinkingLevelV3(level: ComplexityLevel): ThinkingLevel {
+    return LEVEL_MAPPING_V3[level] ?? ThinkingLevel.HIGH;
+  }
+}
@@ -4,7 +4,7 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import type { GenerateContentConfig } from '@google/genai';
+import type { GenerateContentConfig, ThinkingLevel } from '@google/genai';

 // The primary key for the ModelConfig is the model string. However, we also
 // support a secondary key to limit the override scope, typically an agent name.
@@ -26,6 +26,10 @@ export interface ModelConfigKey {
  // This allows overrides to specify different settings (e.g., higher temperature)
  // specifically for retry scenarios.
  isRetry?: boolean;
+
+  // Dynamic thinking configuration determined at runtime (e.g. via complexity classification)
+  thinkingBudget?: number;
+  thinkingLevel?: ThinkingLevel;
 }

 export interface ModelConfig {
@@ -205,6 +209,22 @@ export class ModelConfigService {
      }
    }

+    // Apply dynamic thinking parameters from context if present
+    if (
+      context.thinkingBudget !== undefined ||
+      context.thinkingLevel !== undefined
+    ) {
+      resolvedConfig.thinkingConfig = {
+        ...(resolvedConfig.thinkingConfig as object),
+        ...(context.thinkingBudget !== undefined
+          ? { thinkingBudget: context.thinkingBudget }
+          : {}),
+        ...(context.thinkingLevel !== undefined
+          ? { thinkingLevel: context.thinkingLevel }
+          : {}),
+      };
+    }
+
    return {
      model: baseModel,
      generateContentConfig: resolvedConfig,