feat(config): implement dynamic experiment resolution and unified overrides

2026-06-10 11:12:35 -07:00 · 2026-02-19 16:37:43 -06:00
parent 2e1b556764
commit d61eed2880
5 changed files with 253 additions and 17 deletions
@@ -7,30 +7,37 @@ description: Guide developers through the process of adding new remote experimen

 This skill assists developers in adding net-new remote experiments (feature flags) to the Gemini CLI codebase. It acts as an interactive guide to ensure all necessary scaffolding, telemetry, command-line overrides, settings, and code placement are considered.

+## Core Pattern: Unified Configuration
+
+Gemini CLI uses a unified `Config` object as the source of truth. Every experiment should be accessed via the `config.getExperimentValue(flagId)` method, which internally handles the priority of overrides:
+**Command Line Argument (--experiment-*) > Local Setting (experimental.*) > Remote Experiment > Default Value.**
+
 ## Workflow: Adding a New Experiment

 When a user asks to add a new experiment, follow these steps sequentially:

 ### 1. Scaffolding the Config Entry
- Guide the user to add the new experiment ID to `ExperimentFlags` in `packages/core/src/code_assist/experiments/flagNames.ts`.
- Ensure a corresponding entry is added to `ExperimentMetadata` in the same file. This must include a clear `description`, the `type` (`boolean`, `number`, `string`), and a sensible `defaultValue`.
+- **File:** `packages/core/src/code_assist/experiments/flagNames.ts`
+- Add the new experiment ID to `ExperimentFlags`.
+- Add a corresponding entry to `ExperimentMetadata`, including `description`, `type`, and `defaultValue`.
+- **Note:** The key in `ExperimentFlags` (converted to kebab-case) will be the name used for CLI flags and Settings. For example, `MY_NEW_FEATURE` becomes `my-new-feature`.

-### 2. Command Line and Settings Overrides (Crucial Pattern)
-Every remote flag must be overridable via local settings and command-line arguments.
- **Settings:** Guide the user to add a corresponding property to the settings schema (e.g., in `schemas/settings.schema.json` and the corresponding TypeScript interfaces).
- **Command Line:** Guide the user to add a corresponding CLI argument in the appropriate command definitions.
- **The Config Object:** The source of truth for the rest of the application should be the unified `Config` object. Guide the user to wrap the remote experiment value, the local setting, and the command-line argument into a single property on the `Config` object. The typical priority order is: Command Line > Local Setting > Remote Experiment > Default Value.
+### 2. Usage in Code
+- **Method:** `config.getExperimentValue<Type>(ExperimentFlags.YOUR_FLAG_ID)`
+- This method is dynamic. You do **not** need to update the `Config` class or Yargs for every new flag.
+- **CLI Override:** Users can override via `--experiment your-flag-name=value`.
+- **Settings Override:** Users can override in their `settings.json`:
+  ```json
+  "experimental": {
+    "your-flag-name": value
+  }
+  ```

-### 3. Code Placement and Usage
- Discuss with the user *where* in the codebase this experiment should take effect.
- Guide them on how to correctly fetch and evaluate the experiment value using the unified `Config` object.
- Help them write the necessary conditional logic around the experimental feature.
-
-### 4. Telemetry and Metrics (Crucial)
+### 3. Telemetry and Metrics (Crucial)
 - Prompt the user to think deeply about what metrics are necessary to evaluate the success or failure of the experiment.
 - Ask questions like: "How will we know if this feature is working as intended?" or "What telemetry events should be fired when this new code path is executed?"
 - Help them add the necessary telemetry calls to the codebase to capture these insights.

-### 5. Branching and PR Preparation
+### 4. Branching and PR Preparation
 - If not already on a dedicated branch, help the user create a new git branch for this experiment (e.g., `git checkout -b exp/feature-name`).
 - Remind them to run local tests and linting (`npm run lint:all`, `npm test` or `npm run preflight`) before preparing a Pull Request.
@@ -106,6 +106,7 @@ export interface CliArgs {
  rawOutput: boolean | undefined;
  acceptRawOutputRisk: boolean | undefined;
  isCommand: boolean | undefined;
+  [key: string]: unknown;
 }

 /**
@@ -443,13 +444,22 @@ export async function parseArguments(
        .option('accept-raw-output-risk', {
          type: 'boolean',
          description: 'Suppress the security warning when using --raw-output.',
+        })
+        .option('experiment', {
+          type: 'array',
+          string: true,
+          nargs: 1,
+          description:
+            'Override experiment flags locally (format: flag=value, comma-separated or multiple --experiment)',
+          coerce: (exps: string[]) =>
+            // Handle comma-separated values
+            exps.flatMap((e) => e.split(',').map((s) => s.trim())),
        }),
    )
    .version(await getVersion()) // This will enable the --version flag based on package.json
    .alias('v', 'version')
    .help()
    .alias('h', 'help')
-    .strict()
    .demandCommand(0, 0) // Allow base command to run with no subcommands
    .exitProcess(false);

@@ -873,6 +883,22 @@ export async function loadCliConfig(
    }
  }

+  const experimentalCliArgs: Record<string, unknown> = {};
+  if (argv['experiment'] && Array.isArray(argv['experiment'])) {
+    for (const entry of argv['experiment']) {
+      const [key, ...valueParts] = entry.split('=');
+      const value = valueParts.join('=');
+      if (key && value !== undefined) {
+        // Simple type inference for CLI args
+        if (value === 'true') experimentalCliArgs[key] = true;
+        else if (value === 'false') experimentalCliArgs[key] = false;
+        else if (!isNaN(Number(value)))
+          experimentalCliArgs[key] = Number(value);
+        else experimentalCliArgs[key] = value;
+      }
+    }
+  }
+
  let clientName: string | undefined = undefined;
  if (isAcpMode) {
    const ide = detectIdeFromEnv();
@@ -893,7 +919,6 @@ export async function loadCliConfig(
    ...(useContextManagement ? settings?.contextManagement : {}),
    enabled: useContextManagement || useGeneralistProfile,
  };
-
  return new Config({
    acpMode: isAcpMode,
    clientName,
@@ -985,6 +1010,8 @@ export async function loadCliConfig(
    planSettings: settings.general?.plan?.directory
      ? settings.general.plan
      : (extensionPlanSettings ?? settings.general?.plan),
+    experimentalSettings: settings.experimental,
+    experimentalCliArgs,
    enableEventDrivenScheduler: true,
    skillsSupport: settings.skills?.enabled ?? true,
    disabledSkills: settings.skills?.disabled,
@@ -88,3 +88,15 @@ export const ExperimentMetadata: Record<number, ExperimentMetadataEntry> = {
    defaultValue: true,
  },
 };
+
+/**
+ * Gets the name of an experiment flag from its ID.
+ */
+export function getExperimentFlagName(flagId: number): string | undefined {
+  for (const [name, id] of Object.entries(ExperimentFlags)) {
+    if (id === flagId) {
+      return name.toLowerCase().replace(/_/g, '-');
+    }
+  }
+  return undefined;
+}
@@ -0,0 +1,123 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { Config } from './config.js';
+import { ExperimentFlags } from '../code_assist/experiments/flagNames.js';
+
+describe('Config getExperimentValue', () => {
+  const sessionId = 'test-session';
+  const targetDir = process.cwd();
+  const cwd = process.cwd();
+  const model = 'gemini-pro';
+
+  it('should prioritize CLI argument over others', () => {
+    const config = new Config({
+      sessionId,
+      targetDir,
+      cwd,
+      model,
+      experimentalCliArgs: { 'enable-preview': true },
+      experimentalSettings: { 'enable-preview': false },
+      experiments: {
+        flags: {
+          [ExperimentFlags.ENABLE_PREVIEW]: { boolValue: false },
+        },
+        experimentIds: [],
+      },
+    });
+
+    expect(config.getExperimentValue(ExperimentFlags.ENABLE_PREVIEW)).toBe(
+      true,
+    );
+  });
+
+  it('should prioritize local setting over remote experiment', () => {
+    const config = new Config({
+      sessionId,
+      targetDir: process.cwd(),
+      cwd: process.cwd(),
+      model,
+      experimentalSettings: { 'enable-preview': true },
+      experiments: {
+        flags: {
+          [ExperimentFlags.ENABLE_PREVIEW]: { boolValue: false },
+        },
+        experimentIds: [],
+      },
+    });
+
+    expect(config.getExperimentValue(ExperimentFlags.ENABLE_PREVIEW)).toBe(
+      true,
+    );
+  });
+
+  it('should use remote experiment if no local override', () => {
+    const config = new Config({
+      sessionId,
+      targetDir: process.cwd(),
+      cwd: process.cwd(),
+      model,
+      experiments: {
+        flags: {
+          [ExperimentFlags.ENABLE_PREVIEW]: { boolValue: true },
+        },
+        experimentIds: [],
+      },
+    });
+
+    expect(config.getExperimentValue(ExperimentFlags.ENABLE_PREVIEW)).toBe(
+      true,
+    );
+  });
+
+  it('should use default value if nothing else is set', () => {
+    const config = new Config({
+      sessionId,
+      targetDir: process.cwd(),
+      cwd: process.cwd(),
+      model,
+    });
+
+    // Default for ENABLE_PREVIEW is false
+    expect(config.getExperimentValue(ExperimentFlags.ENABLE_PREVIEW)).toBe(
+      false,
+    );
+  });
+
+  it('should handle numeric values correctly', () => {
+    const config = new Config({
+      sessionId,
+      targetDir: process.cwd(),
+      cwd: process.cwd(),
+      model,
+      experimentalCliArgs: { 'classifier-threshold': 0.8 },
+    });
+
+    expect(
+      config.getExperimentValue<number>(ExperimentFlags.CLASSIFIER_THRESHOLD),
+    ).toBe(0.8);
+  });
+
+  it('should handle string representation of numbers from remote', () => {
+    const config = new Config({
+      sessionId,
+      targetDir: process.cwd(),
+      cwd: process.cwd(),
+      model,
+      experiments: {
+        flags: {
+          [ExperimentFlags.CLASSIFIER_THRESHOLD]: { stringValue: '0.7' },
+        },
+        experimentIds: [],
+      },
+    });
+
+    expect(
+      config.getExperimentValue<number>(ExperimentFlags.CLASSIFIER_THRESHOLD),
+    ).toBe(0.7);
+  });
+});
@@ -162,7 +162,12 @@ import {
 import { AgentRegistry } from '../agents/registry.js';
 import { AcknowledgedAgentsService } from '../agents/acknowledgedAgents.js';
 import { setGlobalProxy, updateGlobalFetchTimeouts } from '../utils/fetch.js';
-import { ExperimentFlags } from '../code_assist/experiments/flagNames.js';
+import { SubagentTool } from '../agents/subagent-tool.js';
+import {
+  ExperimentFlags,
+  ExperimentMetadata,
+  getExperimentFlagName,
+} from '../code_assist/experiments/flagNames.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import { SkillManager, type SkillDefinition } from '../skills/skillManager.js';
 import { startupProfiler } from '../telemetry/startupProfiler.js';
@@ -692,6 +697,8 @@ export interface ConfigParameters {
  disabledHooks?: string[];
  projectHooks?: { [K in HookEventName]?: HookDefinition[] };
  enableAgents?: boolean;
+  experimentalSettings?: Record<string, unknown>;
+  experimentalCliArgs?: Record<string, unknown>;
  enableEventDrivenScheduler?: boolean;
  skillsSupport?: boolean;
  disabledSkills?: string[];
@@ -935,6 +942,8 @@ export class Config implements McpContext, AgentLoopContext {

  private readonly enableAgents: boolean;
  private agents: AgentSettings;
+  private readonly experimentalSettings: Record<string, unknown>;
+  private readonly experimentalCliArgs: Record<string, unknown>;
  private readonly enableEventDrivenScheduler: boolean;
  private readonly skillsSupport: boolean;
  private disabledSkills: string[];
@@ -1092,6 +1101,8 @@ export class Config implements McpContext, AgentLoopContext {
    this._activeModel = params.model;
    this.enableAgents = params.enableAgents ?? true;
    this.agents = params.agents ?? {};
+    this.experimentalSettings = params.experimentalSettings ?? {};
+    this.experimentalCliArgs = params.experimentalCliArgs ?? {};
    this.disableLLMCorrection = params.disableLLMCorrection ?? true;
    this.planEnabled = params.plan ?? true;
    this.trackerEnabled = params.tracker ?? false;
@@ -3677,6 +3688,62 @@ export class Config implements McpContext, AgentLoopContext {
    return this.experiments;
  }

+  /**
+   * Resolves the value of an experiment flag based on priority:
+   * 1. Command-line argument (--experiment-<flag-name>)
+   * 2. Local setting (experimental.<flag-name>)
+   * 3. Remote experiment
+   * 4. Default value
+   */
+  getExperimentValue<T extends boolean | number | string>(
+    flagId: number,
+  ): T | undefined {
+    const flagName = getExperimentFlagName(flagId);
+    if (!flagName) {
+      return undefined;
+    }
+
+    // 1. Command-line argument
+    const cliValue = this.experimentalCliArgs[flagName];
+    if (cliValue !== undefined) {
+      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+      return cliValue as T;
+    }
+
+    // 2. Local setting
+    const settingValue = this.experimentalSettings[flagName];
+    if (settingValue !== undefined) {
+      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+      return settingValue as T;
+    }
+
+    // 3. Remote experiment
+    const remoteFlag = this.experiments?.flags[flagId];
+    if (remoteFlag) {
+      const val =
+        remoteFlag.boolValue ??
+        remoteFlag.floatValue ??
+        remoteFlag.intValue ??
+        remoteFlag.stringValue;
+      if (val !== undefined) {
+        // Handle string representation of numbers if necessary
+        if (typeof val === 'string' && !isNaN(Number(val))) {
+          const metadata = ExperimentMetadata[flagId];
+          if (metadata?.type === 'number') {
+            // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+            return Number(val) as unknown as T;
+          }
+        }
+        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+        return val as unknown as T;
+      }
+    }
+
+    // 4. Default value from metadata
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+    return ExperimentMetadata[flagId]?.defaultValue as unknown as T;
+  }
+
  /**
   * Set experiments configuration
   */