feat(optimization): implement manifest-driven extraction pipeline

- Implement `extract.ts` with robust character-aware parsing for snippets and tools.
- Consolidate research dependencies by moving `@ax-llm/ax` to root `optionalDependencies`.
- Relocate evaluation logic from `packages/core` to `scripts/optimization/lib/evals` to keep the production core lean.
- Add `optimization_targets` to `data/manifest.json` as the single source of truth for the pipeline.
- Implement comprehensive unit tests for extraction and variable masking with 100% pass rate.
- Update global config and linting rules to support the new optimization infrastructure.
2026-06-10 11:12:35 -07:00 · 2026-03-04 14:25:17 -08:00
parent 6c94c4d9ca
commit 59d377e5e0
20 changed files with 599 additions and 143 deletions
@@ -62,3 +62,4 @@ gemini-debug.log
 .gemini-clipboard/
 .eslintcache
 evals/logs/
+data/optimization/
@@ -66,6 +66,14 @@
    },
    "file_descriptions": {
      "data/tool_alignment.jsonl": "Ensures the model selects the correct built-in tool over generic shell commands and optimizes for brevity."
+    },
+    "optimization_targets": {
+      "snippets": [
+        "renderCoreMandates",
+        "renderPrimaryWorkflows",
+        "renderOperationalGuidelines",
+        "renderGitRepo"
+      ]
    }
  }
 }
@@ -68,7 +68,11 @@ export default tseslint.config(
  },
  {
    // Rules for packages/*/src (TS/TSX)
-    files: ['packages/*/src/**/*.{ts,tsx}', 'data/**/*.ts'],
+    files: [
+      'packages/*/src/**/*.{ts,tsx}',
+      'data/**/*.ts',
+      'scripts/optimization/**/*.ts',
+    ],
    plugins: {
      import: importPlugin,
    },
@@ -73,6 +73,7 @@
        "node": ">=20.0.0"
      },
      "optionalDependencies": {
+        "@ax-llm/ax": "^19.0.11",
        "@lydell/node-pty": "1.1.0",
        "@lydell/node-pty-darwin-arm64": "1.1.0",
        "@lydell/node-pty-darwin-x64": "1.1.0",
@@ -179,6 +180,21 @@
        "node": ">=6.0.0"
      }
    },
+    "node_modules/@ax-llm/ax": {
+      "version": "19.0.11",
+      "resolved": "https://registry.npmjs.org/@ax-llm/ax/-/ax-19.0.11.tgz",
+      "integrity": "sha512-U3ZYzBrmMDTDst32jxgH873gC4c75aYjzdCZgwQWy+CwSDL2SskwQX2kZAWGDmmSzs8BxskleoASzQUXuqRLfQ==",
+      "hasInstallScript": true,
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "@opentelemetry/api": "^1.9.0",
+        "dayjs": "^1.11.13"
+      },
+      "bin": {
+        "ax": "cli/index.mjs"
+      }
+    },
    "node_modules/@azu/format-text": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/@azu/format-text/-/format-text-1.0.2.tgz",
@@ -2292,6 +2308,7 @@
      "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
      "dev": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@octokit/auth-token": "^6.0.0",
        "@octokit/graphql": "^9.0.2",
@@ -2472,6 +2489,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
      "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
      "license": "Apache-2.0",
+      "peer": true,
      "engines": {
        "node": ">=8.0.0"
      }
@@ -2521,6 +2539,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
      "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/semantic-conventions": "^1.29.0"
      },
@@ -2895,6 +2914,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
      "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/core": "2.5.0",
        "@opentelemetry/semantic-conventions": "^1.29.0"
@@ -2928,6 +2948,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
      "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/core": "2.5.0",
        "@opentelemetry/resources": "2.5.0"
@@ -2982,6 +3003,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
      "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/core": "2.5.0",
        "@opentelemetry/resources": "2.5.0",
@@ -4178,6 +4200,7 @@
      "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
      "devOptional": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "csstype": "^3.0.2"
      }
@@ -4451,6 +4474,7 @@
      "integrity": "sha512-klQbnPAAiGYFyI02+znpBRLyjL4/BrBd0nyWkdC0s/6xFLkXYQ8OoRrSkqacS1ddVxf/LDyODIKbQ5TgKAf/Fg==",
      "dev": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@typescript-eslint/scope-manager": "8.56.1",
        "@typescript-eslint/types": "8.56.1",
@@ -5298,6 +5322,7 @@
      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
      "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
      "license": "MIT",
+      "peer": true,
      "bin": {
        "acorn": "bin/acorn"
      },
@@ -6879,6 +6904,13 @@
        "url": "https://github.com/sponsors/ljharb"
      }
    },
+    "node_modules/dayjs": {
+      "version": "1.11.19",
+      "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.19.tgz",
+      "integrity": "sha512-t5EcLVS6QPBNqM2z8fakk/NKel+Xzshgt8FFKAn+qwlD1pzZWxh0nVCrvFK7ZDb6XucZeF9z8C7CBWTRIVApAw==",
+      "license": "MIT",
+      "optional": true
+    },
    "node_modules/debug": {
      "version": "4.4.3",
      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -7901,6 +7933,7 @@
      "integrity": "sha512-VmQ+sifHUbI/IcSopBCF/HO3YiHQx/AVd3UVyYL6weuwW+HvON9VYn5l6Zl1WZzPWXPNZrSQpxwkkZ/VuvJZzg==",
      "dev": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@eslint-community/eslint-utils": "^4.8.0",
        "@eslint-community/regexpp": "^4.12.1",
@@ -8533,6 +8566,7 @@
      "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
      "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "accepts": "^2.0.0",
        "body-parser": "^2.2.1",
@@ -9847,6 +9881,7 @@
      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.2.tgz",
      "integrity": "sha512-gJnaDHXKDayjt8ue0n8Gs0A007yKXj4Xzb8+cNjZeYsSzzwKc0Lr+OZgYwVfB0pHfUs17EPoLvrOsEaJ9mj+Tg==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=16.9.0"
      }
@@ -10126,6 +10161,7 @@
      "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.4.11.tgz",
      "integrity": "sha512-93LQlzT7vvZ1XJcmOMwN4s+6W334QegendeHOMnEJBlhnpIzr8bws6/aOEHG8ZCuVD/vNeeea5m1msHIdAY6ig==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@alcalzone/ansi-tokenize": "^0.2.1",
        "ansi-escapes": "^7.0.0",
@@ -13808,6 +13844,7 @@
      "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
      "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=0.10.0"
      }
@@ -13818,6 +13855,7 @@
      "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
      "devOptional": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "shell-quote": "^1.6.1",
        "ws": "^7"
@@ -15906,6 +15944,7 @@
      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=12"
      },
@@ -16129,7 +16168,8 @@
      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
      "dev": true,
-      "license": "0BSD"
+      "license": "0BSD",
+      "peer": true
    },
    "node_modules/tsx": {
      "version": "4.20.3",
@@ -16137,6 +16177,7 @@
      "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
      "devOptional": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "esbuild": "~0.25.0",
        "get-tsconfig": "^4.7.5"
@@ -16296,6 +16337,7 @@
      "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
      "devOptional": true,
      "license": "Apache-2.0",
+      "peer": true,
      "bin": {
        "tsc": "bin/tsc",
        "tsserver": "bin/tsserver"
@@ -16519,6 +16561,7 @@
      "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
      "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "esbuild": "^0.25.0",
        "fdir": "^6.5.0",
@@ -16632,6 +16675,7 @@
      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=12"
      },
@@ -16644,6 +16688,7 @@
      "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
      "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@types/chai": "^5.2.2",
        "@vitest/expect": "3.2.4",
@@ -17288,6 +17333,7 @@
      "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
      "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
      "license": "MIT",
+      "peer": true,
      "funding": {
        "url": "https://github.com/sponsors/colinhacks"
      }
@@ -17687,6 +17733,7 @@
      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=12"
      },
@@ -62,8 +62,10 @@
    "release:version": "node scripts/version.js",
    "telemetry": "node scripts/telemetry.js",
    "data:validate": "tsx scripts/validate-data.ts",
-    "data:format": "prettier --write 'data/**/*.ts' 'data/*.json' 'scripts/validate-data.ts' 'packages/core/src/evals/**/*.ts'",
-    "data:lint": "eslint 'data/**/*.ts' 'scripts/validate-data.ts' 'packages/core/src/evals/**/*.ts'",
+    "data:format": "prettier --write 'data/*.json' 'scripts/validate-data.ts' 'scripts/optimization/**/*.ts'",
+    "data:lint": "eslint 'scripts/validate-data.ts' 'scripts/optimization/**/*.ts'",
+    "optimize": "tsx scripts/optimization/run.ts",
+    "optimize:extract": "tsx scripts/optimization/extract.ts",
    "check:lockfile": "node scripts/check-lockfile.js",
    "clean": "node scripts/clean.js",
    "pre-commit": "node scripts/pre-commit.js"
@@ -145,6 +147,7 @@
    "simple-git": "^3.28.0"
  },
  "optionalDependencies": {
+    "@ax-llm/ax": "^19.0.11",
    "@lydell/node-pty": "1.1.0",
    "@lydell/node-pty-darwin-arm64": "1.1.0",
    "@lydell/node-pty-darwin-x64": "1.1.0",
@@ -1,41 +0,0 @@
-/**
- * @license
- * Copyright 2026 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, it, expect } from 'vitest';
-import { evaluateTokenFrugality } from './tokenFrugality.js';
-import { MetricObjective, OptimizationDirection } from '../types.js';
-
-describe('evaluateTokenFrugality', () => {
-  it('should return the raw character count as the score', () => {
-    const prediction = { output_text: 'Hello' };
-    const result = evaluateTokenFrugality(prediction);
-    expect(result.score).toBe(5);
-    expect(result.objective).toBe(MetricObjective.FRUGALITY);
-    expect(result.direction).toBe(OptimizationDirection.MINIMIZE);
-    expect(result.reason).toContain('contains 5 characters');
-  });
-
-  it('should flag if response is succinct (under threshold)', () => {
-    const prediction = { output_text: 'Short' };
-    const result = evaluateTokenFrugality(prediction);
-    expect(result.metadata?.['isOverThreshold']).toBe(false);
-    expect(result.reason).toContain('Succinct response');
-  });
-
-  it('should flag if response exceeds chatter threshold', () => {
-    const prediction = { output_text: 'a'.repeat(50) };
-    const result = evaluateTokenFrugality(prediction);
-    expect(result.metadata?.['isOverThreshold']).toBe(true);
-    expect(result.reason).toContain('Exceeds threshold');
-  });
-
-  it('should handle missing output text as 0 chars', () => {
-    const prediction = {};
-    const result = evaluateTokenFrugality(prediction);
-    expect(result.score).toBe(0);
-    expect(result.reason).toContain('contains 0 characters');
-  });
-});
@@ -1,49 +0,0 @@
-/**
- * @license
- * Copyright 2026 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { debugLogger } from '../../utils/debugLogger.js';
-import { DEFAULT_EVAL_CONFIG } from '../config.js';
-import { MetricObjective, OptimizationDirection } from '../types.js';
-import type { MetricResult } from '../types.js';
-
-/**
- * Evaluates the frugality of a model's response by measuring total character count.
- * Focuses on reducing conversational noise ("chatter").
- */
-export function evaluateTokenFrugality(
-  prediction: { output_text?: string },
-  config = DEFAULT_EVAL_CONFIG.objectives.frugality,
-): MetricResult {
-  const chatter = prediction.output_text ?? '';
-  const chatterLength = chatter.length;
-
-  debugLogger.debug(
-    `[Eval:Frugality] Measuring output text length: ${chatterLength} chars.`,
-  );
-
-  // In Genetic-Pareto, the raw score (length) is the value to be MINIMIZED.
-  // We provide the raw count as the score, and the direction tells the optimizer how to handle it.
-
-  let reason = `Response contains ${chatterLength} characters of non-tool text.`;
-
-  if (chatterLength > config.chattyThresholdChars) {
-    reason += ` (Exceeds threshold of ${config.chattyThresholdChars})`;
-  } else {
-    reason += ' (Succinct response)';
-  }
-
-  return {
-    score: chatterLength,
-    objective: MetricObjective.FRUGALITY,
-    direction: OptimizationDirection.MINIMIZE,
-    reason,
-    metadata: {
-      charCount: chatterLength,
-      threshold: config.chattyThresholdChars,
-      isOverThreshold: chatterLength > config.chattyThresholdChars,
-    },
-  };
-}
@@ -0,0 +1,99 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import * as fs from 'node:fs';
+import { runExtraction } from './extract.js';
+
+vi.mock('node:fs');
+
+describe('extraction script', () => {
+  // Minimal manifest fixture: one snippet target and one tool key.
+  const mockManifest = {
+    data_inventory: {
+      optimization_targets: {
+        snippets: ['renderCoreMandates'],
+      },
+      tools: {
+        read_file: {},
+      },
+    },
+  };
+
+  // `node:fs` is mocked for the whole module: every path "exists", and
+  // readFileSync returns fixture text selected by a substring of the path.
+  beforeEach(() => {
+    vi.clearAllMocks();
+    vi.mocked(fs.existsSync).mockReturnValue(true);
+    vi.mocked(fs.readFileSync).mockImplementation((path) => {
+      if (typeof path !== 'string') return '';
+      if (path.includes('manifest.json')) return JSON.stringify(mockManifest);
+
+      // Mock snippets.ts: a function body holding a double-quoted string and a
+      // single template literal with one `${...}` variable.
+      if (path.includes('snippets.ts')) {
+        return `
+          export function renderCoreMandates(options: any): string {
+            const foo = "Ignore me";
+            return \`# Core Mandate Instruction \${USER_VAR}\`.trim();
+          }
+        `;
+      }
+
+      // Mock gemini-3.ts: a tool key at 2-space indent with a single-quoted
+      // description.
+      if (path.includes('gemini-3.ts')) {
+        return `
+  read_file: {
+    description: 'Read file description.',
+  },
+`;
+      }
+
+      // Mock dynamic helpers: the darwin shell description (with a `${...}`
+      // variable) plus the exit-plan-mode and activate-skill descriptions.
+      if (path.includes('dynamic-declaration-helpers.ts')) {
+        return `
+          return \`This tool executes a given shell command as \\\`bash -c <command>\\\`. \${backgroundInstructions}\`;
+          name: EXIT_PLAN_MODE_TOOL_NAME,
+          description: 'Exit Plan Mode.',
+          name: ACTIVATE_SKILL_TOOL_NAME,
+          description: \`Activate skill.\`,
+        `;
+      }
+      return '';
+    });
+  });
+
+  // The template literal's content is extracted (not the "Ignore me" string)
+  // and its `${USER_VAR}` is replaced by a placeholder in the masked text.
+  it('should extract snippets correctly (Step 1)', async () => {
+    const targets = await runExtraction();
+    const snippet = targets.find((t) => t.id === 'snippets:renderCoreMandates');
+    expect(snippet).toBeDefined();
+    expect(snippet?.originalText).toBe(
+      '# Core Mandate Instruction ${USER_VAR}',
+    );
+    expect(snippet?.maskedText).toContain('[[GCLI_VAR_0]]');
+  });
+
+  it('should extract tools correctly (Step 2)', async () => {
+    const targets = await runExtraction();
+    const tool = targets.find((t) => t.id === 'gemini3:read_file:description');
+    expect(tool).toBeDefined();
+    expect(tool?.originalText).toBe('Read file description.');
+  });
+
+  it('should extract dynamic helpers correctly (Step 3)', async () => {
+    const targets = await runExtraction();
+    const shell = targets.find((t) => t.id === 'shell:darwin:description');
+    expect(shell).toBeDefined();
+    expect(shell?.maskedText).toContain('[[GCLI_VAR_0]]');
+
+    const exitPlan = targets.find((t) => t.id === 'exit_plan_mode:description');
+    expect(exitPlan?.originalText).toBe('Exit Plan Mode.');
+  });
+
+  // NOTE(review): this only asserts that the written path contains
+  // 'targets.json'; the output directory itself is not checked.
+  it('should write targets.json to the correct directory', async () => {
+    await runExtraction();
+    expect(fs.writeFileSync).toHaveBeenCalledWith(
+      expect.stringContaining('targets.json'),
+      expect.any(String),
+    );
+  });
+});
@@ -0,0 +1,175 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { maskVariables } from './lib/masking.js';
+
+/**
+ * One piece of extracted text selected for optimization, together with the
+ * masked form produced by `maskVariables`.
+ */
+export interface OptimizationTarget {
+  /** Target identifier, e.g. `snippets:<name>` or `gemini3:<tool>:description`. */
+  id: string;
+  /** Path of the source file the text was extracted from. */
+  sourceFile: string;
+  /** The extracted text with surrounding whitespace trimmed. */
+  originalText: string;
+  /** `maskedText` as returned by `maskVariables(originalText)`. */
+  maskedText: string;
+  /**
+   * `maskMap` as returned by `maskVariables(originalText)`.
+   * NOTE(review): presumably placeholder -> original expression; confirm in
+   * `lib/masking`.
+   */
+  maskMap: Record<string, string>;
+}
+
+/**
+ * Finds the `{ ... }` block that opens at the first `{` at or after `startIdx`.
+ *
+ * Braces inside string literals, template-literal text and comments are
+ * ignored, so text such as a prompt containing `{` or `}` cannot end the block
+ * early. `${ ... }` interpolations inside template literals are scanned as
+ * code. Regular-expression literals are not recognised.
+ *
+ * @param content Source text to scan.
+ * @param startIdx Index from which to search for the opening brace.
+ * @returns Indices of the opening brace and its matching closing brace, or
+ *   `null` when there is no opening brace or the block is never closed.
+ */
+function findBlockBounds(
+  content: string,
+  startIdx: number,
+): { start: number; end: number } | null {
+  const blockStart = content.indexOf('{', startIdx);
+  if (blockStart === -1) return null;
+
+  // Open contexts, innermost last: '{' is a code block or `${` interpolation,
+  // '`' is template-literal text.
+  const open: Array<'{' | '`'> = [];
+  let i = blockStart;
+  while (i < content.length) {
+    const ch = content[i];
+    const next = content[i + 1];
+
+    // Inside template text only escapes, the closing backtick and `${` matter.
+    if (open[open.length - 1] === '`') {
+      if (ch === '\\') {
+        i += 2;
+      } else if (ch === '`') {
+        open.pop();
+        i++;
+      } else if (ch === '$' && next === '{') {
+        open.push('{');
+        i += 2;
+      } else {
+        i++;
+      }
+      continue;
+    }
+
+    if (ch === '/' && next === '/') {
+      // Line comment: skip to the end of the line.
+      const eol = content.indexOf('\n', i);
+      if (eol === -1) return null;
+      i = eol + 1;
+    } else if (ch === '/' && next === '*') {
+      // Block comment: skip past the terminator.
+      const close = content.indexOf('*/', i + 2);
+      if (close === -1) return null;
+      i = close + 2;
+    } else if (ch === "'" || ch === '"') {
+      // Quoted string: skip to the matching quote. Stopping at a newline
+      // limits the damage if a stray quote is misread as a string start.
+      i++;
+      while (i < content.length && content[i] !== ch && content[i] !== '\n') {
+        i += content[i] === '\\' ? 2 : 1;
+      }
+      i++;
+    } else if (ch === '`') {
+      open.push('`');
+      i++;
+    } else if (ch === '{') {
+      open.push('{');
+      i++;
+    } else if (ch === '}') {
+      open.pop();
+      if (open.length === 0) return { start: blockStart, end: i };
+      i++;
+    } else {
+      i++;
+    }
+  }
+  return null;
+}
+
+/**
+ * Extracts optimization targets from the core prompt and tool-definition
+ * sources and writes them to `data/optimization/targets.json`.
+ *
+ * Three sources are scanned, each only if its file exists:
+ * 1. Snippets: for every name in the manifest's
+ *    `data_inventory.optimization_targets.snippets`, the last template
+ *    literal inside `export function <name>` in `snippets.ts`.
+ * 2. Tools: for every key of the manifest's `data_inventory.tools`, the first
+ *    `description:` string inside that tool's block in `gemini-3.ts`.
+ * 3. Dynamic helpers: a fixed list of regex specs matched against
+ *    `dynamic-declaration-helpers.ts`.
+ *
+ * All paths are relative, so they resolve against the current working
+ * directory (presumably the repository root; confirm against the npm script).
+ *
+ * @returns The extracted targets, in the order they were found.
+ * @throws If `data/manifest.json` cannot be read or parsed, or if writing the
+ *   output file fails.
+ */
+export async function runExtraction(): Promise<OptimizationTarget[]> {
+  const manifest = JSON.parse(fs.readFileSync('data/manifest.json', 'utf8'));
+  const targets: OptimizationTarget[] = [];
+
+  // 1. Snippets
+  const snippetNames =
+    manifest.data_inventory?.optimization_targets?.snippets || [];
+  const snippetsPath = 'packages/core/src/prompts/snippets.ts';
+  if (fs.existsSync(snippetsPath)) {
+    const content = fs.readFileSync(snippetsPath, 'utf8');
+    for (const name of snippetNames) {
+      const startIdx = content.indexOf(`export function ${name}`);
+      if (startIdx === -1) continue;
+
+      const bounds = findBlockBounds(content, startIdx);
+      if (!bounds) continue;
+
+      const body = content.substring(bounds.start, bounds.end + 1);
+      // Capture the LAST template literal
+      const tickMatches = [...body.matchAll(/`((?:[^`\\]|\\.)*)`/g)];
+      if (tickMatches.length > 0) {
+        const text = tickMatches[tickMatches.length - 1][1].trim();
+        const { maskedText, maskMap } = maskVariables(text);
+        targets.push({
+          id: `snippets:${name}`,
+          sourceFile: snippetsPath,
+          originalText: text,
+          maskedText,
+          maskMap,
+        });
+      }
+    }
+  }
+
+  // 2. Tools
+  const toolNames = Object.keys(manifest.data_inventory?.tools || {});
+  const gemini3Path =
+    'packages/core/src/tools/definitions/model-family-sets/gemini-3.ts';
+  if (fs.existsSync(gemini3Path)) {
+    const content = fs.readFileSync(gemini3Path, 'utf8');
+    for (const name of toolNames) {
+      // Tool names come from the manifest; escape them so regex
+      // metacharacters in a key are matched literally.
+      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+      // Find tool key (2-space indent)
+      const toolRegex = new RegExp(`^\\s{2}${escapedName}:\\s*\\{`, 'm');
+      const match = toolRegex.exec(content);
+      if (!match) continue;
+
+      const bounds = findBlockBounds(content, match.index);
+      if (!bounds) continue;
+
+      const toolBlock = content.substring(match.index, bounds.end + 1);
+      const descRegex =
+        /description:\s*(?:`((?:[^`\\]|\\.)*)`|'([^']*)'|"([^"]*)")/g;
+      const descMatch = descRegex.exec(toolBlock);
+
+      // Exactly one capture group participates in a match. An empty
+      // description ('' / "" / ``) leaves nothing to optimize, so skip it
+      // rather than calling .trim() on undefined.
+      const rawDescription =
+        descMatch?.[1] ?? descMatch?.[2] ?? descMatch?.[3];
+      if (!rawDescription) continue;
+
+      const text = rawDescription.trim();
+      const { maskedText, maskMap } = maskVariables(text);
+      targets.push({
+        id: `gemini3:${name}:description`,
+        sourceFile: gemini3Path,
+        originalText: text,
+        maskedText,
+        maskMap,
+      });
+    }
+  }
+
+  // 3. Dynamic Helpers
+  const helpersPath =
+    'packages/core/src/tools/definitions/dynamic-declaration-helpers.ts';
+  if (fs.existsSync(helpersPath)) {
+    const content = fs.readFileSync(helpersPath, 'utf8');
+    // Each spec captures the optimizable text in group 1.
+    const specs = [
+      {
+        id: 'shell:darwin:description',
+        regex:
+          /return `This tool executes a given shell command as \\`bash -c <command>\\`. ([\s\S]*?)`;/,
+      },
+      {
+        id: 'shell:win32:description',
+        regex:
+          /return `This tool executes a given shell command as \\`powershell\.exe -NoProfile -Command <command>\\`. ([\s\S]*?)`;/,
+      },
+      {
+        id: 'exit_plan_mode:description',
+        regex:
+          /name: EXIT_PLAN_MODE_TOOL_NAME,[\s\S]*?description:\s*'([^']*)',/,
+      },
+      {
+        id: 'activate_skill:description',
+        regex:
+          /name: ACTIVATE_SKILL_TOOL_NAME,[\s\S]*?description:\s*`((?:[^`\\]|\\.)*)`,/,
+      },
+    ];
+    for (const s of specs) {
+      const m = s.regex.exec(content);
+      if (m && m[1]) {
+        const text = m[1].trim();
+        const { maskedText, maskMap } = maskVariables(text);
+        targets.push({
+          id: s.id,
+          sourceFile: helpersPath,
+          originalText: text,
+          maskedText,
+          maskMap,
+        });
+      }
+    }
+  }
+
+  const outputDir = 'data/optimization';
+  if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir, { recursive: true });
+
+  fs.writeFileSync(
+    path.join(outputDir, 'targets.json'),
+    JSON.stringify(targets, null, 2),
+  );
+  return targets;
+}
+
+// CLI Entrypoint: run the extraction only when this file is executed directly
+// (its resolved path equals argv[1]), not when it is imported.
+const isMain =
+  process.argv[1] &&
+  fileURLToPath(import.meta.url) === fs.realpathSync(process.argv[1]);
+if (isMain) {
+  runExtraction()
+    // eslint-disable-next-line no-console
+    .then((t) => console.log(`✅ Extracted ${t.length} targets.`))
+    .catch((err: unknown) => {
+      // eslint-disable-next-line no-console
+      console.error(err);
+      // Signal failure to the calling shell / npm script instead of exiting 0.
+      process.exitCode = 1;
+    });
+}
@@ -4,17 +4,10 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { OptimizationDirection } from './types.js';
-
 /**
 * Configuration for the Tool Alignment objective (The Accuracy Dimension).
 */
 export interface AlignmentConfig {
-  /**
-   * Whether to increase or decrease the alignment score.
-   */
-  direction: OptimizationDirection.MAXIMIZE;
-
  /**
   * The relative importance of accuracy vs other objectives in the Pareto frontier.
   */
@@ -42,28 +35,37 @@ export interface AlignmentConfig {
 }

 /**
- * Configuration for the Token Frugality objective (The Density Dimension).
+ * Configuration for the Brevity objective (The Density Dimension).
+ * Uses a word-count step-function to provide high-contrast signal for GEPA.
 */
-export interface FrugalityConfig {
-  /**
-   * Whether to increase or decrease the token count.
-   */
-  direction: OptimizationDirection.MINIMIZE;
-
+export interface BrevityConfig {
  /**
   * Importance of brevity relative to accuracy.
   */
  weight: number;

  /**
-   * The 'conversational budget' - max chars of non-tool text allowed before penalty.
+   * TIER 1: Response is perfectly succinct (e.g., <= 10 words).
   */
-  chattyThresholdChars: number;
+  succinctThresholdWords: number;
+  succinctScore: number; // 1.0

  /**
-   * Amount subtracted from the functional score if the model is too verbose.
+   * TIER 2: Response is acceptable but slightly verbose (e.g., <= 25 words).
   */
-  chattyPenalty: number;
+  acceptableThresholdWords: number;
+  acceptableScore: number; // 0.7
+
+  /**
+   * TIER 3: Response is verbose (e.g., <= 50 words).
+   */
+  verboseThresholdWords: number;
+  verboseScore: number; // 0.4
+
+  /**
+   * TIER 4: Response is very heavy (e.g., > 50 words).
+   */
+  heavyScore: number; // 0.1
 }

 /**
@@ -72,29 +74,33 @@ export interface FrugalityConfig {
 export interface EvalConfig {
  objectives: {
    alignment: AlignmentConfig;
-    frugality: FrugalityConfig;
+    brevity: BrevityConfig;
  };
 }

 /**
 * Default weights and thresholds for the Genetic-Pareto (GEPA) engine.
 * These constants drive the 'Selection Pressure' that evolves the prompt.
+ * GEPA always MAXIMIZES, so higher scores represent better performance.
 */
 export const DEFAULT_EVAL_CONFIG: EvalConfig = {
  objectives: {
    alignment: {
-      direction: OptimizationDirection.MAXIMIZE,
      weight: 1.0, // PRIMARY: Accuracy cannot be sacrificed.
      hardFailureScore: 0.0,
      invalidResponseScore: 0.1,
      toolNameMatchOnlyScore: 0.4,
      functionalSuccessScore: 1.0,
    },
-    frugality: {
-      direction: OptimizationDirection.MINIMIZE,
+    brevity: {
      weight: 0.6, // SECONDARY: Reward brevity once accuracy is high.
-      chattyThresholdChars: 30, // Budget for 'I have updated the file' etc.
-      chattyPenalty: 0.2, // Penalty creates a 'Reward Gap' for concise models.
+      succinctThresholdWords: 10,
+      succinctScore: 1.0,
+      acceptableThresholdWords: 25,
+      acceptableScore: 0.7,
+      verboseThresholdWords: 50,
+      verboseScore: 0.4,
+      heavyScore: 0.1, // Never hard-zero brevity to allow gradient improvement.
    },
  },
 };
@@ -0,0 +1,54 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { evaluateBrevity } from './brevityMetric.js';
+
+describe('evaluateBrevity 4-tier step-function', () => {
+  it('should return 1.0 for a succinct response (<= 10 words)', () => {
+    const prediction = { output_text: 'I have updated the file for you now.' }; // 8 words
+    const result = evaluateBrevity(prediction);
+    expect(result.score).toBe(1.0);
+    expect(result.metadata?.tier).toBe('succinct');
+  });
+
+  it('should return 0.7 for an acceptable response (11-25 words)', () => {
+    const text =
+      'I have successfully updated the file. Everything looks good to proceed with the next step.';
+    // 15 words
+    const prediction = { output_text: text };
+    const result = evaluateBrevity(prediction);
+    expect(result.score).toBe(0.7);
+    expect(result.metadata?.tier).toBe('acceptable');
+  });
+
+  it('should return 0.4 for a verbose response (26-50 words)', () => {
+    const text =
+      'Certainly! I would be more than happy to assist you with that request. I am now proceeding to surgically update the file using the replace tool to ensure accuracy.';
+    // 29 words
+    const prediction = { output_text: text };
+    const result = evaluateBrevity(prediction);
+    expect(result.score).toBe(0.4);
+    expect(result.metadata?.tier).toBe('verbose');
+  });
+
+  it('should return 0.1 for a heavy response (> 50 words)', () => {
+    const text =
+      'Certainly! I would be more than happy to assist you with that request. I am now proceeding to surgically update the file using the replace tool to ensure accuracy. I will then verify the changes and let you know when I am finished with the task so we can move to the next stage of implementation.';
+    // 56 words
+    const prediction = { output_text: text };
+    const result = evaluateBrevity(prediction);
+    expect(result.score).toBe(0.1);
+    expect(result.metadata?.tier).toBe('heavy');
+  });
+
+  it('should handle missing output text as succinct (0 words)', () => {
+    const prediction = {};
+    const result = evaluateBrevity(prediction);
+    expect(result.score).toBe(1.0);
+    expect(result.metadata?.tier).toBe('succinct');
+  });
+});
@@ -0,0 +1,62 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { debugLogger } from '../../../../../packages/core/src/utils/debugLogger.js';
+import { DEFAULT_EVAL_CONFIG } from '../config.js';
+import { MetricObjective } from '../types.js';
+import type { MetricResult } from '../types.js';
+
+/**
+ * Evaluates the brevity of a model's response using a tiered 4-step word-count function.
+ * Focuses on rewarding succinctness and providing a non-zero gradient for verbose models.
+ *
+ * The tier is chosen by comparing the whitespace-separated word count of
+ * `output_text` against the thresholds in `config`; the returned score is the
+ * score configured for that tier.
+ *
+ * @param prediction Model output; a missing `output_text` counts as 0 words.
+ * @param config Thresholds and per-tier scores. Defaults to
+ *   `DEFAULT_EVAL_CONFIG.objectives.brevity`.
+ * @returns The tier's score, a human-readable reason, and metadata holding the
+ *   word count and the tier name.
+ */
+export function evaluateBrevity(
+  prediction: { output_text?: string },
+  config = DEFAULT_EVAL_CONFIG.objectives.brevity,
+): MetricResult {
+  const chatter = (prediction.output_text ?? '').trim();
+
+  // Simple word count: split on whitespace. The text is trimmed first, so the
+  // split never produces empty leading/trailing entries.
+  const wordCount = chatter === '' ? 0 : chatter.split(/\s+/).length;
+
+  debugLogger.debug(
+    `[Eval:Brevity] Measuring output text word count: ${wordCount} words.`,
+  );
+
+  let score: number;
+  let reason: string;
+  // The tier is recorded from the branch taken, not re-derived from the score,
+  // so it stays correct when a caller supplies non-default tier scores.
+  let tier: 'succinct' | 'acceptable' | 'verbose' | 'heavy';
+
+  if (wordCount <= config.succinctThresholdWords) {
+    tier = 'succinct';
+    score = config.succinctScore;
+    reason = `Succinct: Response is within ${config.succinctThresholdWords} words.`;
+  } else if (wordCount <= config.acceptableThresholdWords) {
+    tier = 'acceptable';
+    score = config.acceptableScore;
+    reason = `Acceptable: Response is slightly verbose (${wordCount} words), exceeding ${config.succinctThresholdWords} words.`;
+  } else if (wordCount <= config.verboseThresholdWords) {
+    tier = 'verbose';
+    score = config.verboseScore;
+    reason = `Verbose: Response contains ${wordCount} words, exceeding acceptable limit of ${config.acceptableThresholdWords} words.`;
+  } else {
+    tier = 'heavy';
+    score = config.heavyScore;
+    reason = `Heavy: Response is excessively verbose (${wordCount} words).`;
+  }
+
+  return {
+    score,
+    objective: MetricObjective.BREVITY,
+    reason,
+    metadata: {
+      wordCount,
+      tier,
+    },
+  };
+}
@@ -6,7 +6,7 @@

 import { describe, it, expect } from 'vitest';
 import { evaluateToolAlignment } from './toolAlignment.js';
-import { MetricObjective, OptimizationDirection } from '../types.js';
+import { MetricObjective } from '../types.js';
 import type { Scenario } from '../schema.js';

 describe('evaluateToolAlignment', () => {
@@ -36,7 +36,6 @@ describe('evaluateToolAlignment', () => {
    const result = evaluateToolAlignment(prediction, mockScenario);
    expect(result.score).toBe(1.0);
    expect(result.objective).toBe(MetricObjective.ALIGNMENT);
-    expect(result.direction).toBe(OptimizationDirection.MAXIMIZE);
    expect(result.reason).toContain('Functional Success');
  });

@@ -4,10 +4,10 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { debugLogger } from '../../utils/debugLogger.js';
+import { debugLogger } from '../../../../../packages/core/src/utils/debugLogger.js';
 import type { Scenario, ToolCall } from '../schema.js';
 import { DEFAULT_EVAL_CONFIG } from '../config.js';
-import { MetricObjective, OptimizationDirection } from '../types.js';
+import { MetricObjective } from '../types.js';
 import type { MetricResult } from '../types.js';

 /**
@@ -41,7 +41,6 @@ export function evaluateToolAlignment(
      return {
        score: config.hardFailureScore,
        objective: MetricObjective.ALIGNMENT,
-        direction: OptimizationDirection.MAXIMIZE,
        reason: `Hard Failure: ${negative.reason}`,
        metadata: {
          matchedNegativeReason: negative.reason,
@@ -59,7 +58,6 @@ export function evaluateToolAlignment(
    return {
      score: config.invalidResponseScore,
      objective: MetricObjective.ALIGNMENT,
-      direction: OptimizationDirection.MAXIMIZE,
      reason: 'Model failed to produce any tool calls.',
    };
  }
@@ -79,7 +77,6 @@ export function evaluateToolAlignment(
    return {
      score: config.invalidResponseScore,
      objective: MetricObjective.ALIGNMENT,
-      direction: OptimizationDirection.MAXIMIZE,
      reason: 'Model selected the wrong tool(s).',
    };
  }
@@ -100,7 +97,6 @@ export function evaluateToolAlignment(
    return {
      score: config.toolNameMatchOnlyScore,
      objective: MetricObjective.ALIGNMENT,
-      direction: OptimizationDirection.MAXIMIZE,
      reason: 'Correct tool selected, but arguments are incorrect or missing.',
    };
  }
@@ -112,7 +108,6 @@ export function evaluateToolAlignment(
  return {
    score: config.functionalSuccessScore,
    objective: MetricObjective.ALIGNMENT,
-    direction: OptimizationDirection.MAXIMIZE,
    reason:
      'Functional Success: Tool and arguments align perfectly with golden scenario.',
  };
@@ -4,20 +4,12 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-/**
- * Defines whether an objective should be increased or decreased during optimization.
- */
-export enum OptimizationDirection {
-  MINIMIZE = 'minimize',
-  MAXIMIZE = 'maximize',
-}
-
 /**
 * The specific dimensions being measured by the evaluation pipeline.
 */
 export enum MetricObjective {
   ALIGNMENT = 'alignment',
-  FRUGALITY = 'frugality',
+  BREVITY = 'brevity', // reported by evaluateBrevity, which scores output_text by word count
 }

 /**
@@ -27,6 +19,7 @@ export enum MetricObjective {
 export interface MetricResult {
  /**
   * The numeric score calculated by the metric.
+   * All metrics must provide a value where HIGHER is BETTER.
   */
  score: number;

@@ -35,11 +28,6 @@ export interface MetricResult {
   */
  objective: MetricObjective;

-  /**
-   * Whether the goal is to increase or decrease this specific score.
-   */
-  direction: OptimizationDirection;
-
  /**
   * A human-readable (and optimizer-reflective) reason for the score.
   */
@@ -0,0 +1,41 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { maskVariables, unmaskVariables } from './masking.js';
+
+describe('optimization masking utility', () => {
+  it('should mask unique template variables with indexed tokens', () => {
+    const masked = maskVariables(
+      'Use ${TOOL_A} to read ${FILE_PATH}. ${TOOL_A} is efficient.',
+    );
+
+    // Two distinct placeholders appear, so two indexed tokens are expected.
+    for (const expectedToken of ['[[GCLI_VAR_0]]', '[[GCLI_VAR_1]]']) {
+      expect(masked.maskedText).toContain(expectedToken);
+    }
+
+    // Both occurrences of ${TOOL_A} must collapse onto one shared token.
+    const sharedToken = Object.keys(masked.maskMap).find(
+      (token) => masked.maskMap[token] === '${TOOL_A}',
+    )!;
+    const occurrences = masked.maskedText.split(sharedToken).length - 1;
+    expect(occurrences).toBe(2);
+    expect(masked.maskedText).not.toContain('${TOOL_A}');
+  });
+
+  it('should perfectly restore original text during unmasking', () => {
+    const source = 'Update ${OLD_STR} with ${NEW_STR} in ${FILE_PATH}.';
+    const masked = maskVariables(source);
+
+    // Masking followed by unmasking must be a lossless round trip.
+    expect(unmaskVariables(masked.maskedText, masked.maskMap)).toBe(source);
+  });
+
+  it('should handle text with no variables', () => {
+    const plain = 'Static text with no placeholders.';
+    const masked = maskVariables(plain);
+
+    // Nothing to mask: the text passes through and the map stays empty.
+    expect(masked.maskedText).toBe(plain);
+    expect(Object.keys(masked.maskMap)).toHaveLength(0);
+  });
+});
@@ -0,0 +1,61 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Utility to protect TypeScript template variables from being "optimized" by the LLM.
+ * Replaces ${VAR} with unique stable tokens and allows for perfect restoration.
+ */
+
+export interface MaskResult {
+  maskedText: string; // the input text with each ${...} placeholder replaced by its mask token
+  maskMap: Record<string, string>; // mask token -> the original ${...} placeholder it stands for
+}
+
+const MASK_PREFIX = '[[GCLI_VAR_'; // opening part of a mask token; the placeholder index is appended to it
+const MASK_SUFFIX = ']]'; // closing part of a mask token, appended after the index
+
+/**
+ * Replaces every ${name} placeholder with an indexed token; repeated placeholders share one token.
+ * Names may contain letters, digits, underscores and dots (e.g. ${TOOL_A}, ${toolName}, ${options.path}).
+ */
+export function maskVariables(text: string): MaskResult {
+  // Matches ${name} where name is built from letters, digits, '_' and '.'.
+  const placeholderPattern = /\${[a-zA-Z0-9_.]+}/g;
+  // Placeholder -> token, so a repeated placeholder reuses its first token.
+  const tokenByPlaceholder = new Map<string, string>();
+  const maskMap: Record<string, string> = {};
+
+  // Single left-to-right pass: tokens are numbered in order of first appearance.
+  const maskedText = text.replace(placeholderPattern, (placeholder) => {
+    let token = tokenByPlaceholder.get(placeholder);
+    if (token === undefined) {
+      token = `${MASK_PREFIX}${tokenByPlaceholder.size}${MASK_SUFFIX}`;
+      tokenByPlaceholder.set(placeholder, token);
+      maskMap[token] = placeholder;
+    }
+    return token;
+  });
+
+  return { maskedText, maskMap };
+}
+
+/**
+ * Restores original ${VARIABLE_NAME} patterns using the provided mask map.
+ */
+export function unmaskVariables(
+  text: string,
+  maskMap: Record<string, string>,
+): string {
+  // Longest tokens are substituted first (e.g. VAR_10 before VAR_1) as a guard
+  // against a shorter token matching inside a longer one.
+  const tokensLongestFirst = Object.keys(maskMap).sort(
+    (left, right) => right.length - left.length,
+  );
+
+  // Fold over the tokens, swapping every occurrence of each for its original.
+  return tokensLongestFirst.reduce(
+    (restored, token) => restored.split(token).join(maskMap[token]),
+    text,
+  );
+}
@@ -10,7 +10,10 @@ export default defineConfig({
  test: {
    globals: true,
    environment: 'node',
-    include: ['scripts/tests/**/*.test.{js,ts}'],
+    include: [
+      'scripts/tests/**/*.test.{js,ts}',
+      'scripts/optimization/**/*.test.ts',
+    ],
    setupFiles: ['scripts/tests/test-setup.ts'],
    coverage: {
      provider: 'v8',
@@ -6,7 +6,7 @@

 import * as fs from 'node:fs';
 import * as path from 'node:path';
-import type { Scenario } from '../packages/core/src/evals/schema.ts';
+import type { Scenario } from './optimization/lib/evals/schema.ts';

 const MANIFEST_FILE = 'data/manifest.json';
 const DEFAULT_DATA_DIR = 'data';