feat(optimization): implement manifest-driven extraction pipeline

- Implement `extract.ts` with robust character-aware parsing for snippets and tools.
- Consolidate research dependencies by moving `@ax-llm/ax` to root `optionalDependencies`.
- Relocate evaluation logic from `packages/core` to `scripts/optimization/lib/evals` to keep the production core lean.
- Add `optimization_targets` to `data/manifest.json` as the single source of truth for the pipeline.
- Implement comprehensive unit tests for extraction and variable masking with 100% pass rate.
- Update global config and linting rules to support the new optimization infrastructure.
This commit is contained in:
Abhijit Balaji
2026-03-04 14:25:17 -08:00
parent 6c94c4d9ca
commit 59d377e5e0
20 changed files with 599 additions and 143 deletions
+99
View File
@@ -0,0 +1,99 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import * as fs from 'node:fs';
import { runExtraction } from './extract.js';
vi.mock('node:fs');
// Unit tests for runExtraction. node:fs is mocked, so every file the extractor
// reads is served from the in-memory fixtures set up in beforeEach.
describe('extraction script', () => {
// Minimal manifest: one snippet target and one tool key under data_inventory.
const mockManifest = {
data_inventory: {
optimization_targets: {
snippets: ['renderCoreMandates'],
},
tools: {
read_file: {},
},
},
};
beforeEach(() => {
vi.clearAllMocks();
// Every path "exists", so all three extraction steps run.
vi.mocked(fs.existsSync).mockReturnValue(true);
// Route reads by substring of the requested path; unknown paths yield ''.
vi.mocked(fs.readFileSync).mockImplementation((path) => {
if (typeof path !== 'string') return '';
if (path.includes('manifest.json')) return JSON.stringify(mockManifest);
// Mock snippets.ts
if (path.includes('snippets.ts')) {
return `
export function renderCoreMandates(options: any): string {
const foo = "Ignore me";
return \`# Core Mandate Instruction \${USER_VAR}\`.trim();
}
`;
}
// Mock gemini-3.ts
if (path.includes('gemini-3.ts')) {
return `
read_file: {
description: 'Read file description.',
},
`;
}
// Mock dynamic helpers
if (path.includes('dynamic-declaration-helpers.ts')) {
return `
return \`This tool executes a given shell command as \\\`bash -c <command>\\\`. \${backgroundInstructions}\`;
name: EXIT_PLAN_MODE_TOOL_NAME,
description: 'Exit Plan Mode.',
name: ACTIVATE_SKILL_TOOL_NAME,
description: \`Activate skill.\`,
`;
}
return '';
});
});
// Step 1: the template literal in the snippet fixture is extracted and its
// ${USER_VAR} placeholder is masked.
it('should extract snippets correctly (Step 1)', async () => {
const targets = await runExtraction();
const snippet = targets.find((t) => t.id === 'snippets:renderCoreMandates');
expect(snippet).toBeDefined();
expect(snippet?.originalText).toBe(
'# Core Mandate Instruction ${USER_VAR}',
);
expect(snippet?.maskedText).toContain('[[GCLI_VAR_0]]');
});
// Step 2: the description of the manifest-listed tool is read from gemini-3.ts.
it('should extract tools correctly (Step 2)', async () => {
const targets = await runExtraction();
const tool = targets.find((t) => t.id === 'gemini3:read_file:description');
expect(tool).toBeDefined();
expect(tool?.originalText).toBe('Read file description.');
});
// Step 3: regex-driven extraction from the dynamic helper file.
it('should extract dynamic helpers correctly (Step 3)', async () => {
const targets = await runExtraction();
const shell = targets.find((t) => t.id === 'shell:darwin:description');
expect(shell).toBeDefined();
expect(shell?.maskedText).toContain('[[GCLI_VAR_0]]');
const exitPlan = targets.find((t) => t.id === 'exit_plan_mode:description');
expect(exitPlan?.originalText).toBe('Exit Plan Mode.');
});
// Only checks that some path containing 'targets.json' was written with a
// string payload; the directory itself is not asserted.
it('should write targets.json to the correct directory', async () => {
await runExtraction();
expect(fs.writeFileSync).toHaveBeenCalledWith(
expect.stringContaining('targets.json'),
expect.any(String),
);
});
});
+175
View File
@@ -0,0 +1,175 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as fs from 'node:fs';
import * as path from 'node:path';
import { fileURLToPath } from 'node:url';
import { maskVariables } from './lib/masking.js';
/**
 * One piece of prompt text extracted by runExtraction and serialized into
 * targets.json.
 */
export interface OptimizationTarget {
/** Identifier of the target, e.g. `snippets:<name>` or `gemini3:<tool>:description`. */
id: string;
/** Path of the source file the text was extracted from. */
sourceFile: string;
/** The extracted text, trimmed of surrounding whitespace. */
originalText: string;
/** `maskedText` as returned by maskVariables for originalText. */
maskedText: string;
/** `maskMap` as returned by maskVariables for originalText. */
maskMap: Record<string, string>;
}
/**
 * Finds the `{ ... }` block that starts at the first `{` at or after
 * `startIdx` and returns the indices of its opening and matching closing brace.
 *
 * The scan is lexically aware so that braces which are not code do not affect
 * the nesting depth:
 *  - `'...'` and `"..."` string literals are skipped (honoring `\` escapes),
 *  - `//` line comments and block comments are skipped,
 *  - template-literal text is skipped, while `${ ... }` interpolations inside
 *    it are scanned as code again (including nested template literals).
 *
 * Limitation: regular-expression literals are not recognized, so a regex that
 * contains a quote, a backtick or `//` can still confuse the scan.
 *
 * @param content Source text to scan.
 * @param startIdx Index from which to look for the opening brace.
 * @returns `{ start, end }` (both inclusive brace positions), or `null` when
 *   there is no opening brace or the block is never closed.
 */
function findBlockBounds(
  content: string,
  startIdx: number,
): { start: number; end: number } | null {
  const blockStart = content.indexOf('{', startIdx);
  if (blockStart === -1) return null;

  let depth = 0;
  let inTemplate = false;
  // For each open `${`, the brace depth to which its closing `}` returns us.
  const interpolationDepths: number[] = [];

  let i = blockStart;
  while (i < content.length) {
    const ch = content[i];

    if (inTemplate) {
      if (ch === '\\') {
        // Escaped character inside template text (e.g. \` or \$).
        i += 2;
      } else if (ch === '`') {
        inTemplate = false;
        i += 1;
      } else if (ch === '$' && content[i + 1] === '{') {
        // Interpolation: its body is code, so brace counting resumes.
        interpolationDepths.push(depth);
        depth += 1;
        inTemplate = false;
        i += 2;
      } else {
        i += 1;
      }
      continue;
    }

    if (ch === '/' && content[i + 1] === '/') {
      const eol = content.indexOf('\n', i + 2);
      if (eol === -1) return null; // rest of the input is a comment
      i = eol + 1;
      continue;
    }
    if (ch === '/' && content[i + 1] === '*') {
      const close = content.indexOf('*/', i + 2);
      if (close === -1) return null; // unterminated block comment
      i = close + 2;
      continue;
    }
    if (ch === "'" || ch === '"') {
      // Skip to the closing quote; stop at a newline so that a stray quote
      // cannot swallow more than the rest of its line.
      let j = i + 1;
      while (j < content.length && content[j] !== ch && content[j] !== '\n') {
        if (content[j] === '\\') j += 1;
        j += 1;
      }
      i = j + 1;
      continue;
    }
    if (ch === '`') {
      inTemplate = true;
      i += 1;
      continue;
    }

    if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (
        interpolationDepths.length > 0 &&
        interpolationDepths[interpolationDepths.length - 1] === depth
      ) {
        // This brace closes a `${ ... }`; we are back in template text.
        interpolationDepths.pop();
        inTemplate = true;
      } else if (depth === 0) {
        return { start: blockStart, end: i };
      }
    }
    i += 1;
  }
  return null;
}
/**
 * Main extraction function.
 *
 * Reads `data/manifest.json` and collects optimization targets from three
 * source files (skipping any file that does not exist):
 *  1. Snippets - for each name in `data_inventory.optimization_targets.snippets`,
 *     the LAST template literal inside `export function <name>` in snippets.ts.
 *  2. Tools - for each key of `data_inventory.tools`, the first `description:`
 *     string inside that tool's block in gemini-3.ts.
 *  3. Dynamic helpers - a fixed list of regex-matched descriptions.
 *
 * Every extracted text is trimmed and passed through maskVariables. The result
 * is written to `data/optimization/targets.json` (the directory is created if
 * missing) and also returned. All paths are relative to the current working
 * directory.
 *
 * @returns The extracted targets in extraction order (snippets, tools, helpers).
 * @throws Errors from `fs` or `JSON.parse` propagate (e.g. missing or invalid
 *   manifest, unwritable output directory).
 */
export async function runExtraction(): Promise<OptimizationTarget[]> {
  const manifest = JSON.parse(fs.readFileSync('data/manifest.json', 'utf8'));
  const targets: OptimizationTarget[] = [];

  // Shared tail of every extraction step: mask the text and record the target.
  const addTarget = (id: string, sourceFile: string, text: string): void => {
    const { maskedText, maskMap } = maskVariables(text);
    targets.push({ id, sourceFile, originalText: text, maskedText, maskMap });
  };

  // 1. Snippets
  const snippetNames =
    manifest.data_inventory?.optimization_targets?.snippets || [];
  const snippetsPath = 'packages/core/src/prompts/snippets.ts';
  if (fs.existsSync(snippetsPath)) {
    const content = fs.readFileSync(snippetsPath, 'utf8');
    for (const name of snippetNames) {
      const startIdx = content.indexOf(`export function ${name}`);
      if (startIdx === -1) continue;
      const bounds = findBlockBounds(content, startIdx);
      if (!bounds) continue;
      const body = content.substring(bounds.start, bounds.end + 1);
      // Capture the LAST template literal
      const literals = [...body.matchAll(/`((?:[^`\\]|\\.)*)`/g)];
      const lastLiteral = literals[literals.length - 1];
      if (!lastLiteral) continue;
      addTarget(`snippets:${name}`, snippetsPath, lastLiteral[1].trim());
    }
  }

  // 2. Tools
  const toolNames = Object.keys(manifest.data_inventory?.tools || {});
  const gemini3Path =
    'packages/core/src/tools/definitions/model-family-sets/gemini-3.ts';
  if (fs.existsSync(gemini3Path)) {
    const content = fs.readFileSync(gemini3Path, 'utf8');
    for (const name of toolNames) {
      // Manifest keys are data, not patterns: escape regex metacharacters so a
      // key can never change the meaning of the expression.
      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // Find tool key (2-space indent)
      const toolRegex = new RegExp(`^\\s{2}${escapedName}:\\s*\\{`, 'm');
      const match = toolRegex.exec(content);
      if (!match) continue;
      const bounds = findBlockBounds(content, match.index);
      if (!bounds) continue;
      const toolBlock = content.substring(match.index, bounds.end + 1);
      const descMatch =
        /description:\s*(?:`((?:[^`\\]|\\.)*)`|'([^']*)'|"([^"]*)")/.exec(
          toolBlock,
        );
      if (!descMatch) continue;
      // Exactly one alternative participates in the match; an empty
      // description leaves nothing to optimize, so it is skipped instead of
      // crashing on `undefined.trim()`.
      const rawDescription = descMatch[1] ?? descMatch[2] ?? descMatch[3];
      if (!rawDescription) continue;
      addTarget(
        `gemini3:${name}:description`,
        gemini3Path,
        rawDescription.trim(),
      );
    }
  }

  // 3. Dynamic Helpers
  const helpersPath =
    'packages/core/src/tools/definitions/dynamic-declaration-helpers.ts';
  if (fs.existsSync(helpersPath)) {
    const content = fs.readFileSync(helpersPath, 'utf8');
    // Each spec captures the description text in group 1.
    const specs = [
      {
        id: 'shell:darwin:description',
        regex:
          /return `This tool executes a given shell command as \\`bash -c <command>\\`. ([\s\S]*?)`;/,
      },
      {
        id: 'shell:win32:description',
        regex:
          /return `This tool executes a given shell command as \\`powershell\.exe -NoProfile -Command <command>\\`. ([\s\S]*?)`;/,
      },
      {
        id: 'exit_plan_mode:description',
        regex:
          /name: EXIT_PLAN_MODE_TOOL_NAME,[\s\S]*?description:\s*'([^']*)',/,
      },
      {
        id: 'activate_skill:description',
        regex:
          /name: ACTIVATE_SKILL_TOOL_NAME,[\s\S]*?description:\s*`((?:[^`\\]|\\.)*)`,/,
      },
    ];
    for (const s of specs) {
      const m = s.regex.exec(content);
      if (m && m[1]) {
        addTarget(s.id, helpersPath, m[1].trim());
      }
    }
  }

  const outputDir = 'data/optimization';
  if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir, { recursive: true });
  fs.writeFileSync(
    path.join(outputDir, 'targets.json'),
    JSON.stringify(targets, null, 2),
  );
  return targets;
}
// CLI Entrypoint
// Runs the extraction only when this module is the script passed to node
// (argv[1], resolved through symlinks), not when it is imported.
const isMain =
  process.argv[1] &&
  fileURLToPath(import.meta.url) === fs.realpathSync(process.argv[1]);
if (isMain) {
  runExtraction()
    // eslint-disable-next-line no-console
    .then((t) => console.log(`✅ Extracted ${t.length} targets.`))
    .catch((err: unknown) => {
      // eslint-disable-next-line no-console
      console.error(err);
      // Report the failure to the caller; without this the process exits 0.
      process.exitCode = 1;
    });
}
+106
View File
@@ -0,0 +1,106 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Configuration for the Tool Alignment objective (The Accuracy Dimension).
 * The four score fields are the values evaluateToolAlignment returns for its
 * four outcomes; higher is better.
 */
export interface AlignmentConfig {
/**
 * The relative importance of accuracy vs other objectives in the Pareto frontier.
 * NOTE(review): not read by the metric functions in this change; presumably
 * consumed by the optimizer - confirm.
 */
weight: number;
/**
 * Strongest negative signal (0.0 by default): returned when the prediction
 * matches one of the scenario's negative examples (e.g. a known shell trap).
 */
hardFailureScore: number;
/**
 * Neutral negative signal (0.1 by default): returned when the model produces
 * no tool calls, or when an expected tool name is missing from the prediction.
 */
invalidResponseScore: number;
/**
 * Partial positive signal (0.4 by default): model chose the right tool but
 * its arguments do not match the expected ones.
 */
toolNameMatchOnlyScore: number;
/**
 * Maximum positive signal (1.0 by default): model matched the golden signature perfectly.
 */
functionalSuccessScore: number;
}
/**
 * Configuration for the Brevity objective (The Density Dimension).
 * Uses a word-count step-function to provide high-contrast signal for GEPA.
 * Thresholds are inclusive upper bounds (evaluateBrevity compares with `<=`)
 * and are checked in order: succinct, acceptable, verbose, then heavy.
 */
export interface BrevityConfig {
/**
 * Importance of brevity relative to accuracy.
 * NOTE(review): not read by evaluateBrevity itself; presumably consumed by the
 * optimizer - confirm.
 */
weight: number;
/**
 * TIER 1: Response is perfectly succinct (e.g., <= 10 words).
 */
succinctThresholdWords: number;
/** Score returned for TIER 1 (1.0 by default). */
succinctScore: number; // 1.0
/**
 * TIER 2: Response is acceptable but slightly verbose (e.g., <= 25 words).
 */
acceptableThresholdWords: number;
/** Score returned for TIER 2 (0.7 by default). */
acceptableScore: number; // 0.7
/**
 * TIER 3: Response is verbose (e.g., <= 50 words).
 */
verboseThresholdWords: number;
/** Score returned for TIER 3 (0.4 by default). */
verboseScore: number; // 0.4
/**
 * TIER 4: Response is very heavy (e.g., > 50 words), i.e. anything above
 * verboseThresholdWords.
 */
heavyScore: number; // 0.1
}
/**
 * Global evaluation configuration for multi-objective optimization.
 * Groups the per-objective settings: `alignment` for tool-call accuracy and
 * `brevity` for response length.
 */
export interface EvalConfig {
objectives: {
/** Settings for the tool alignment metric. */
alignment: AlignmentConfig;
/** Settings for the brevity metric. */
brevity: BrevityConfig;
};
}
/**
 * Default weights and thresholds for the Genetic-Pareto (GEPA) engine.
 * These constants drive the 'Selection Pressure' that evolves the prompt.
 * GEPA always MAXIMIZES, so higher scores represent better performance.
 *
 * evaluateToolAlignment and evaluateBrevity use the matching `objectives`
 * entry as their default `config` argument.
 */
export const DEFAULT_EVAL_CONFIG: EvalConfig = {
objectives: {
alignment: {
weight: 1.0, // PRIMARY: Accuracy cannot be sacrificed.
hardFailureScore: 0.0, // Prediction matched a negative example.
invalidResponseScore: 0.1, // No tool calls, or an expected tool is missing.
toolNameMatchOnlyScore: 0.4, // Right tool, wrong arguments.
functionalSuccessScore: 1.0, // Tool and arguments both match.
},
brevity: {
weight: 0.6, // SECONDARY: Reward brevity once accuracy is high.
succinctThresholdWords: 10,
succinctScore: 1.0,
acceptableThresholdWords: 25,
acceptableScore: 0.7,
verboseThresholdWords: 50,
verboseScore: 0.4,
heavyScore: 0.1, // Never hard-zero brevity to allow gradient improvement.
},
},
};
@@ -0,0 +1,54 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { evaluateBrevity } from './brevityMetric.js';
// Exercises each tier of evaluateBrevity with the default config
// (thresholds 10 / 25 / 50 words, scores 1.0 / 0.7 / 0.4 / 0.1).
describe('evaluateBrevity 4-tier step-function', () => {
it('should return 1.0 for a succinct response (<= 10 words)', () => {
const prediction = { output_text: 'I have updated the file for you now.' }; // 8 words
const result = evaluateBrevity(prediction);
expect(result.score).toBe(1.0);
expect(result.metadata?.tier).toBe('succinct');
});
it('should return 0.7 for an acceptable response (11-25 words)', () => {
const text =
'I have successfully updated the file. Everything looks good to proceed with the next step.';
// 15 words
const prediction = { output_text: text };
const result = evaluateBrevity(prediction);
expect(result.score).toBe(0.7);
expect(result.metadata?.tier).toBe('acceptable');
});
it('should return 0.4 for a verbose response (26-50 words)', () => {
const text =
'Certainly! I would be more than happy to assist you with that request. I am now proceeding to surgically update the file using the replace tool to ensure accuracy.';
// 29 words
const prediction = { output_text: text };
const result = evaluateBrevity(prediction);
expect(result.score).toBe(0.4);
expect(result.metadata?.tier).toBe('verbose');
});
it('should return 0.1 for a heavy response (> 50 words)', () => {
const text =
'Certainly! I would be more than happy to assist you with that request. I am now proceeding to surgically update the file using the replace tool to ensure accuracy. I will then verify the changes and let you know when I am finished with the task so we can move to the next stage of implementation.';
// 56 words
const prediction = { output_text: text };
const result = evaluateBrevity(prediction);
expect(result.score).toBe(0.1);
expect(result.metadata?.tier).toBe('heavy');
});
// A prediction without output_text counts as zero words, which falls in the
// succinct tier.
it('should handle missing output text as succinct (0 words)', () => {
const prediction = {};
const result = evaluateBrevity(prediction);
expect(result.score).toBe(1.0);
expect(result.metadata?.tier).toBe('succinct');
});
});
@@ -0,0 +1,62 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { debugLogger } from '../../../../../packages/core/src/utils/debugLogger.js';
import { DEFAULT_EVAL_CONFIG } from '../config.js';
import { MetricObjective } from '../types.js';
import type { MetricResult } from '../types.js';
/**
 * Evaluates the brevity of a model's response using a tiered 4-step word-count function.
 * Focuses on rewarding succinctness and providing a non-zero gradient for verbose models.
 *
 * Words are counted by splitting the trimmed `output_text` on runs of
 * whitespace; missing or blank text counts as 0 words. The count is compared
 * against the config thresholds (inclusive) in order: succinct, acceptable,
 * verbose; anything larger is heavy.
 *
 * @param prediction Model output; only `output_text` is read.
 * @param config Thresholds and scores per tier. Defaults to the brevity
 *   section of DEFAULT_EVAL_CONFIG.
 * @returns A BREVITY MetricResult whose metadata carries `wordCount` and the
 *   selected `tier` ('succinct' | 'acceptable' | 'verbose' | 'heavy').
 */
export function evaluateBrevity(
  prediction: { output_text?: string },
  config = DEFAULT_EVAL_CONFIG.objectives.brevity,
): MetricResult {
  const chatter = (prediction.output_text ?? '').trim();
  // Simple word count: an empty string would otherwise split into [''].
  const wordCount = chatter === '' ? 0 : chatter.split(/\s+/).length;
  debugLogger.debug(
    `[Eval:Brevity] Measuring output text word count: ${wordCount} words.`,
  );

  // The tier is recorded where the score is chosen, so the label stays correct
  // for any configured score values (not only the defaults 1.0 / 0.7 / 0.4).
  let tier: 'succinct' | 'acceptable' | 'verbose' | 'heavy';
  let score: number;
  let reason: string;
  if (wordCount <= config.succinctThresholdWords) {
    tier = 'succinct';
    score = config.succinctScore;
    reason = `Succinct: Response is within ${config.succinctThresholdWords} words.`;
  } else if (wordCount <= config.acceptableThresholdWords) {
    tier = 'acceptable';
    score = config.acceptableScore;
    reason = `Acceptable: Response is slightly verbose (${wordCount} words), exceeding ${config.succinctThresholdWords} words.`;
  } else if (wordCount <= config.verboseThresholdWords) {
    tier = 'verbose';
    score = config.verboseScore;
    reason = `Verbose: Response contains ${wordCount} words, exceeding acceptable limit of ${config.acceptableThresholdWords} words.`;
  } else {
    tier = 'heavy';
    score = config.heavyScore;
    reason = `Heavy: Response is excessively verbose (${wordCount} words).`;
  }

  return {
    score,
    objective: MetricObjective.BREVITY,
    reason,
    metadata: { wordCount, tier },
  };
}
@@ -0,0 +1,83 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { evaluateToolAlignment } from './toolAlignment.js';
import { MetricObjective } from '../types.js';
import type { Scenario } from '../schema.js';
// Exercises each outcome of evaluateToolAlignment with the default config
// (scores 0.0 / 0.1 / 0.4 / 1.0).
describe('evaluateToolAlignment', () => {
// Golden scenario: the expected call is read_file on test.ts; the single
// negative example is the shell equivalent (`cat test.ts`).
const mockScenario: Scenario = {
id: 'test-scenario',
metadata: { tags: ['test'], created_at: '2026-03-02' },
input: { user_query: 'test query' },
expected: {
tool_calls: [{ name: 'read_file', arguments: { file_path: 'test.ts' } }],
rationale: 'Testing alignment',
},
negatives: [
{
tool_calls: [
{ name: 'run_shell_command', arguments: { command: 'cat test.ts' } },
],
reason: 'Avoid shell',
severity: 'high',
},
],
};
it('should return 1.0 for a perfect match', () => {
const prediction = {
tool_calls: [{ name: 'read_file', arguments: { file_path: 'test.ts' } }],
};
const result = evaluateToolAlignment(prediction, mockScenario);
expect(result.score).toBe(1.0);
expect(result.objective).toBe(MetricObjective.ALIGNMENT);
expect(result.reason).toContain('Functional Success');
});
// The prediction reproduces the negative example exactly, so the negative's
// reason is surfaced in the result metadata.
it('should return 0.0 for a hard failure (negative match)', () => {
const prediction = {
tool_calls: [
{ name: 'run_shell_command', arguments: { command: 'cat test.ts' } },
],
};
const result = evaluateToolAlignment(prediction, mockScenario);
expect(result.score).toBe(0.0);
expect(result.reason).toContain('Hard Failure');
expect(result.metadata?.['matchedNegativeReason']).toBe('Avoid shell');
});
// A tool that is neither expected nor a listed negative.
it('should return 0.1 for an incorrect tool selection', () => {
const prediction = {
tool_calls: [
{
name: 'write_file',
arguments: { file_path: 'test.ts', content: 'test' },
},
],
};
const result = evaluateToolAlignment(prediction, mockScenario);
expect(result.score).toBe(0.1);
expect(result.reason).toContain('wrong tool');
});
it('should return 0.4 for correct tool but wrong arguments', () => {
const prediction = {
tool_calls: [{ name: 'read_file', arguments: { file_path: 'wrong.ts' } }],
};
const result = evaluateToolAlignment(prediction, mockScenario);
expect(result.score).toBe(0.4);
expect(result.reason).toContain('arguments are incorrect');
});
it('should return 0.1 for an empty tool call list', () => {
const prediction = { tool_calls: [] };
const result = evaluateToolAlignment(prediction, mockScenario);
expect(result.score).toBe(0.1);
expect(result.reason).toContain('failed to produce any tool calls');
});
});
@@ -0,0 +1,124 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { debugLogger } from '../../../../../packages/core/src/utils/debugLogger.js';
import type { Scenario, ToolCall } from '../schema.js';
import { DEFAULT_EVAL_CONFIG } from '../config.js';
import { MetricObjective } from '../types.js';
import type { MetricResult } from '../types.js';
/**
 * Scores a model's predicted tool calls against a golden scenario.
 *
 * Outcomes are checked in this order; the first that applies is returned:
 *  1. Hard failure - some non-empty negative example has every one of its
 *     calls (name and arguments) present in the prediction.
 *  2. Invalid response - the prediction contains no tool calls.
 *  3. Wrong tool - an expected tool name is absent from the prediction.
 *  4. Name match only - names are present but some expected call has no
 *     predicted call with matching arguments.
 *  5. Functional success - every expected call is matched exactly.
 *
 * @param prediction Predicted tool calls.
 * @param example Scenario providing the expected calls and negative examples.
 * @param config Score per outcome. Defaults to the alignment section of
 *   DEFAULT_EVAL_CONFIG.
 * @returns An ALIGNMENT MetricResult; hard failures also carry the matched
 *   negative's reason and severity in `metadata`.
 */
export function evaluateToolAlignment(
  prediction: { tool_calls: ToolCall[] },
  example: Scenario,
  config = DEFAULT_EVAL_CONFIG.objectives.alignment,
): MetricResult {
  const predicted = prediction.tool_calls;
  const sid = example.id;
  const objective = MetricObjective.ALIGNMENT;

  // True when the prediction holds a call with this name and these arguments.
  const isPredictedExactly = (wanted: ToolCall): boolean =>
    predicted.some(
      (candidate: ToolCall) =>
        candidate.name === wanted.name &&
        areArgsMatching(wanted.arguments, candidate.arguments),
    );

  debugLogger.debug(`[Eval:${sid}] Evaluating tool alignment...`);

  // 1. Hard failures: the first negative pattern fully reproduced wins.
  const trap = example.negatives.find(
    (neg) =>
      neg.tool_calls.length > 0 && neg.tool_calls.every(isPredictedExactly),
  );
  if (trap !== undefined) {
    debugLogger.debug(
      `[Eval:${sid}] Hard Failure: Matched negative pattern.`,
    );
    return {
      score: config.hardFailureScore,
      objective,
      reason: `Hard Failure: ${trap.reason}`,
      metadata: {
        matchedNegativeReason: trap.reason,
        severity: trap.severity,
      },
    };
  }

  // 2. Structural check: nothing to compare against.
  if (predicted.length === 0) {
    debugLogger.debug(
      `[Eval:${sid}] Invalid Response: No tool calls found.`,
    );
    return {
      score: config.invalidResponseScore,
      objective,
      reason: 'Model failed to produce any tool calls.',
    };
  }

  // 3. Functional alignment: names first, then arguments.
  const golden = example.expected.tool_calls;
  const predictedNames = new Set(predicted.map((call: ToolCall) => call.name));
  const missingTool = golden.some(
    (call: ToolCall) => !predictedNames.has(call.name),
  );
  if (missingTool) {
    debugLogger.debug(
      `[Eval:${sid}] Failure: Incorrect tool selection.`,
    );
    return {
      score: config.invalidResponseScore,
      objective,
      reason: 'Model selected the wrong tool(s).',
    };
  }

  if (!golden.every(isPredictedExactly)) {
    debugLogger.debug(
      `[Eval:${sid}] Partial Success: Right tool, wrong arguments.`,
    );
    return {
      score: config.toolNameMatchOnlyScore,
      objective,
      reason: 'Correct tool selected, but arguments are incorrect or missing.',
    };
  }

  // 4. Perfect success.
  debugLogger.debug(
    `[Eval:${sid}] Perfect Functional Alignment achieved.`,
  );
  return {
    score: config.functionalSuccessScore,
    objective,
    reason:
      'Functional Success: Tool and arguments align perfectly with golden scenario.',
  };
}
/**
 * Deep equality check for tool arguments.
 *
 * Both values are serialized to JSON with object keys sorted at every nesting
 * level, so two argument objects that differ only in property order compare
 * equal. Array order remains significant. As with plain JSON serialization,
 * properties whose value is `undefined` are ignored.
 *
 * @param expected Arguments from the scenario.
 * @param predicted Arguments produced by the model.
 * @returns `true` when both serialize to the same canonical JSON.
 */
function areArgsMatching(
  expected: Record<string, unknown>,
  predicted: Record<string, unknown>,
): boolean {
  const byKey = (
    [a]: [string, unknown],
    [b]: [string, unknown],
  ): number => (a < b ? -1 : a > b ? 1 : 0);

  // The replacer swaps every plain object for a copy with sorted keys before
  // JSON.stringify descends into it; arrays and primitives pass through.
  const canonicalize = (value: unknown): string | undefined =>
    JSON.stringify(value, (_key: string, v: unknown) => {
      if (v === null || typeof v !== 'object' || Array.isArray(v)) return v;
      return Object.fromEntries(Object.entries(v).sort(byKey));
    });

  return canonicalize(expected) === canonicalize(predicted);
}
+49
View File
@@ -0,0 +1,49 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * The core data interface for the Tool Alignment Dataset.
 * Designed to be extensible for custom error reports and metrics.
 */
/** A single tool invocation: the tool's name plus its argument object. */
export interface ToolCall {
/** Name of the tool being called, e.g. 'read_file'. */
name: string;
/** Arguments passed to the tool, keyed by parameter name. */
arguments: Record<string, unknown>;
}
/**
 * A known-bad response for a scenario. evaluateToolAlignment treats a
 * prediction that contains all of `tool_calls` as a hard failure.
 */
export interface NegativeExample {
/** Optional identifier for this negative example. */
id?: string;
/** The tool calls that make up the failure pattern. */
tool_calls: ToolCall[];
output_text?: string; // For "too chatty" or "hallucination" failures
reason: string; // e.g., "Defaulted to shell 'cat'", "Included conversational filler"
severity: 'low' | 'medium' | 'high'; // Helps the optimizer prioritize fixes
}
/**
 * One evaluation case: the user input, the expected ("golden") tool calls,
 * and the failure modes to penalize.
 */
export interface Scenario {
id: string; // Unique identifier (e.g., 'read_file-01')
/** Descriptive information about the scenario itself. */
metadata: {
tags: string[]; // e.g., ['tool-alignment', 'shell-avoidance']
created_at: string;
platform?: 'darwin' | 'linux' | 'win32'; // To handle platform-specific shell variations
model_info?: {
// Placeholder for future tracking
name?: string;
version?: string;
};
};
/** What the model is given. */
input: {
user_query: string;
/** Optional workspace context accompanying the query. */
context?: {
current_file?: string;
directory_structure?: string[];
};
};
/** The golden answer the prediction is compared against. */
expected: {
tool_calls: ToolCall[];
rationale: string; // Why this is the 'Golden' choice
};
negatives: NegativeExample[]; // Array of multiple failure modes
}
+40
View File
@@ -0,0 +1,40 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * The specific dimensions being measured by the evaluation pipeline.
 * The string values match the keys of `EvalConfig.objectives`.
 */
export enum MetricObjective {
/** Tool-call accuracy, produced by evaluateToolAlignment. */
ALIGNMENT = 'alignment',
/** Response length, produced by evaluateBrevity. */
BREVITY = 'brevity',
}
/**
 * Standardized result for any metric calculation.
 * Designed for consumption by the Genetic-Pareto (GEPA) multi-objective function.
 */
export interface MetricResult {
/**
 * The numeric score calculated by the metric.
 * All metrics must provide a value where HIGHER is BETTER.
 */
score: number;
/**
 * The specific objective this result corresponds to.
 */
objective: MetricObjective;
/**
 * A human-readable (and optimizer-reflective) reason for the score.
 */
reason: string;
/**
 * Additional data points, e.g. `wordCount` and `tier` from the brevity
 * metric, or `matchedNegativeReason` and `severity` from a hard alignment
 * failure.
 */
metadata?: Record<string, unknown>;
}
+41
View File
@@ -0,0 +1,41 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { maskVariables, unmaskVariables } from './masking.js';
// Tests for maskVariables / unmaskVariables: token assignment, round-trip
// restoration, and the no-placeholder case.
describe('optimization masking utility', () => {
it('should mask unique template variables with indexed tokens', () => {
const input = 'Use ${TOOL_A} to read ${FILE_PATH}. ${TOOL_A} is efficient.';
const { maskedText, maskMap } = maskVariables(input);
expect(maskedText).toContain('[[GCLI_VAR_0]]');
expect(maskedText).toContain('[[GCLI_VAR_1]]');
// Ensure all occurrences of the same variable are replaced with the same token
const toolAToken = Object.keys(maskMap).find(
(key) => maskMap[key] === '${TOOL_A}',
)!;
// Splitting on the token yields one more piece than there are occurrences.
const count = maskedText.split(toolAToken).length - 1;
expect(count).toBe(2);
expect(maskedText).not.toContain('${TOOL_A}');
});
// Round trip: unmasking the masked text must give back the exact input.
it('should perfectly restore original text during unmasking', () => {
const original = 'Update ${OLD_STR} with ${NEW_STR} in ${FILE_PATH}.';
const { maskedText, maskMap } = maskVariables(original);
const restored = unmaskVariables(maskedText, maskMap);
expect(restored).toBe(original);
});
// Without placeholders the text is returned unchanged and the map is empty.
it('should handle text with no variables', () => {
const input = 'Static text with no placeholders.';
const { maskedText, maskMap } = maskVariables(input);
expect(maskedText).toBe(input);
expect(Object.keys(maskMap).length).toBe(0);
});
});
+61
View File
@@ -0,0 +1,61 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Utility to protect TypeScript template variables from being "optimized" by the LLM.
* Replaces ${VAR} with unique stable tokens and allows for perfect restoration.
*/
/** Output of maskVariables: the rewritten text and the token-to-placeholder map. */
export interface MaskResult {
  maskedText: string;
  maskMap: Record<string, string>;
}

// A mask token is MASK_PREFIX + <index> + MASK_SUFFIX, e.g. [[GCLI_VAR_0]].
const MASK_PREFIX = '[[GCLI_VAR_';
const MASK_SUFFIX = ']]';

/**
 * Swaps every `${name}` placeholder in `text` for an indexed token.
 *
 * A placeholder is `${` + one or more letters, digits, underscores or dots
 * + `}`; anything else between the braces (calls, operators, spaces) is left
 * untouched. Distinct placeholders are numbered from 0 in order of first
 * appearance, and every occurrence of the same placeholder receives the same
 * token.
 *
 * @param text Text that may contain `${...}` placeholders.
 * @returns The masked text plus a map from each token to the placeholder it
 *   replaced.
 */
export function maskVariables(text: string): MaskResult {
  const placeholders = new Set(text.match(/\${[a-zA-Z0-9_.]+}/g) ?? []);
  const maskMap: Record<string, string> = {};
  let maskedText = text;
  let nextIndex = 0;

  for (const placeholder of placeholders) {
    const token = `${MASK_PREFIX}${nextIndex}${MASK_SUFFIX}`;
    nextIndex += 1;
    maskMap[token] = placeholder;
    // split/join replaces every literal occurrence without regex escaping.
    maskedText = maskedText.split(placeholder).join(token);
  }

  return { maskedText, maskMap };
}
/**
 * Puts the original `${...}` placeholders back into masked text.
 *
 * Each key of `maskMap` found in `text` is replaced, everywhere it occurs, by
 * its mapped value. Keys are applied longest first so that a shorter key can
 * never consume part of a longer one.
 *
 * @param text Text containing mask tokens.
 * @param maskMap Map from token to the text it stands for.
 * @returns `text` with every token replaced.
 */
export function unmaskVariables(
  text: string,
  maskMap: Record<string, string>,
): string {
  const longestFirst = Object.keys(maskMap).sort(
    (left, right) => right.length - left.length,
  );
  return longestFirst.reduce(
    (restored, token) => restored.split(token).join(maskMap[token]),
    text,
  );
}