Files
gemini-cli/data/schema.ts
T
Abhijit Balaji c0b463dbcf feat(prompt-optimization): implement Data Layer MVP and Tool Alignment dataset
Established the "Heart" of the Prompt Optimization Pipeline by building a robust,
extensible data infrastructure and a high-fidelity golden dataset.

Key improvements:
- Core Schema: Defined the `Scenario` interface in `data/schema.ts` supporting
  multiple negative failure modes, platform-specific shell contexts (Unix/Win32),
  and strict tool-call typing.
- Optimization Manifest: Created `data/manifest.json` to define "No-Fly Zones"
  for the optimizer, protecting literal tool names and template variables, while
  providing descriptive context for validation.
- Tool Alignment Dataset: Authored 113 scenarios in `data/tool_alignment.jsonl`
  across 20 tools, focusing on "Built-in over Shell" preference. Heavily weighted
  `replace` (12) and `write_file` (10) to enforce surgical editing.
- Extensible Validator: Implemented `scripts/validate-data.ts` to provide
  real-time integrity checks and purpose-driven coverage reports.
- Project Integration: Added `data:validate`, `data:format`, and `data:lint`
  scripts to package.json and updated ESLint config to cover the data directory.
2026-03-04 10:08:13 -08:00

50 lines
1.3 KiB
TypeScript

/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Core data types for the Tool Alignment Dataset.
 * The shapes are designed to be extensible for custom error reports and metrics.
 */

/**
 * A single tool invocation: the tool's name plus the arguments handed to it.
 */
export interface ToolCall {
  /** Name of the tool being invoked. */
  name: string;

  /**
   * String-keyed argument payload. Values are typed `unknown`, so consumers
   * have to narrow them before use.
   */
  arguments: Record<string, unknown>;
}
/**
 * A known-bad response for a scenario: the tool calls (and optionally text)
 * that were produced, together with the reason they count as a failure.
 */
export interface NegativeExample {
  /** Optional identifier for this negative example. */
  id?: string;

  /** Tool calls that make up the incorrect response. */
  tool_calls: ToolCall[];

  /** Text output, used for "too chatty" or "hallucination" failures. */
  output_text?: string;

  /**
   * Why this response is a failure,
   * e.g., "Defaulted to shell 'cat'", "Included conversational filler".
   */
  reason: string;

  /** How severe the failure is; helps the optimizer prioritize fixes. */
  severity: 'low' | 'medium' | 'high';
}
/**
 * One dataset entry: a user request, the expected ("golden") tool calls for
 * it, and the failure modes recorded as negative examples.
 */
export interface Scenario {
  /** Unique identifier (e.g., 'read_file-01'). */
  id: string;

  /** Descriptive information about the scenario itself. */
  metadata: {
    /** Labels for the scenario, e.g., ['tool-alignment', 'shell-avoidance']. */
    tags: string[];

    /**
     * Creation timestamp, stored as a string.
     * NOTE(review): the expected format is not specified here — confirm.
     */
    created_at: string;

    /** Target platform, to handle platform-specific shell variations. */
    platform?: 'darwin' | 'linux' | 'win32';

    /** Placeholder for future tracking of the model involved. */
    model_info?: {
      name?: string;
      version?: string;
    };
  };

  /** What the model is given. */
  input: {
    /** The user's request text. */
    user_query: string;

    /** Optional surrounding workspace context. */
    context?: {
      current_file?: string;
      directory_structure?: string[];
    };
  };

  /** The desired response. */
  expected: {
    /** Tool calls that make up the correct response. */
    tool_calls: ToolCall[];

    /** Why this is the 'Golden' choice. */
    rationale: string;
  };

  /** Array of multiple failure modes. */
  negatives: NegativeExample[];
}