mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-01 15:34:29 -07:00
feat(policy): map --yolo to allowedTools wildcard policy
This PR maps the `--yolo` flag natively into a wildcard policy array (`allowedTools: ["*"]`) and removes the concept of `ApprovalMode.YOLO` as a distinct state in the application, fulfilling issue #11303.

This removes the hardcoded `ApprovalMode.YOLO` state and its associated UI/bypasses. The `PolicyEngine` now evaluates YOLO purely via data-driven rules.

- Removes `ApprovalMode.YOLO`
- Removes UI toggle (`Ctrl+Y`) and indicators for YOLO
- Removes `yolo.toml`
- Updates A2A server and CLI config logic to translate YOLO into a wildcard tool
- Rewrites policy engine tests to evaluate the wildcard
- Enforces enterprise `disableYoloMode` and `secureModeEnabled` controls by actively preventing manual `--allowed-tools=*` bypasses.

Fixes #11303
This commit is contained in:
@@ -1750,7 +1750,7 @@ describe('setApprovalMode with folder trust', () => {
|
||||
it('should throw an error when setting YOLO mode in an untrusted folder', () => {
|
||||
const config = new Config(baseParams);
|
||||
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(false);
|
||||
expect(() => config.setApprovalMode(ApprovalMode.YOLO)).toThrow(
|
||||
expect(() => config.setApprovalMode(ApprovalMode.PLAN)).toThrow(
|
||||
'Cannot enable privileged approval modes in an untrusted folder.',
|
||||
);
|
||||
});
|
||||
@@ -1778,7 +1778,7 @@ describe('setApprovalMode with folder trust', () => {
|
||||
it('should NOT throw an error when setting any mode in a trusted folder', () => {
|
||||
const config = new Config(baseParams);
|
||||
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
|
||||
expect(() => config.setApprovalMode(ApprovalMode.YOLO)).not.toThrow();
|
||||
expect(() => config.setApprovalMode(ApprovalMode.PLAN)).not.toThrow();
|
||||
expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow();
|
||||
expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow();
|
||||
});
|
||||
@@ -1786,7 +1786,7 @@ describe('setApprovalMode with folder trust', () => {
|
||||
it('should NOT throw an error when setting any mode if trustedFolder is undefined', () => {
|
||||
const config = new Config(baseParams);
|
||||
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true); // isTrustedFolder defaults to true
|
||||
expect(() => config.setApprovalMode(ApprovalMode.YOLO)).not.toThrow();
|
||||
expect(() => config.setApprovalMode(ApprovalMode.PLAN)).not.toThrow();
|
||||
expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow();
|
||||
expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow();
|
||||
});
|
||||
@@ -1834,7 +1834,7 @@ describe('setApprovalMode with folder trust', () => {
|
||||
} as Partial<ToolRegistry> as ToolRegistry);
|
||||
const updateSpy = vi.spyOn(config, 'updateSystemInstructionIfInitialized');
|
||||
|
||||
config.setApprovalMode(ApprovalMode.YOLO);
|
||||
config.setApprovalMode(ApprovalMode.PLAN);
|
||||
|
||||
expect(updateSpy).toHaveBeenCalled();
|
||||
});
|
||||
@@ -1915,7 +1915,7 @@ describe('setApprovalMode with folder trust', () => {
|
||||
vi.mocked(logApprovalModeDuration).mockClear();
|
||||
|
||||
performanceSpy.mockReturnValueOnce(time3);
|
||||
config.setApprovalMode(ApprovalMode.YOLO);
|
||||
config.setApprovalMode(ApprovalMode.AUTO_EDIT);
|
||||
expect(logApprovalModeDuration).toHaveBeenCalledWith(
|
||||
config,
|
||||
expect.objectContaining({
|
||||
|
||||
@@ -2569,11 +2569,8 @@ export class Config implements McpContext, AgentLoopContext {
|
||||
const isPlanModeTransition =
|
||||
currentMode !== mode &&
|
||||
(currentMode === ApprovalMode.PLAN || mode === ApprovalMode.PLAN);
|
||||
const isYoloModeTransition =
|
||||
currentMode !== mode &&
|
||||
(currentMode === ApprovalMode.YOLO || mode === ApprovalMode.YOLO);
|
||||
|
||||
if (isPlanModeTransition || isYoloModeTransition) {
|
||||
if (isPlanModeTransition) {
|
||||
if (this._geminiClient?.isInitialized()) {
|
||||
this._geminiClient.clearCurrentSequenceModel();
|
||||
this._geminiClient.setTools().catch((err) => {
|
||||
|
||||
@@ -157,18 +157,11 @@ async function truncateHistoryToBudget(
|
||||
if (typeof responseObj === 'string') {
|
||||
contentStr = responseObj;
|
||||
} else if (responseObj && typeof responseObj === 'object') {
|
||||
if (
|
||||
'output' in responseObj &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof responseObj['output'] === 'string'
|
||||
) {
|
||||
contentStr = responseObj['output'];
|
||||
} else if (
|
||||
'content' in responseObj &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof responseObj['content'] === 'string'
|
||||
) {
|
||||
contentStr = responseObj['content'];
|
||||
const obj = responseObj as { output?: unknown; content?: unknown };
|
||||
if ('output' in obj && typeof obj.output === 'string') {
|
||||
contentStr = obj.output;
|
||||
} else if ('content' in obj && typeof obj.content === 'string') {
|
||||
contentStr = obj.content;
|
||||
} else {
|
||||
contentStr = JSON.stringify(responseObj, null, 2);
|
||||
}
|
||||
|
||||
@@ -58,6 +58,8 @@ describe('Core System Prompt Substitution', () => {
|
||||
getSkillManager: vi.fn().mockReturnValue({
|
||||
getSkills: vi.fn().mockReturnValue([]),
|
||||
}),
|
||||
getAllowedTools: vi.fn().mockReturnValue([]),
|
||||
getApprovalMode: vi.fn().mockReturnValue('default'),
|
||||
getApprovedPlanPath: vi.fn().mockReturnValue(undefined),
|
||||
isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false),
|
||||
isTrackerEnabled: vi.fn().mockReturnValue(false),
|
||||
|
||||
@@ -121,6 +121,7 @@ describe('Core System Prompt (prompts.ts)', () => {
|
||||
getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT),
|
||||
getApprovedPlanPath: vi.fn().mockReturnValue(undefined),
|
||||
isTrackerEnabled: vi.fn().mockReturnValue(false),
|
||||
getAllowedTools: vi.fn().mockReturnValue([]),
|
||||
get config() {
|
||||
return this;
|
||||
},
|
||||
@@ -443,6 +444,7 @@ describe('Core System Prompt (prompts.ts)', () => {
|
||||
}),
|
||||
getApprovedPlanPath: vi.fn().mockReturnValue(undefined),
|
||||
isTrackerEnabled: vi.fn().mockReturnValue(false),
|
||||
getAllowedTools: vi.fn().mockReturnValue([]),
|
||||
get config() {
|
||||
return this;
|
||||
},
|
||||
@@ -598,7 +600,7 @@ describe('Core System Prompt (prompts.ts)', () => {
|
||||
});
|
||||
|
||||
it('should include YOLO mode instructions in interactive mode', () => {
|
||||
vi.mocked(mockConfig.getApprovalMode).mockReturnValue(ApprovalMode.YOLO);
|
||||
vi.mocked(mockConfig.getAllowedTools).mockReturnValue(['*']);
|
||||
vi.mocked(mockConfig.isInteractive).mockReturnValue(true);
|
||||
const prompt = getCoreSystemPrompt(mockConfig);
|
||||
expect(prompt).toContain('# Autonomous Mode (YOLO)');
|
||||
@@ -606,7 +608,7 @@ describe('Core System Prompt (prompts.ts)', () => {
|
||||
});
|
||||
|
||||
it('should NOT include YOLO mode instructions in non-interactive mode', () => {
|
||||
vi.mocked(mockConfig.getApprovalMode).mockReturnValue(ApprovalMode.YOLO);
|
||||
vi.mocked(mockConfig.getAllowedTools).mockReturnValue(['*']);
|
||||
vi.mocked(mockConfig.isInteractive).mockReturnValue(false);
|
||||
const prompt = getCoreSystemPrompt(mockConfig);
|
||||
expect(prompt).not.toContain('# Autonomous Mode (YOLO)');
|
||||
|
||||
@@ -360,12 +360,12 @@ export class HookAggregator {
|
||||
}
|
||||
|
||||
// Extract additionalContext from various hook types
|
||||
const specificObj = specific as { additionalContext?: unknown };
|
||||
if (
|
||||
'additionalContext' in specific &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof specific['additionalContext'] === 'string'
|
||||
'additionalContext' in specificObj &&
|
||||
typeof specificObj.additionalContext === 'string'
|
||||
) {
|
||||
contexts.push(specific['additionalContext']);
|
||||
contexts.push(specificObj.additionalContext);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,13 +314,16 @@ describe('createPolicyEngineConfig', () => {
|
||||
expect(excludedRule?.priority).toBe(4.9); // MCP excluded server
|
||||
});
|
||||
|
||||
it('should allow all tools in YOLO mode', async () => {
|
||||
const config = await createPolicyEngineConfig({}, ApprovalMode.YOLO);
|
||||
it('should allow all tools with wildcard allowedTools', async () => {
|
||||
const config = await createPolicyEngineConfig(
|
||||
{ tools: { allowed: ['*'] } },
|
||||
ApprovalMode.DEFAULT,
|
||||
);
|
||||
const rule = config.rules?.find(
|
||||
(r) => r.decision === PolicyDecision.ALLOW && r.toolName === '*',
|
||||
);
|
||||
expect(rule).toBeDefined();
|
||||
expect(rule?.priority).toBeCloseTo(1.998, 5);
|
||||
expect(rule?.priority).toBeCloseTo(4.3, 5);
|
||||
});
|
||||
|
||||
it('should allow edit tool in AUTO_EDIT mode', async () => {
|
||||
@@ -509,10 +512,10 @@ describe('createPolicyEngineConfig', () => {
|
||||
expect(explicitFalseRule).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should have YOLO allow-all rule beat write tool rules in YOLO mode', async () => {
|
||||
it('should have wildcard allow rule beat write tool rules', async () => {
|
||||
const config = await createPolicyEngineConfig(
|
||||
{ tools: { exclude: ['dangerous-tool'] } },
|
||||
ApprovalMode.YOLO,
|
||||
{ tools: { allowed: ['*'], exclude: ['dangerous-tool'] } },
|
||||
ApprovalMode.DEFAULT,
|
||||
);
|
||||
|
||||
const wildcardRule = config.rules?.find(
|
||||
|
||||
@@ -9,15 +9,14 @@ import * as path from 'node:path';
|
||||
import * as crypto from 'node:crypto';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { Storage } from '../config/storage.js';
|
||||
import {
|
||||
import type {
|
||||
ApprovalMode,
|
||||
type PolicyEngineConfig,
|
||||
PolicyDecision,
|
||||
type PolicyRule,
|
||||
type PolicySettings,
|
||||
type SafetyCheckerRule,
|
||||
ALWAYS_ALLOW_PRIORITY_OFFSET,
|
||||
PolicyEngineConfig,
|
||||
PolicyRule,
|
||||
PolicySettings,
|
||||
SafetyCheckerRule,
|
||||
} from './types.js';
|
||||
import { PolicyDecision, ALWAYS_ALLOW_PRIORITY_OFFSET } from './types.js';
|
||||
import type { PolicyEngine } from './policy-engine.js';
|
||||
import { loadPoliciesFromToml, type PolicyFileError } from './toml-loader.js';
|
||||
import { buildArgsPatterns, isSafeRegExp } from './utils.js';
|
||||
@@ -220,7 +219,7 @@ async function filterSecurePolicyDirectories(
|
||||
|
||||
/**
|
||||
* Loads and sanitizes policies from an extension's policies directory.
|
||||
* Security: Filters out 'ALLOW' rules and YOLO mode configurations.
|
||||
* Security: Filters out 'ALLOW' rules and Allow-all configurations.
|
||||
*/
|
||||
export async function loadExtensionPolicies(
|
||||
extensionName: string,
|
||||
@@ -244,14 +243,6 @@ export async function loadExtensionPolicies(
|
||||
return false;
|
||||
}
|
||||
|
||||
// Security: Extensions are not allowed to contribute YOLO mode rules.
|
||||
if (rule.modes?.includes(ApprovalMode.YOLO)) {
|
||||
debugLogger.warn(
|
||||
`[PolicyConfig] Extension "${extensionName}" attempted to contribute a rule for YOLO mode. Ignoring this rule for security.`,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Prefix source with extension name to avoid collisions and double prefixing.
|
||||
// toml-loader.ts adds "Extension: file.toml", we transform it to "Extension (name): file.toml".
|
||||
rule.source = rule.source?.replace(
|
||||
@@ -262,14 +253,6 @@ export async function loadExtensionPolicies(
|
||||
});
|
||||
|
||||
const checkers = result.checkers.filter((checker) => {
|
||||
// Security: Extensions are not allowed to contribute YOLO mode checkers.
|
||||
if (checker.modes?.includes(ApprovalMode.YOLO)) {
|
||||
debugLogger.warn(
|
||||
`[PolicyConfig] Extension "${extensionName}" attempted to contribute a safety checker for YOLO mode. Ignoring this checker for security.`,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Prefix source with extension name.
|
||||
checker.source = checker.source?.replace(
|
||||
/^Extension: /,
|
||||
@@ -401,7 +384,7 @@ export async function createPolicyEngineConfig(
|
||||
// 50: Read-only tools (becomes 1.050 in default tier)
|
||||
// 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
|
||||
// 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
|
||||
// 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
// 999: Allow-all (becomes 1.999 in default tier)
|
||||
|
||||
// MCP servers that are explicitly excluded in settings.mcp.excluded
|
||||
// Priority: MCP_EXCLUDED_PRIORITY (highest in user tier for security - persistent server blocks)
|
||||
@@ -437,6 +420,17 @@ export async function createPolicyEngineConfig(
|
||||
// Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
|
||||
if (settings.tools?.allowed) {
|
||||
for (const tool of settings.tools.allowed) {
|
||||
if (tool === '*') {
|
||||
rules.push({
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: ALLOWED_TOOLS_FLAG_PRIORITY,
|
||||
source: 'Settings (Tools Allowed)',
|
||||
allowRedirection: true,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for legacy format: toolName(args)
|
||||
const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
|
||||
if (match) {
|
||||
|
||||
@@ -253,15 +253,14 @@ decision = "deny"
|
||||
type: MessageBusType.UPDATE_POLICY,
|
||||
toolName: 'test_tool',
|
||||
persist: true,
|
||||
modes: [ApprovalMode.DEFAULT, ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.DEFAULT, ApprovalMode.AUTO_EDIT],
|
||||
});
|
||||
|
||||
await vi.advanceTimersByTimeAsync(100);
|
||||
|
||||
const content = memfs.readFileSync(policyFile, 'utf-8') as string;
|
||||
expect(content).toContain('modes = [ "default", "yolo" ]');
|
||||
expect(content).toContain('modes = [ "default", "autoEdit" ]');
|
||||
});
|
||||
|
||||
it('should update existing rule modes instead of appending redundant rule', async () => {
|
||||
createPolicyUpdater(policyEngine, messageBus, mockStorage);
|
||||
|
||||
@@ -279,12 +278,12 @@ modes = [ "autoEdit", "yolo" ]
|
||||
memfs.mkdirSync(dir, { recursive: true });
|
||||
memfs.writeFileSync(policyFile, existingContent);
|
||||
|
||||
// Now grant in DEFAULT mode, which should include [default, autoEdit, yolo]
|
||||
// Now grant in DEFAULT mode, which should include [default, autoEdit]
|
||||
await messageBus.publish({
|
||||
type: MessageBusType.UPDATE_POLICY,
|
||||
toolName: 'test_tool',
|
||||
persist: true,
|
||||
modes: [ApprovalMode.DEFAULT, ApprovalMode.AUTO_EDIT, ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.DEFAULT, ApprovalMode.AUTO_EDIT],
|
||||
});
|
||||
|
||||
await vi.advanceTimersByTimeAsync(100);
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||
# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
|
||||
# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
|
||||
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
# 999: Allow-all (becomes 1.999 in default tier)
|
||||
|
||||
# Mode Transitions (into/out of Plan Mode)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||
# 15: Auto-edit tool override (becomes 1.015 in default tier)
|
||||
# 50: Read-only tools (becomes 1.050 in default tier)
|
||||
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
# 999: Allow-all (becomes 1.999 in default tier)
|
||||
|
||||
[[rule]]
|
||||
toolName = "glob"
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||
# 15: Auto-edit tool override (becomes 1.015 in default tier)
|
||||
# 50: Read-only tools (becomes 1.050 in default tier)
|
||||
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
# 999: Allow-all (becomes 1.999 in default tier)
|
||||
|
||||
[[rule]]
|
||||
toolName = "replace"
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
# Priority system for policy rules:
|
||||
# - Higher priority numbers win over lower priority numbers
|
||||
# - When multiple rules match, the highest priority rule is applied
|
||||
# - Rules are evaluated in order of priority (highest first)
|
||||
#
|
||||
# Priority bands (tiers):
|
||||
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
|
||||
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
|
||||
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
|
||||
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
|
||||
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
|
||||
#
|
||||
# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
|
||||
# while allowing user-specified priorities to work within each tier.
|
||||
#
|
||||
# Settings-based and dynamic rules (all in user tier 4.x):
|
||||
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
|
||||
# 4.9: MCP servers excluded list (security: persistent server blocks)
|
||||
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
|
||||
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
|
||||
# 4.2: MCP servers with trust=true (persistent trusted servers)
|
||||
# 4.1: MCP servers allowed list (persistent general server allows)
|
||||
#
|
||||
# TOML policy priorities (before transformation):
|
||||
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||
# 15: Auto-edit tool override (becomes 1.015 in default tier)
|
||||
# 50: Read-only tools (becomes 1.050 in default tier)
|
||||
# 998: YOLO mode allow-all (becomes 1.998 in default tier)
|
||||
# 999: Ask-user tool (becomes 1.999 in default tier)
|
||||
|
||||
# Ask-user tool always requires user interaction, even in YOLO mode.
|
||||
# This ensures the model can gather user preferences/decisions when needed.
|
||||
[[rule]]
|
||||
toolName = "ask_user"
|
||||
decision = "ask_user"
|
||||
priority = 999
|
||||
modes = ["yolo"]
|
||||
interactive = true
|
||||
|
||||
# Plan mode transitions are blocked in YOLO mode to maintain state consistency
|
||||
# and because planning currently requires human interaction (plan approval),
|
||||
# which conflicts with YOLO's autonomous nature.
|
||||
[[rule]]
|
||||
toolName = ["enter_plan_mode", "exit_plan_mode"]
|
||||
decision = "deny"
|
||||
priority = 999
|
||||
modes = ["yolo"]
|
||||
interactive = true
|
||||
|
||||
# Allow everything else in YOLO mode
|
||||
[[rule]]
|
||||
toolName = "*"
|
||||
decision = "allow"
|
||||
priority = 998
|
||||
modes = ["yolo"]
|
||||
allowRedirection = true
|
||||
@@ -15,7 +15,6 @@ import {
|
||||
ApprovalMode,
|
||||
PRIORITY_SUBAGENT_TOOL,
|
||||
ALWAYS_ALLOW_PRIORITY_FRACTION,
|
||||
PRIORITY_YOLO_ALLOW_ALL,
|
||||
} from './types.js';
|
||||
import type { FunctionCall } from '@google/genai';
|
||||
import { SafetyCheckDecision } from '../safety/protocol.js';
|
||||
@@ -391,19 +390,25 @@ describe('PolicyEngine', () => {
|
||||
expect(decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
|
||||
it('should return ALLOW by default in YOLO mode when no rules match', async () => {
|
||||
engine = new PolicyEngine({ approvalMode: ApprovalMode.YOLO });
|
||||
it('should return ALLOW by default when a wildcard ALLOW rule exists', async () => {
|
||||
engine = new PolicyEngine({
|
||||
rules: [{ toolName: '*', decision: PolicyDecision.ALLOW, priority: 1 }],
|
||||
});
|
||||
|
||||
// No rules defined, should return ALLOW in YOLO mode
|
||||
const { decision } = await engine.check({ name: 'any-tool' }, undefined);
|
||||
expect(decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
|
||||
it('should NOT override explicit DENY rules in YOLO mode', async () => {
|
||||
it('should NOT override explicit DENY rules when a wildcard rule exists', async () => {
|
||||
const rules: PolicyRule[] = [
|
||||
{ toolName: 'dangerous-tool', decision: PolicyDecision.DENY },
|
||||
{ toolName: '*', decision: PolicyDecision.ALLOW, priority: 1 },
|
||||
{
|
||||
toolName: 'dangerous-tool',
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 10,
|
||||
},
|
||||
];
|
||||
engine = new PolicyEngine({ rules, approvalMode: ApprovalMode.YOLO });
|
||||
engine = new PolicyEngine({ rules });
|
||||
|
||||
const { decision } = await engine.check(
|
||||
{ name: 'dangerous-tool' },
|
||||
@@ -417,18 +422,18 @@ describe('PolicyEngine', () => {
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
|
||||
it('should respect rule priority in YOLO mode when a match exists', async () => {
|
||||
it('should respect rule priority when a wildcard match exists', async () => {
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
toolName: 'test-tool',
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 10,
|
||||
},
|
||||
{ toolName: 'test-tool', decision: PolicyDecision.DENY, priority: 20 },
|
||||
];
|
||||
engine = new PolicyEngine({ rules, approvalMode: ApprovalMode.YOLO });
|
||||
engine = new PolicyEngine({ rules });
|
||||
|
||||
// Priority 20 (DENY) should win over priority 10 (ASK_USER)
|
||||
// Priority 20 (DENY) should win over priority 10 (ALLOW)
|
||||
const { decision } = await engine.check({ name: 'test-tool' }, undefined);
|
||||
expect(decision).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
@@ -1746,14 +1751,13 @@ describe('PolicyEngine', () => {
|
||||
});
|
||||
|
||||
describe('shell command parsing failure', () => {
|
||||
it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
|
||||
it('should return ALLOW when using wildcard policy even if shell command parsing fails', async () => {
|
||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
},
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
@@ -1762,10 +1766,7 @@ describe('PolicyEngine', () => {
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
});
|
||||
engine = new PolicyEngine({ rules });
|
||||
|
||||
// Simulate parsing failure (splitCommands returning empty array)
|
||||
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||
@@ -1780,7 +1781,7 @@ describe('PolicyEngine', () => {
|
||||
expect(result.rule?.priority).toBe(999);
|
||||
});
|
||||
|
||||
it('should return DENY in YOLO mode if shell command parsing fails and a higher priority rule says DENY', async () => {
|
||||
it('should return DENY when using wildcard policy if shell command parsing fails and a higher priority rule says DENY', async () => {
|
||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
@@ -1792,14 +1793,10 @@ describe('PolicyEngine', () => {
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
});
|
||||
engine = new PolicyEngine({ rules });
|
||||
|
||||
// Simulate parsing failure
|
||||
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||
@@ -2463,16 +2460,16 @@ describe('PolicyEngine', () => {
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
{
|
||||
toolName: 'dangerous-tool',
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 10,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
],
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
approvalMode: ApprovalMode.AUTO_EDIT,
|
||||
allToolNames: ['dangerous-tool', 'safe-tool'],
|
||||
expected: [],
|
||||
},
|
||||
@@ -3002,26 +2999,26 @@ describe('PolicyEngine', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('YOLO mode with ask_user tool', () => {
|
||||
it('should return ASK_USER for ask_user tool even in YOLO mode', async () => {
|
||||
describe('AUTO_EDIT mode with ask_user tool', () => {
|
||||
it('should return ASK_USER for ask_user tool even in AUTO_EDIT mode', async () => {
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
toolName: 'ask_user',
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
{
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: PRIORITY_YOLO_ALLOW_ALL,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
priority: 998,
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
approvalMode: ApprovalMode.AUTO_EDIT,
|
||||
});
|
||||
|
||||
const result = await engine.check(
|
||||
@@ -3031,25 +3028,25 @@ describe('PolicyEngine', () => {
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('should return ALLOW for other tools in YOLO mode', async () => {
|
||||
it('should return ALLOW for other tools in AUTO_EDIT mode', async () => {
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
toolName: 'ask_user',
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
{
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: PRIORITY_YOLO_ALLOW_ALL,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
priority: 998,
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
approvalMode: ApprovalMode.AUTO_EDIT,
|
||||
});
|
||||
|
||||
const result = await engine.check(
|
||||
@@ -3148,19 +3145,19 @@ describe('PolicyEngine', () => {
|
||||
toolName: 'enter_plan_mode',
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
{
|
||||
toolName: 'exit_plan_mode',
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
modes: [ApprovalMode.AUTO_EDIT],
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
approvalMode: ApprovalMode.AUTO_EDIT,
|
||||
});
|
||||
|
||||
const resultEnter = await engine.check(
|
||||
|
||||
@@ -285,13 +285,9 @@ export class PolicyEngine {
|
||||
if (allowRedirection) return false;
|
||||
if (!hasRedirection(command)) return false;
|
||||
|
||||
// Do not downgrade (do not ask user) if sandboxing is enabled and in AUTO_EDIT or YOLO
|
||||
// Do not downgrade (do not ask user) if sandboxing is enabled and in AUTO_EDIT
|
||||
const sandboxEnabled = !(this.sandboxManager instanceof NoopSandboxManager);
|
||||
if (
|
||||
sandboxEnabled &&
|
||||
(this.approvalMode === ApprovalMode.AUTO_EDIT ||
|
||||
this.approvalMode === ApprovalMode.YOLO)
|
||||
) {
|
||||
if (sandboxEnabled && this.approvalMode === ApprovalMode.AUTO_EDIT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -359,14 +355,6 @@ export class PolicyEngine {
|
||||
return { decision: PolicyDecision.DENY, rule };
|
||||
}
|
||||
|
||||
// In YOLO mode, we should proceed anyway even if we can't parse the command.
|
||||
if (this.approvalMode === ApprovalMode.YOLO) {
|
||||
return {
|
||||
decision: PolicyDecision.ALLOW,
|
||||
rule,
|
||||
};
|
||||
}
|
||||
|
||||
debugLogger.debug(
|
||||
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ${this.defaultDecision}.`,
|
||||
);
|
||||
@@ -611,15 +599,6 @@ export class PolicyEngine {
|
||||
|
||||
// Default if no rule matched
|
||||
if (decision === undefined) {
|
||||
if (this.approvalMode === ApprovalMode.YOLO) {
|
||||
debugLogger.debug(
|
||||
`[PolicyEngine.check] NO MATCH in YOLO mode - using ALLOW`,
|
||||
);
|
||||
return {
|
||||
decision: PolicyDecision.ALLOW,
|
||||
};
|
||||
}
|
||||
|
||||
debugLogger.debug(
|
||||
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
|
||||
);
|
||||
|
||||
@@ -231,21 +231,21 @@ priority = 100
|
||||
toolName = "glob"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
modes = ["default", "yolo"]
|
||||
modes = ["default", "autoEdit"]
|
||||
|
||||
[[rule]]
|
||||
toolName = "grep"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
modes = ["yolo"]
|
||||
modes = ["autoEdit"]
|
||||
`);
|
||||
|
||||
// Both rules should be included
|
||||
expect(result.rules).toHaveLength(2);
|
||||
expect(result.rules[0].toolName).toBe('glob');
|
||||
expect(result.rules[0].modes).toEqual(['default', 'yolo']);
|
||||
expect(result.rules[0].modes).toEqual(['default', 'autoEdit']);
|
||||
expect(result.rules[1].toolName).toBe('grep');
|
||||
expect(result.rules[1].modes).toEqual(['yolo']);
|
||||
expect(result.rules[1].modes).toEqual(['autoEdit']);
|
||||
expect(getErrors(result)).toHaveLength(0);
|
||||
});
|
||||
|
||||
|
||||
@@ -47,18 +47,4 @@ describe('Topic Tool Policy', () => {
|
||||
);
|
||||
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
|
||||
it('should allow update_topic in YOLO mode', async () => {
|
||||
const rules = await loadDefaultPolicies();
|
||||
const engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
});
|
||||
|
||||
const result = await engine.check(
|
||||
{ name: UPDATE_TOPIC_TOOL_NAME },
|
||||
undefined,
|
||||
);
|
||||
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -48,7 +48,6 @@ export function getHookSource(input: Record<string, unknown>): HookSource {
|
||||
export enum ApprovalMode {
|
||||
DEFAULT = 'default',
|
||||
AUTO_EDIT = 'autoEdit',
|
||||
YOLO = 'yolo',
|
||||
PLAN = 'plan',
|
||||
}
|
||||
|
||||
@@ -61,7 +60,6 @@ export const MODES_BY_PERMISSIVENESS = [
|
||||
ApprovalMode.PLAN,
|
||||
ApprovalMode.DEFAULT,
|
||||
ApprovalMode.AUTO_EDIT,
|
||||
ApprovalMode.YOLO,
|
||||
];
|
||||
|
||||
/**
|
||||
|
||||
@@ -81,6 +81,7 @@ describe('PromptProvider', () => {
|
||||
}),
|
||||
getApprovedPlanPath: vi.fn().mockReturnValue(undefined),
|
||||
getApprovalMode: vi.fn(),
|
||||
getAllowedTools: vi.fn().mockReturnValue([]),
|
||||
isTrackerEnabled: vi.fn().mockReturnValue(false),
|
||||
getHasAccessToPreviewModel: vi.fn().mockReturnValue(true),
|
||||
getGemini31LaunchedSync: vi.fn().mockReturnValue(true),
|
||||
|
||||
@@ -53,7 +53,7 @@ export class PromptProvider {
|
||||
const approvalMode =
|
||||
context.config.getApprovalMode?.() ?? ApprovalMode.DEFAULT;
|
||||
const isPlanMode = approvalMode === ApprovalMode.PLAN;
|
||||
const isYoloMode = approvalMode === ApprovalMode.YOLO;
|
||||
const isYoloMode = context.config.getAllowedTools()?.includes('*') ?? false;
|
||||
const skills = context.config.getSkillManager().getSkills();
|
||||
const toolNames = context.toolRegistry.getAllToolNames();
|
||||
const enabledToolNames = new Set(toolNames);
|
||||
|
||||
@@ -896,32 +896,22 @@ describe('Plan Mode Denial Consistency', () => {
|
||||
const testCases = [
|
||||
{
|
||||
currentMode: ApprovalMode.DEFAULT,
|
||||
expectedModes: [
|
||||
ApprovalMode.DEFAULT,
|
||||
ApprovalMode.AUTO_EDIT,
|
||||
ApprovalMode.YOLO,
|
||||
],
|
||||
expectedModes: [ApprovalMode.DEFAULT, ApprovalMode.AUTO_EDIT],
|
||||
description:
|
||||
'include current and more permissive modes in DEFAULT mode',
|
||||
},
|
||||
{
|
||||
currentMode: ApprovalMode.AUTO_EDIT,
|
||||
expectedModes: [ApprovalMode.AUTO_EDIT, ApprovalMode.YOLO],
|
||||
expectedModes: [ApprovalMode.AUTO_EDIT],
|
||||
description:
|
||||
'include current and more permissive modes in AUTO_EDIT mode',
|
||||
},
|
||||
{
|
||||
currentMode: ApprovalMode.YOLO,
|
||||
expectedModes: [ApprovalMode.YOLO],
|
||||
description: 'include current and more permissive modes in YOLO mode',
|
||||
},
|
||||
{
|
||||
currentMode: ApprovalMode.PLAN,
|
||||
expectedModes: [
|
||||
ApprovalMode.PLAN,
|
||||
ApprovalMode.DEFAULT,
|
||||
ApprovalMode.AUTO_EDIT,
|
||||
ApprovalMode.YOLO,
|
||||
],
|
||||
description: 'include all modes explicitly when granted in PLAN mode',
|
||||
},
|
||||
|
||||
@@ -103,7 +103,7 @@ describe('Scheduler Hooks', () => {
|
||||
const mockConfig = createMockConfig({
|
||||
getToolRegistry: () => toolRegistry,
|
||||
getMessageBus: () => mockMessageBus,
|
||||
getApprovalMode: () => ApprovalMode.YOLO,
|
||||
getApprovalMode: () => ApprovalMode.DEFAULT,
|
||||
});
|
||||
|
||||
const hookSystem = new HookSystem(mockConfig);
|
||||
@@ -172,7 +172,7 @@ describe('Scheduler Hooks', () => {
|
||||
const mockConfig = createMockConfig({
|
||||
getToolRegistry: () => toolRegistry,
|
||||
getMessageBus: () => mockMessageBus,
|
||||
getApprovalMode: () => ApprovalMode.YOLO,
|
||||
getApprovalMode: () => ApprovalMode.DEFAULT,
|
||||
});
|
||||
|
||||
const hookSystem = new HookSystem(mockConfig);
|
||||
@@ -243,7 +243,7 @@ describe('Scheduler Hooks', () => {
|
||||
const mockConfig = createMockConfig({
|
||||
getToolRegistry: () => toolRegistry,
|
||||
getMessageBus: () => mockMessageBus,
|
||||
getApprovalMode: () => ApprovalMode.YOLO,
|
||||
getApprovalMode: () => ApprovalMode.DEFAULT,
|
||||
});
|
||||
|
||||
const hookSystem = new HookSystem(mockConfig);
|
||||
|
||||
@@ -583,16 +583,12 @@ export class LoopDetectionService {
|
||||
return { isLoop: false };
|
||||
}
|
||||
|
||||
const confidenceVal = flashResult['unproductive_state_confidence'];
|
||||
const flashConfidence =
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof flashResult['unproductive_state_confidence'] === 'number'
|
||||
? flashResult['unproductive_state_confidence']
|
||||
: 0;
|
||||
const flashAnalysis =
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof flashResult['unproductive_state_analysis'] === 'string'
|
||||
? flashResult['unproductive_state_analysis']
|
||||
: '';
|
||||
typeof confidenceVal === 'number' ? confidenceVal : 0;
|
||||
|
||||
const analysisVal = flashResult['unproductive_state_analysis'];
|
||||
const flashAnalysis = typeof analysisVal === 'string' ? analysisVal : '';
|
||||
|
||||
const doubleCheckModelName =
|
||||
this.context.config.modelConfigService.getResolvedConfig({
|
||||
@@ -634,17 +630,22 @@ export class LoopDetectionService {
|
||||
signal,
|
||||
);
|
||||
|
||||
const mainModelObj = mainModelResult as {
|
||||
unproductive_state_confidence?: unknown;
|
||||
unproductive_state_analysis?: unknown;
|
||||
} | null;
|
||||
|
||||
const mainModelConfidence =
|
||||
mainModelResult &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof mainModelResult['unproductive_state_confidence'] === 'number'
|
||||
? mainModelResult['unproductive_state_confidence']
|
||||
mainModelObj &&
|
||||
'unproductive_state_confidence' in mainModelObj &&
|
||||
typeof mainModelObj.unproductive_state_confidence === 'number'
|
||||
? mainModelObj.unproductive_state_confidence
|
||||
: 0;
|
||||
const mainModelAnalysis =
|
||||
mainModelResult &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof mainModelResult['unproductive_state_analysis'] === 'string'
|
||||
? mainModelResult['unproductive_state_analysis']
|
||||
mainModelObj &&
|
||||
'unproductive_state_analysis' in mainModelObj &&
|
||||
typeof mainModelObj.unproductive_state_analysis === 'string'
|
||||
? mainModelObj.unproductive_state_analysis
|
||||
: undefined;
|
||||
|
||||
logLlmLoopCheck(
|
||||
@@ -689,10 +690,11 @@ export class LoopDetectionService {
|
||||
role: LlmRole.UTILITY_LOOP_DETECTOR,
|
||||
});
|
||||
|
||||
const resultObj = result as { unproductive_state_confidence?: unknown };
|
||||
if (
|
||||
result &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof result['unproductive_state_confidence'] === 'number'
|
||||
resultObj &&
|
||||
'unproductive_state_confidence' in resultObj &&
|
||||
typeof resultObj.unproductive_state_confidence === 'number'
|
||||
) {
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
---
|
||||
name: skill-creator
|
||||
description: Guide for creating effective skills. This skill should be used when users want to create a new skill (or update an existing skill) that extends Gemini CLI's capabilities with specialized knowledge, workflows, or tool integrations.
|
||||
description:
|
||||
Guide for creating effective skills. This skill should be used when users want
|
||||
to create a new skill (or update an existing skill) that extends Gemini CLI's
|
||||
capabilities with specialized knowledge, workflows, or tool integrations.
|
||||
---
|
||||
|
||||
# Skill Creator
|
||||
@@ -9,22 +12,33 @@ This skill provides guidance for creating effective skills.
|
||||
|
||||
## About Skills
|
||||
|
||||
Skills are modular, self-contained packages that extend Gemini CLI's capabilities by providing specialized knowledge, workflows, and tools. Think of them as "onboarding guides" for specific domains or tasks—they transform Gemini CLI from a general-purpose agent into a specialized agent equipped with procedural knowledge that no model can fully possess.
|
||||
Skills are modular, self-contained packages that extend Gemini CLI's
|
||||
capabilities by providing specialized knowledge, workflows, and tools. Think of
|
||||
them as "onboarding guides" for specific domains or tasks—they transform Gemini
|
||||
CLI from a general-purpose agent into a specialized agent equipped with
|
||||
procedural knowledge that no model can fully possess.
|
||||
|
||||
### What Skills Provide
|
||||
|
||||
1. Specialized workflows - Multi-step procedures for specific domains
|
||||
2. Tool integrations - Instructions for working with specific file formats or APIs
|
||||
2. Tool integrations - Instructions for working with specific file formats or
|
||||
APIs
|
||||
3. Domain expertise - Company-specific knowledge, schemas, business logic
|
||||
4. Bundled resources - Scripts, references, and assets for complex and repetitive tasks
|
||||
4. Bundled resources - Scripts, references, and assets for complex and
|
||||
repetitive tasks
|
||||
|
||||
## Core Principles
|
||||
|
||||
### Concise is Key
|
||||
|
||||
The context window is a public good. Skills share the context window with everything else Gemini CLI needs: system prompt, conversation history, other Skills' metadata, and the actual user request.
|
||||
The context window is a public good. Skills share the context window with
|
||||
everything else Gemini CLI needs: system prompt, conversation history, other
|
||||
Skills' metadata, and the actual user request.
|
||||
|
||||
**Default assumption: Gemini CLI is already very smart.** Only add context Gemini CLI doesn't already have. Challenge each piece of information: "Does Gemini CLI really need this explanation?" and "Does this paragraph justify its token cost?"
|
||||
**Default assumption: Gemini CLI is already very smart.** Only add context
|
||||
Gemini CLI doesn't already have. Challenge each piece of information: "Does
|
||||
Gemini CLI really need this explanation?" and "Does this paragraph justify its
|
||||
token cost?"
|
||||
|
||||
Prefer concise examples over verbose explanations.
|
||||
|
||||
@@ -32,13 +46,19 @@ Prefer concise examples over verbose explanations.
|
||||
|
||||
Match the level of specificity to the task's fragility and variability:
|
||||
|
||||
**High freedom (text-based instructions)**: Use when multiple approaches are valid, decisions depend on context, or heuristics guide the approach.
|
||||
**High freedom (text-based instructions)**: Use when multiple approaches are
|
||||
valid, decisions depend on context, or heuristics guide the approach.
|
||||
|
||||
**Medium freedom (pseudocode or scripts with parameters)**: Use when a preferred pattern exists, some variation is acceptable, or configuration affects behavior.
|
||||
**Medium freedom (pseudocode or scripts with parameters)**: Use when a preferred
|
||||
pattern exists, some variation is acceptable, or configuration affects behavior.
|
||||
|
||||
**Low freedom (specific scripts, few parameters)**: Use when operations are fragile and error-prone, consistency is critical, or a specific sequence must be followed.
|
||||
**Low freedom (specific scripts, few parameters)**: Use when operations are
|
||||
fragile and error-prone, consistency is critical, or a specific sequence must be
|
||||
followed.
|
||||
|
||||
Think of Gemini CLI as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom).
|
||||
Think of Gemini CLI as exploring a path: a narrow bridge with cliffs needs
|
||||
specific guardrails (low freedom), while an open field allows many routes (high
|
||||
freedom).
|
||||
|
||||
### Anatomy of a Skill
|
||||
|
||||
@@ -61,45 +81,75 @@ skill-name/
|
||||
|
||||
Every SKILL.md consists of:
|
||||
|
||||
- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that Gemini CLI reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used.
|
||||
- **Body** (Markdown): Instructions and guidance for using the skill. Only loaded AFTER the skill triggers (if at all).
|
||||
- **Frontmatter** (YAML): Contains `name` and `description` fields. These are
|
||||
the only fields that Gemini CLI reads to determine when the skill gets used,
|
||||
thus it is very important to be clear and comprehensive in describing what the
|
||||
skill is, and when it should be used.
|
||||
- **Body** (Markdown): Instructions and guidance for using the skill. Only
|
||||
loaded AFTER the skill triggers (if at all).
|
||||
|
||||
#### Bundled Resources (optional)
|
||||
|
||||
##### Scripts (`scripts/`)
|
||||
|
||||
Executable code (Node.js/Python/Bash/etc.) for tasks that require deterministic reliability or are repeatedly rewritten.
|
||||
Executable code (Node.js/Python/Bash/etc.) for tasks that require deterministic
|
||||
reliability or are repeatedly rewritten.
|
||||
|
||||
- **When to include**: When the same code is being rewritten repeatedly or deterministic reliability is needed
|
||||
- **When to include**: When the same code is being rewritten repeatedly or
|
||||
deterministic reliability is needed
|
||||
- **Example**: `scripts/rotate_pdf.cjs` for PDF rotation tasks
|
||||
- **Benefits**: Token efficient, deterministic, may be executed without loading into context
|
||||
- **Agentic Ergonomics**: Scripts must output LLM-friendly stdout. Suppress standard tracebacks. Output clear, concise success/failure messages, and paginate or truncate outputs (e.g., "Success: First 50 lines of processed file...") to prevent context window overflow.
|
||||
- **Note**: Scripts may still need to be read by Gemini CLI for patching or environment-specific adjustments
|
||||
- **Benefits**: Token efficient, deterministic, may be executed without loading
|
||||
into context
|
||||
- **Agentic Ergonomics**: Scripts must output LLM-friendly stdout. Suppress
|
||||
standard tracebacks. Output clear, concise success/failure messages, and
|
||||
paginate or truncate outputs (e.g., "Success: First 50 lines of processed
|
||||
file...") to prevent context window overflow.
|
||||
- **Note**: Scripts may still need to be read by Gemini CLI for patching or
|
||||
environment-specific adjustments
|
||||
|
||||
##### References (`references/`)
|
||||
|
||||
Documentation and reference material intended to be loaded as needed into context to inform Gemini CLI's process and thinking.
|
||||
Documentation and reference material intended to be loaded as needed into
|
||||
context to inform Gemini CLI's process and thinking.
|
||||
|
||||
- **When to include**: For documentation that Gemini CLI should reference while working
|
||||
- **Examples**: `references/finance.md` for financial schemas, `references/mnda.md` for company NDA template, `references/policies.md` for company policies, `references/api_docs.md` for API specifications
|
||||
- **Use cases**: Database schemas, API documentation, domain knowledge, company policies, detailed workflow guides
|
||||
- **Benefits**: Keeps SKILL.md lean, loaded only when Gemini CLI determines it's needed
|
||||
- **Best practice**: If files are large (>10k words), include grep search patterns in SKILL.md
|
||||
- **When to include**: For documentation that Gemini CLI should reference while
|
||||
working
|
||||
- **Examples**: `references/finance.md` for financial schemas,
|
||||
`references/mnda.md` for company NDA template, `references/policies.md` for
|
||||
company policies, `references/api_docs.md` for API specifications
|
||||
- **Use cases**: Database schemas, API documentation, domain knowledge, company
|
||||
policies, detailed workflow guides
|
||||
- **Benefits**: Keeps SKILL.md lean, loaded only when Gemini CLI determines it's
|
||||
needed
|
||||
- **Best practice**: If files are large (>10k words), include grep search
|
||||
patterns in SKILL.md
|
||||
- **Avoid duplication**: Information should live in either SKILL.md or
|
||||
references files, not both. Prefer references files for detailed information unless it's truly core to the skill—this keeps SKILL.md lean while making information discoverable without hogging the context window. Keep only essential procedural instructions and workflow guidance in SKILL.md; move detailed reference material, schemas, and examples to references files.
|
||||
references files, not both. Prefer references files for detailed information
|
||||
unless it's truly core to the skill—this keeps SKILL.md lean while making
|
||||
information discoverable without hogging the context window. Keep only
|
||||
essential procedural instructions and workflow guidance in SKILL.md; move
|
||||
detailed reference material, schemas, and examples to references files.
|
||||
|
||||
##### Assets (`assets/`)
|
||||
|
||||
Files not intended to be loaded into context, but rather used within the output Gemini CLI produces.
|
||||
Files not intended to be loaded into context, but rather used within the output
|
||||
Gemini CLI produces.
|
||||
|
||||
- **When to include**: When the skill needs files that will be used in the final output
|
||||
- **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate, `assets/font.ttf` for typography
|
||||
- **Use cases**: Templates, images, icons, boilerplate code, fonts, sample documents that get copied or modified
|
||||
- **Benefits**: Separates output resources from documentation, enables Gemini CLI to use files without loading them into context
|
||||
- **When to include**: When the skill needs files that will be used in the final
|
||||
output
|
||||
- **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for
|
||||
PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate,
|
||||
`assets/font.ttf` for typography
|
||||
- **Use cases**: Templates, images, icons, boilerplate code, fonts, sample
|
||||
documents that get copied or modified
|
||||
- **Benefits**: Separates output resources from documentation, enables Gemini
|
||||
CLI to use files without loading them into context
|
||||
|
||||
#### What Not to Include in a Skill
|
||||
|
||||
A skill should only contain essential files that directly support its functionality. Do NOT create extraneous documentation or auxiliary files, including:
|
||||
A skill should only contain essential files that directly support its
|
||||
functionality. Do NOT create extraneous documentation or auxiliary files,
|
||||
including:
|
||||
|
||||
- README.md
|
||||
- INSTALLATION_GUIDE.md
|
||||
@@ -107,7 +157,10 @@ A skill should only contain essential files that directly support its functional
|
||||
- CHANGELOG.md
|
||||
- etc.
|
||||
|
||||
The skill should only contain the information needed for an AI agent to do the job at hand. It should not contain auxiliary context about the process that went into creating it, setup and testing procedures, user-facing documentation, etc. Creating additional documentation files just adds clutter and confusion.
|
||||
The skill should only contain the information needed for an AI agent to do the
|
||||
job at hand. It should not contain auxiliary context about the process that went
|
||||
into creating it, setup and testing procedures, user-facing documentation, etc.
|
||||
Creating additional documentation files just adds clutter and confusion.
|
||||
|
||||
### Progressive Disclosure Design Principle
|
||||
|
||||
@@ -115,13 +168,21 @@ Skills use a three-level loading system to manage context efficiently:
|
||||
|
||||
1. **Metadata (name + description)** - Always in context (~100 words)
|
||||
2. **SKILL.md body** - When skill triggers (<5k words)
|
||||
3. **Bundled resources** - As needed by Gemini CLI (Unlimited because scripts can be executed without reading into context window)
|
||||
3. **Bundled resources** - As needed by Gemini CLI (Unlimited because scripts
|
||||
can be executed without reading into context window)
|
||||
|
||||
#### Progressive Disclosure Patterns
|
||||
|
||||
Keep SKILL.md body to the essentials and under 500 lines to minimize context bloat. Split content into separate files when approaching this limit. When splitting out content into other files, it is very important to reference them from SKILL.md and describe clearly when to read them, to ensure the reader of the skill knows they exist and when to use them.
|
||||
Keep SKILL.md body to the essentials and under 500 lines to minimize context
|
||||
bloat. Split content into separate files when approaching this limit. When
|
||||
splitting out content into other files, it is very important to reference them
|
||||
from SKILL.md and describe clearly when to read them, to ensure the reader of
|
||||
the skill knows they exist and when to use them.
|
||||
|
||||
**Key principle:** When a skill supports multiple variations, frameworks, or options, keep only the core workflow and selection guidance in SKILL.md. Move variant-specific details (patterns, examples, configuration) into separate reference files.
|
||||
**Key principle:** When a skill supports multiple variations, frameworks, or
|
||||
options, keep only the core workflow and selection guidance in SKILL.md. Move
|
||||
variant-specific details (patterns, examples, configuration) into separate
|
||||
reference files.
|
||||
|
||||
**Pattern 1: High-level guide with references**
|
||||
|
||||
@@ -143,7 +204,8 @@ Gemini CLI loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed.
|
||||
|
||||
**Pattern 2: Domain-specific organization**
|
||||
|
||||
For Skills with multiple domains, organize content by domain to avoid loading irrelevant context:
|
||||
For Skills with multiple domains, organize content by domain to avoid loading
|
||||
irrelevant context:
|
||||
|
||||
```
|
||||
bigquery-skill/
|
||||
@@ -157,7 +219,8 @@ bigquery-skill/
|
||||
|
||||
When a user asks about sales metrics, Gemini CLI only reads sales.md.
|
||||
|
||||
Similarly, for skills supporting multiple frameworks or variants, organize by variant:
|
||||
Similarly, for skills supporting multiple frameworks or variants, organize by
|
||||
variant:
|
||||
|
||||
```
|
||||
cloud-deploy/
|
||||
@@ -183,15 +246,20 @@ Use pandas for loading and basic queries. See [PANDAS.md](PANDAS.md).
|
||||
|
||||
## Advanced Operations
|
||||
|
||||
For massive files that exceed memory, see [STREAMING.md](STREAMING.md). For timestamp normalization, see [TIMESTAMPS.md](TIMESTAMPS.md).
|
||||
For massive files that exceed memory, see [STREAMING.md](STREAMING.md). For
|
||||
timestamp normalization, see [TIMESTAMPS.md](TIMESTAMPS.md).
|
||||
|
||||
Gemini CLI reads STREAMING.md or TIMESTAMPS.md only when the user needs those features.
|
||||
Gemini CLI reads STREAMING.md or TIMESTAMPS.md only when the user needs those
|
||||
features.
|
||||
```
|
||||
|
||||
**Important guidelines:**
|
||||
|
||||
- **Avoid deeply nested references** - Keep references one level deep from SKILL.md. All reference files should link directly from SKILL.md.
|
||||
- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so Gemini CLI can see the full scope when previewing.
|
||||
- **Avoid deeply nested references** - Keep references one level deep from
|
||||
SKILL.md. All reference files should link directly from SKILL.md.
|
||||
- **Structure longer reference files** - For files longer than 100 lines,
|
||||
include a table of contents at the top so Gemini CLI can see the full scope
|
||||
when previewing.
|
||||
|
||||
## Skill Creation Process
|
||||
|
||||
@@ -205,66 +273,93 @@ Skill creation involves these steps:
|
||||
6. Install and reload the skill
|
||||
7. Iterate based on real usage
|
||||
|
||||
Follow these steps in order, skipping only if there is a clear reason why they are not applicable.
|
||||
Follow these steps in order, skipping only if there is a clear reason why they
|
||||
are not applicable.
|
||||
|
||||
### Skill Naming
|
||||
|
||||
- Use lowercase letters, digits, and hyphens only; normalize user-provided titles to hyphen-case (e.g., "Plan Mode" -> `plan-mode`).
|
||||
- When generating names, generate a name under 64 characters (letters, digits, hyphens).
|
||||
- Use lowercase letters, digits, and hyphens only; normalize user-provided
|
||||
titles to hyphen-case (e.g., "Plan Mode" -> `plan-mode`).
|
||||
- When generating names, generate a name under 64 characters (letters, digits,
|
||||
hyphens).
|
||||
- Prefer short, verb-led phrases that describe the action.
|
||||
- Namespace by tool when it improves clarity or triggering (e.g., `gh-address-comments`, `linear-address-issue`).
|
||||
- Namespace by tool when it improves clarity or triggering (e.g.,
|
||||
`gh-address-comments`, `linear-address-issue`).
|
||||
- Name the skill folder exactly after the skill name.
|
||||
|
||||
### Step 1: Understanding the Skill with Concrete Examples
|
||||
|
||||
Skip this step only when the skill's usage patterns are already clearly understood. It remains valuable even when working with an existing skill.
|
||||
Skip this step only when the skill's usage patterns are already clearly
|
||||
understood. It remains valuable even when working with an existing skill.
|
||||
|
||||
To create an effective skill, clearly understand concrete examples of how the skill will be used. This understanding can come from either direct user examples or generated examples that are validated with user feedback.
|
||||
To create an effective skill, clearly understand concrete examples of how the
|
||||
skill will be used. This understanding can come from either direct user examples
|
||||
or generated examples that are validated with user feedback.
|
||||
|
||||
For example, when building an image-editor skill, relevant questions include:
|
||||
|
||||
- "What functionality should the image-editor skill support? Editing, rotating, anything else?"
|
||||
- "What functionality should the image-editor skill support? Editing, rotating,
|
||||
anything else?"
|
||||
- "Can you give some examples of how this skill would be used?"
|
||||
- "I can imagine users asking for things like 'Remove the red-eye from this image' or 'Rotate this image'. Are there other ways you imagine this skill being used?"
|
||||
- "I can imagine users asking for things like 'Remove the red-eye from this
|
||||
image' or 'Rotate this image'. Are there other ways you imagine this skill
|
||||
being used?"
|
||||
- "What would a user say that should trigger this skill?"
|
||||
|
||||
**Avoid interrogation loops:** Do not ask more than one or two clarifying questions at a time. Bias toward action: propose a concrete list of features or examples based on your initial understanding, and ask the user to refine them.
|
||||
**Avoid interrogation loops:** Do not ask more than one or two clarifying
|
||||
questions at a time. Bias toward action: propose a concrete list of features or
|
||||
examples based on your initial understanding, and ask the user to refine them.
|
||||
|
||||
Conclude this step when there is a clear sense of the functionality the skill should support.
|
||||
Conclude this step when there is a clear sense of the functionality the skill
|
||||
should support.
|
||||
|
||||
### Step 2: Planning the Reusable Skill Contents
|
||||
|
||||
To turn concrete examples into an effective skill, analyze each example by:
|
||||
|
||||
1. Considering how to execute on the example from scratch
|
||||
2. Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly
|
||||
2. Identifying what scripts, references, and assets would be helpful when
|
||||
executing these workflows repeatedly
|
||||
|
||||
Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows:
|
||||
Example: When building a `pdf-editor` skill to handle queries like "Help me
|
||||
rotate this PDF," the analysis shows:
|
||||
|
||||
1. Rotating a PDF requires re-writing the same code each time
|
||||
2. A `scripts/rotate_pdf.cjs` script would be helpful to store in the skill
|
||||
|
||||
Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows:
|
||||
Example: When designing a `frontend-webapp-builder` skill for queries like
|
||||
"Build me a todo app" or "Build me a dashboard to track my steps," the analysis
|
||||
shows:
|
||||
|
||||
1. Writing a frontend webapp requires the same boilerplate HTML/React each time
|
||||
2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill
|
||||
2. An `assets/hello-world/` template containing the boilerplate HTML/React
|
||||
project files would be helpful to store in the skill
|
||||
|
||||
Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows:
|
||||
Example: When building a `big-query` skill to handle queries like "How many
|
||||
users have logged in today?" the analysis shows:
|
||||
|
||||
1. Querying BigQuery requires re-discovering the table schemas and relationships each time
|
||||
2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill
|
||||
1. Querying BigQuery requires re-discovering the table schemas and relationships
|
||||
each time
|
||||
2. A `references/schema.md` file documenting the table schemas would be helpful
|
||||
to store in the skill
|
||||
|
||||
To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets.
|
||||
To establish the skill's contents, analyze each concrete example to create a
|
||||
list of the reusable resources to include: scripts, references, and assets.
|
||||
|
||||
### Step 3: Initializing the Skill
|
||||
|
||||
At this point, it is time to actually create the skill.
|
||||
|
||||
Skip this step only if the skill being developed already exists, and iteration or packaging is needed. In this case, continue to the next step.
|
||||
Skip this step only if the skill being developed already exists, and iteration
|
||||
or packaging is needed. In this case, continue to the next step.
|
||||
|
||||
When creating a new skill from scratch, always run the `init_skill.cjs` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable.
|
||||
When creating a new skill from scratch, always run the `init_skill.cjs` script.
|
||||
The script conveniently generates a new template skill directory that
|
||||
automatically includes everything a skill requires, making the skill creation
|
||||
process much more efficient and reliable.
|
||||
|
||||
**Note:** Use the absolute path to the script as provided in the `available_resources` section.
|
||||
**Note:** Use the absolute path to the script as provided in the
|
||||
`available_resources` section.
|
||||
|
||||
Usage:
|
||||
|
||||
@@ -277,30 +372,48 @@ The script:
|
||||
- Creates the skill directory at the specified path
|
||||
- Generates a SKILL.md template with proper frontmatter and TODO placeholders
|
||||
- Creates example resource directories: `scripts/`, `references/`, and `assets/`
|
||||
- Adds example files (`scripts/example_script.cjs`, `references/example_reference.md`, `assets/example_asset.txt`) that can be customized or deleted
|
||||
- Adds example files (`scripts/example_script.cjs`,
|
||||
`references/example_reference.md`, `assets/example_asset.txt`) that can be
|
||||
customized or deleted
|
||||
|
||||
After initialization, customize or remove the generated SKILL.md and example files as needed.
|
||||
After initialization, customize or remove the generated SKILL.md and example
|
||||
files as needed.
|
||||
|
||||
### Step 4: Edit the Skill
|
||||
|
||||
When editing the (newly-generated or existing) skill, remember that the skill is being created for another instance of Gemini CLI to use. Include information that would be beneficial and non-obvious to Gemini CLI. Consider what procedural knowledge, domain-specific details, or reusable assets would help another Gemini CLI instance execute these tasks more effectively.
|
||||
When editing the (newly-generated or existing) skill, remember that the skill is
|
||||
being created for another instance of Gemini CLI to use. Include information
|
||||
that would be beneficial and non-obvious to Gemini CLI. Consider what procedural
|
||||
knowledge, domain-specific details, or reusable assets would help another Gemini
|
||||
CLI instance execute these tasks more effectively.
|
||||
|
||||
#### Learn Proven Design Patterns
|
||||
|
||||
Consult these helpful guides based on your skill's needs:
|
||||
|
||||
- **Multi-step processes**: See references/workflows.md for sequential workflows and conditional logic
|
||||
- **Specific output formats or quality standards**: See references/output-patterns.md for template and example patterns
|
||||
- **Multi-step processes**: See references/workflows.md for sequential workflows
|
||||
and conditional logic
|
||||
- **Specific output formats or quality standards**: See
|
||||
references/output-patterns.md for template and example patterns
|
||||
|
||||
These files contain established best practices for effective skill design.
|
||||
|
||||
#### Start with Reusable Skill Contents
|
||||
|
||||
To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`.
|
||||
To begin implementation, start with the reusable resources identified above:
|
||||
`scripts/`, `references/`, and `assets/` files. Note that this step may require
|
||||
user input. For example, when implementing a `brand-guidelines` skill, the user
|
||||
may need to provide brand assets or templates to store in `assets/`, or
|
||||
documentation to store in `references/`.
|
||||
|
||||
Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work while balancing time to completion.
|
||||
Added scripts must be tested by actually running them to ensure there are no
|
||||
bugs and that the output matches what is expected. If there are many similar
|
||||
scripts, only a representative sample needs to be tested to ensure confidence
|
||||
that they all work while balancing time to completion.
|
||||
|
||||
Any example files and directories not needed for the skill should be deleted. The initialization script creates example files in `scripts/`, `references/`, and `assets/` to demonstrate structure, but most skills won't need all of them.
|
||||
Any example files and directories not needed for the skill should be deleted.
|
||||
The initialization script creates example files in `scripts/`, `references/`,
|
||||
and `assets/` to demonstrate structure, but most skills won't need all of them.
|
||||
|
||||
#### Update SKILL.md
|
||||
|
||||
@@ -311,11 +424,17 @@ Any example files and directories not needed for the skill should be deleted. Th
|
||||
Write the YAML frontmatter with `name` and `description`:
|
||||
|
||||
- `name`: The skill name
|
||||
- `description`: This is the primary triggering mechanism for your skill, and helps Gemini CLI understand when to use the skill.
|
||||
- Include both what the Skill does and specific triggers/contexts for when to use it.
|
||||
- **Must be a single-line string** (e.g., `description: Data ingestion...`). Quotes are optional.
|
||||
- Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to Gemini CLI.
|
||||
- Example: `description: Data ingestion, cleaning, and transformation for tabular data. Use when Gemini CLI needs to work with CSV/TSV files to analyze large datasets, normalize schemas, or merge sources.`
|
||||
- `description`: This is the primary triggering mechanism for your skill, and
|
||||
helps Gemini CLI understand when to use the skill.
|
||||
- Include both what the Skill does and specific triggers/contexts for when to
|
||||
use it.
|
||||
- **Must be a single-line string** (e.g., `description: Data ingestion...`).
|
||||
Quotes are optional.
|
||||
- Include all "when to use" information here - Not in the body. The body is
|
||||
only loaded after triggering, so "When to Use This Skill" sections in the
|
||||
body are not helpful to Gemini CLI.
|
||||
- Example:
|
||||
`description: Data ingestion, cleaning, and transformation for tabular data. Use when Gemini CLI needs to work with CSV/TSV files to analyze large datasets, normalize schemas, or merge sources.`
|
||||
|
||||
Do not include any other fields in YAML frontmatter.
|
||||
|
||||
@@ -325,9 +444,13 @@ Write instructions for using the skill and its bundled resources.
|
||||
|
||||
### Step 5: Packaging a Skill
|
||||
|
||||
Once development of the skill is complete, it must be packaged into a distributable .skill file that gets shared with the user. The packaging process automatically validates the skill first (checking YAML and ensuring no TODOs remain) to ensure it meets all requirements:
|
||||
Once development of the skill is complete, it must be packaged into a
|
||||
distributable .skill file that gets shared with the user. The packaging process
|
||||
automatically validates the skill first (checking YAML and ensuring no TODOs
|
||||
remain) to ensure it meets all requirements:
|
||||
|
||||
**Note:** Use the absolute path to the script as provided in the `available_resources` section.
|
||||
**Note:** Use the absolute path to the script as provided in the
|
||||
`available_resources` section.
|
||||
|
||||
```bash
|
||||
node <path-to-skill-creator>/scripts/package_skill.cjs <path/to/skill-folder>
|
||||
@@ -347,15 +470,22 @@ The packaging script will:
|
||||
- Description completeness and quality
|
||||
- File organization and resource references
|
||||
|
||||
2. **Package** the skill if validation passes, creating a .skill file named after the skill (e.g., `my-skill.skill`) that includes all files and maintains the proper directory structure for distribution. The .skill file is a zip file with a .skill extension.
|
||||
2. **Package** the skill if validation passes, creating a .skill file named
|
||||
after the skill (e.g., `my-skill.skill`) that includes all files and
|
||||
maintains the proper directory structure for distribution. The .skill file is
|
||||
a zip file with a .skill extension.
|
||||
|
||||
If validation fails, the script will report the errors and exit without creating a package. Fix any validation errors and run the packaging command again.
|
||||
If validation fails, the script will report the errors and exit without creating
|
||||
a package. Fix any validation errors and run the packaging command again.
|
||||
|
||||
### Step 6: Installing and Reloading a Skill
|
||||
|
||||
Once the skill is packaged into a `.skill` file, offer to install it for the user. Ask whether they would like to install it locally in the current folder (workspace scope) or at the user level (user scope).
|
||||
Once the skill is packaged into a `.skill` file, offer to install it for the
|
||||
user. Ask whether they would like to install it locally in the current folder
|
||||
(workspace scope) or at the user level (user scope).
|
||||
|
||||
If the user agrees to an installation, perform it immediately using the `run_shell_command` tool:
|
||||
If the user agrees to an installation, perform it immediately using the
|
||||
`run_shell_command` tool:
|
||||
|
||||
- **Locally (workspace scope)**:
|
||||
```bash
|
||||
@@ -366,13 +496,19 @@ If the user agrees to an installation, perform it immediately using the `run_she
|
||||
gemini skills install <path/to/skill-name.skill> --scope user
|
||||
```
|
||||
|
||||
**Important:** After the installation is complete, notify the user that they MUST manually execute the `/skills reload` command in their interactive Gemini CLI session to enable the new skill. They can then verify the installation by running `/skills list`.
|
||||
**Important:** After the installation is complete, notify the user that they
|
||||
MUST manually execute the `/skills reload` command in their interactive Gemini
|
||||
CLI session to enable the new skill. They can then verify the installation by
|
||||
running `/skills list`.
|
||||
|
||||
Note: You (the agent) cannot execute the `/skills reload` command yourself; it must be done by the user in an interactive instance of Gemini CLI. Do not attempt to run it on their behalf.
|
||||
Note: You (the agent) cannot execute the `/skills reload` command yourself; it
|
||||
must be done by the user in an interactive instance of Gemini CLI. Do not
|
||||
attempt to run it on their behalf.
|
||||
|
||||
### Step 7: Iterate
|
||||
|
||||
After testing the skill, users may request improvements. Often this happens right after using the skill, with fresh context of how the skill performed.
|
||||
After testing the skill, users may request improvements. Often this happens
|
||||
right after using the skill, with fresh context of how the skill performed.
|
||||
|
||||
**Iteration workflow:**
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ import type {
|
||||
RewindEvent,
|
||||
MalformedJsonResponseEvent,
|
||||
IdeConnectionEvent,
|
||||
ConversationFinishedEvent,
|
||||
ChatCompressionEvent,
|
||||
FileOperationEvent,
|
||||
InvalidChunkEvent,
|
||||
@@ -1150,28 +1149,6 @@ export class ClearcutLogger {
|
||||
});
|
||||
}
|
||||
|
||||
logConversationFinishedEvent(event: ConversationFinishedEvent): void {
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_SESSION_ID,
|
||||
value: this.config?.getSessionId() ?? '',
|
||||
},
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONVERSATION_TURN_COUNT,
|
||||
value: JSON.stringify(event.turnCount),
|
||||
},
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_APPROVAL_MODE,
|
||||
value: event.approvalMode,
|
||||
},
|
||||
];
|
||||
|
||||
this.enqueueLogEvent(
|
||||
this.createLogEvent(EventNames.CONVERSATION_FINISHED, data),
|
||||
);
|
||||
this.flushIfNeeded();
|
||||
}
|
||||
|
||||
logEndSessionEvent(): void {
|
||||
// Flush immediately on session end.
|
||||
this.enqueueLogEvent(this.createLogEvent(EventNames.END_SESSION, []));
|
||||
|
||||
@@ -38,7 +38,6 @@ export {
|
||||
logApiResponse,
|
||||
logFlashFallback,
|
||||
logSlashCommand,
|
||||
logConversationFinishedEvent,
|
||||
logChatCompression,
|
||||
logToolOutputTruncated,
|
||||
logExtensionEnable,
|
||||
@@ -66,7 +65,6 @@ export {
|
||||
FlashFallbackEvent,
|
||||
StartSessionEvent,
|
||||
ToolCallEvent,
|
||||
ConversationFinishedEvent,
|
||||
ToolOutputTruncatedEvent,
|
||||
WebFetchFallbackAttemptEvent,
|
||||
NetworkRetryAttemptEvent,
|
||||
|
||||
@@ -26,7 +26,6 @@ import {
|
||||
type LoopDetectionDisabledEvent,
|
||||
type SlashCommandEvent,
|
||||
type RewindEvent,
|
||||
type ConversationFinishedEvent,
|
||||
type ChatCompressionEvent,
|
||||
type MalformedJsonResponseEvent,
|
||||
type InvalidChunkEvent,
|
||||
@@ -436,21 +435,6 @@ export function logIdeConnection(
|
||||
});
|
||||
}
|
||||
|
||||
export function logConversationFinishedEvent(
|
||||
config: Config,
|
||||
event: ConversationFinishedEvent,
|
||||
): void {
|
||||
ClearcutLogger.getInstance(config)?.logConversationFinishedEvent(event);
|
||||
bufferTelemetryEvent(() => {
|
||||
const logger = logs.getLogger(SERVICE_NAME);
|
||||
const logRecord: LogRecord = {
|
||||
body: event.toLogBody(),
|
||||
attributes: event.toOpenTelemetryAttributes(config),
|
||||
};
|
||||
logger.emit(logRecord);
|
||||
});
|
||||
}
|
||||
|
||||
export function logChatCompression(
|
||||
config: Config,
|
||||
event: ChatCompressionEvent,
|
||||
|
||||
@@ -63,26 +63,35 @@ function getStringReferences(parts: AnyPart[]): StringReference[] {
|
||||
});
|
||||
}
|
||||
} else if (part instanceof GenericPart) {
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
if (part.type === 'executableCode' && typeof part['code'] === 'string') {
|
||||
refs.push({
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
get: () => part['code'] as string,
|
||||
set: (val: string) => (part['code'] = val),
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
len: () => (part['code'] as string).length,
|
||||
});
|
||||
} else if (
|
||||
part.type === 'codeExecutionResult' &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof part['output'] === 'string'
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const partObj = part as unknown as {
|
||||
type?: unknown;
|
||||
code?: unknown;
|
||||
output?: unknown;
|
||||
};
|
||||
if (
|
||||
partObj.type === 'executableCode' &&
|
||||
'code' in partObj &&
|
||||
typeof partObj.code === 'string'
|
||||
) {
|
||||
refs.push({
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
get: () => part['output'] as string,
|
||||
get: () => partObj.code as string,
|
||||
set: (val: string) => (part['code'] = val),
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
len: () => (partObj.code as string).length,
|
||||
});
|
||||
} else if (
|
||||
partObj.type === 'codeExecutionResult' &&
|
||||
'output' in partObj &&
|
||||
typeof partObj.output === 'string'
|
||||
) {
|
||||
refs.push({
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
get: () => partObj.output as string,
|
||||
set: (val: string) => (part['output'] = val),
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
len: () => (part['output'] as string).length,
|
||||
len: () => (partObj.output as string).length,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1184,35 +1184,6 @@ export class IdeConnectionEvent {
|
||||
}
|
||||
}
|
||||
|
||||
export const EVENT_CONVERSATION_FINISHED = 'gemini_cli.conversation_finished';
|
||||
export class ConversationFinishedEvent {
|
||||
'event_name': 'conversation_finished';
|
||||
'event.timestamp': string; // ISO 8601;
|
||||
approvalMode: ApprovalMode;
|
||||
turnCount: number;
|
||||
|
||||
constructor(approvalMode: ApprovalMode, turnCount: number) {
|
||||
this['event_name'] = 'conversation_finished';
|
||||
this['event.timestamp'] = new Date().toISOString();
|
||||
this.approvalMode = approvalMode;
|
||||
this.turnCount = turnCount;
|
||||
}
|
||||
|
||||
toOpenTelemetryAttributes(config: Config): LogAttributes {
|
||||
return {
|
||||
...getCommonAttributes(config),
|
||||
'event.name': EVENT_CONVERSATION_FINISHED,
|
||||
'event.timestamp': this['event.timestamp'],
|
||||
approvalMode: this.approvalMode,
|
||||
turnCount: this.turnCount,
|
||||
};
|
||||
}
|
||||
|
||||
toLogBody(): string {
|
||||
return `Conversation finished.`;
|
||||
}
|
||||
}
|
||||
|
||||
export const EVENT_FILE_OPERATION = 'gemini_cli.file_operation';
|
||||
export class FileOperationEvent implements BaseTelemetryEvent {
|
||||
'event.name': 'file_operation';
|
||||
@@ -1846,7 +1817,6 @@ export type TelemetryEvent =
|
||||
| NextSpeakerCheckEvent
|
||||
| MalformedJsonResponseEvent
|
||||
| IdeConnectionEvent
|
||||
| ConversationFinishedEvent
|
||||
| SlashCommandEvent
|
||||
| FileOperationEvent
|
||||
| InvalidChunkEvent
|
||||
|
||||
@@ -22,7 +22,7 @@ export class MockMessageBus {
|
||||
/**
|
||||
* Mock publish method that captures messages and simulates responses
|
||||
*/
|
||||
publish = vi.fn(async (message: Message) => {
|
||||
publish = vi.fn((message: Message) => {
|
||||
this.publishedMessages.push(message);
|
||||
|
||||
// Handle tool confirmation requests
|
||||
@@ -62,6 +62,7 @@ export class MockMessageBus {
|
||||
if (!this.subscriptions.has(type)) {
|
||||
this.subscriptions.set(type, new Set());
|
||||
}
|
||||
|
||||
this.subscriptions.get(type)!.add(listener as (message: Message) => void);
|
||||
},
|
||||
);
|
||||
|
||||
@@ -361,7 +361,7 @@ Ask the user for specific feedback on how to improve the plan.`,
|
||||
});
|
||||
|
||||
describe('getAllowApprovalMode (internal)', () => {
|
||||
it('should return YOLO when config.isInteractive() is false', async () => {
|
||||
it('should return AUTO_EDIT when config.isInteractive() is false', async () => {
|
||||
mockConfig.isInteractive = vi.fn().mockReturnValue(false);
|
||||
const planRelativePath = createPlanFile('test.md', '# Content');
|
||||
const invocation = tool.build({ plan_filename: planRelativePath });
|
||||
@@ -369,9 +369,9 @@ Ask the user for specific feedback on how to improve the plan.`,
|
||||
// Directly call execute to trigger the internal getAllowApprovalMode
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toContain('YOLO mode');
|
||||
expect(result.llmContent).toContain('Auto-Edit mode');
|
||||
expect(mockConfig.setApprovalMode).toHaveBeenCalledWith(
|
||||
ApprovalMode.YOLO,
|
||||
ApprovalMode.AUTO_EDIT,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -418,10 +418,6 @@ Ask the user for specific feedback on how to improve the plan.`,
|
||||
ApprovalMode.DEFAULT,
|
||||
'Default mode (edits will require confirmation)',
|
||||
);
|
||||
await testMode(
|
||||
ApprovalMode.YOLO,
|
||||
'YOLO mode (all tool calls auto-approved)',
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw for invalid post-planning modes', async () => {
|
||||
|
||||
@@ -252,12 +252,12 @@ Ask the user for specific feedback on how to improve the plan.`,
|
||||
|
||||
/**
|
||||
* Determines the approval mode to switch to when plan mode is exited via a policy ALLOW.
|
||||
* In non-interactive environments, this defaults to YOLO to allow automated execution.
|
||||
* In non-interactive environments, this defaults to AUTO_EDIT to allow automated execution.
|
||||
*/
|
||||
private getAllowApprovalMode(): ApprovalMode {
|
||||
if (!this.config.isInteractive()) {
|
||||
// For non-interactive environment requires minimal user action, exit as YOLO mode for plan implementation.
|
||||
return ApprovalMode.YOLO;
|
||||
// For non-interactive environment requires minimal user action, exit as AUTO_EDIT mode for plan implementation.
|
||||
return ApprovalMode.AUTO_EDIT;
|
||||
}
|
||||
// By default, YOLO mode in interactive environment cannot enter/exit plan mode.
|
||||
// Always exit plan mode and move to default approval mode if exit_plan_mode tool is configured with allow decision.
|
||||
|
||||
@@ -80,11 +80,11 @@ export function formatMcpToolName(
|
||||
serverName: string,
|
||||
toolName?: string,
|
||||
): string {
|
||||
if (serverName === '*' && (toolName === undefined || toolName === '*')) {
|
||||
if (serverName === '*' && !toolName) {
|
||||
return `${MCP_TOOL_PREFIX}*`;
|
||||
} else if (serverName === '*') {
|
||||
return `${MCP_TOOL_PREFIX}*_${toolName}`;
|
||||
} else if (toolName === undefined || toolName === '*') {
|
||||
} else if (!toolName) {
|
||||
return `${MCP_TOOL_PREFIX}${serverName}_*`;
|
||||
} else {
|
||||
return `${MCP_TOOL_PREFIX}${serverName}_${toolName}`;
|
||||
@@ -105,13 +105,14 @@ export interface McpToolAnnotation extends Record<string, unknown> {
|
||||
export function isMcpToolAnnotation(
|
||||
annotation: unknown,
|
||||
): annotation is McpToolAnnotation {
|
||||
if (typeof annotation !== 'object' || annotation === null) {
|
||||
return false;
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const record = annotation as Record<string, unknown>;
|
||||
const serverName = record['_serverName'];
|
||||
return typeof serverName === 'string';
|
||||
const obj = annotation as { _serverName?: unknown };
|
||||
return (
|
||||
typeof obj === 'object' &&
|
||||
obj !== null &&
|
||||
'_serverName' in obj &&
|
||||
typeof obj._serverName === 'string'
|
||||
);
|
||||
}
|
||||
|
||||
type ToolParams = Record<string, unknown>;
|
||||
@@ -332,35 +333,6 @@ export class DiscoveredMCPToolInvocation extends BaseToolInvocation<
|
||||
getDescription(): string {
|
||||
return safeJsonStringify(this.params);
|
||||
}
|
||||
|
||||
override getDisplayTitle(): string {
|
||||
// If it's a known terminal execute tool provided by JetBrains or similar,
|
||||
// and a command argument is present, return just the command.
|
||||
const command = this.params['command'];
|
||||
if (typeof command === 'string') {
|
||||
return command;
|
||||
}
|
||||
|
||||
// Otherwise fallback to the display name or server tool name
|
||||
return this.displayName || this.serverToolName;
|
||||
}
|
||||
|
||||
override getExplanation(): string {
|
||||
const MAX_EXPLANATION_LENGTH = 500;
|
||||
const stringified = safeJsonStringify(this.params);
|
||||
if (stringified.length > MAX_EXPLANATION_LENGTH) {
|
||||
const keys = Object.keys(this.params);
|
||||
const displayedKeys = keys.slice(0, 5);
|
||||
const keysDesc =
|
||||
displayedKeys.length > 0
|
||||
? ` with parameters: ${displayedKeys.join(', ')}${
|
||||
keys.length > 5 ? ', ...' : ''
|
||||
}`
|
||||
: '';
|
||||
return `[Payload omitted due to length${keysDesc}]`;
|
||||
}
|
||||
return stringified;
|
||||
}
|
||||
}
|
||||
|
||||
export class DiscoveredMCPTool extends BaseDeclarativeTool<
|
||||
|
||||
@@ -37,6 +37,7 @@ describe('Tracker Tools Integration', () => {
|
||||
model: 'gemini-3-flash',
|
||||
debugMode: false,
|
||||
});
|
||||
await config.storage.initialize();
|
||||
messageBus = new MessageBus(null as unknown as PolicyEngine, false);
|
||||
});
|
||||
|
||||
|
||||
@@ -30,12 +30,6 @@ describe('approvalModeUtils', () => {
|
||||
'Plan mode (read-only planning)',
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct description for YOLO mode', () => {
|
||||
expect(getApprovalModeDescription(ApprovalMode.YOLO)).toBe(
|
||||
'YOLO mode (all tool calls auto-approved)',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getPlanModeExitMessage', () => {
|
||||
@@ -50,11 +44,5 @@ describe('approvalModeUtils', () => {
|
||||
'User has manually exited Plan Mode. Switching to Auto-Edit mode (edits will be applied automatically).',
|
||||
);
|
||||
});
|
||||
|
||||
it('should default to non-manual message', () => {
|
||||
expect(getPlanModeExitMessage(ApprovalMode.YOLO)).toBe(
|
||||
'Plan approved. Switching to YOLO mode (all tool calls auto-approved).',
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -18,8 +18,7 @@ export function getApprovalModeDescription(mode: ApprovalMode): string {
|
||||
return 'Default mode (edits will require confirmation)';
|
||||
case ApprovalMode.PLAN:
|
||||
return 'Plan mode (read-only planning)';
|
||||
case ApprovalMode.YOLO:
|
||||
return 'YOLO mode (all tool calls auto-approved)';
|
||||
|
||||
default:
|
||||
return checkExhaustive(mode);
|
||||
}
|
||||
|
||||
@@ -110,13 +110,15 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
|
||||
role: LlmRole.UTILITY_EDIT_CORRECTOR,
|
||||
});
|
||||
|
||||
const resultObj = result as { corrected_string_escaping?: unknown };
|
||||
|
||||
if (
|
||||
result &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof result['corrected_string_escaping'] === 'string' &&
|
||||
result['corrected_string_escaping'].length > 0
|
||||
resultObj &&
|
||||
'corrected_string_escaping' in resultObj &&
|
||||
typeof resultObj.corrected_string_escaping === 'string' &&
|
||||
resultObj.corrected_string_escaping.length > 0
|
||||
) {
|
||||
return result['corrected_string_escaping'];
|
||||
return resultObj.corrected_string_escaping;
|
||||
} else {
|
||||
return potentiallyProblematicString;
|
||||
}
|
||||
|
||||
@@ -231,8 +231,10 @@ export function parseGoogleApiError(error: unknown): GoogleApiError | null {
|
||||
}
|
||||
// Basic structural check before casting.
|
||||
// Since the proto definitions are loose, we primarily rely on @type presence.
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
if (typeof detailObj['@type'] === 'string') {
|
||||
const typeCast = detailObj as { '@type'?: unknown };
|
||||
|
||||
const atType = typeCast['@type'];
|
||||
if ('@type' in typeCast && typeof atType === 'string') {
|
||||
// We can just cast it; the consumer will have to switch on @type
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
details.push(detailObj as unknown as GoogleApiErrorDetail);
|
||||
|
||||
@@ -357,29 +357,39 @@ async function parseTokenEndpointResponse(
|
||||
// Try to parse as JSON first, fall back to form-urlencoded
|
||||
try {
|
||||
const data: unknown = JSON.parse(responseText);
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const obj = data as {
|
||||
access_token?: unknown;
|
||||
token_type?: unknown;
|
||||
expires_in?: unknown;
|
||||
refresh_token?: unknown;
|
||||
scope?: unknown;
|
||||
};
|
||||
if (
|
||||
data &&
|
||||
typeof data === 'object' &&
|
||||
'access_token' in data &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof (data as Record<string, unknown>)['access_token'] === 'string'
|
||||
obj &&
|
||||
typeof obj === 'object' &&
|
||||
'access_token' in obj &&
|
||||
typeof obj.access_token === 'string'
|
||||
) {
|
||||
const obj = data as Record<string, unknown>;
|
||||
const result: OAuthTokenResponse = {
|
||||
access_token: String(obj['access_token']),
|
||||
access_token: String(obj.access_token),
|
||||
token_type:
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof obj['token_type'] === 'string' ? obj['token_type'] : 'Bearer',
|
||||
'token_type' in obj && typeof obj.token_type === 'string'
|
||||
? obj.token_type
|
||||
: 'Bearer',
|
||||
expires_in:
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof obj['expires_in'] === 'number' ? obj['expires_in'] : undefined,
|
||||
refresh_token:
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof obj['refresh_token'] === 'string'
|
||||
? obj['refresh_token']
|
||||
'expires_in' in obj && typeof obj.expires_in === 'number'
|
||||
? obj.expires_in
|
||||
: undefined,
|
||||
refresh_token:
|
||||
'refresh_token' in obj && typeof obj.refresh_token === 'string'
|
||||
? obj.refresh_token
|
||||
: undefined,
|
||||
|
||||
scope:
|
||||
'scope' in obj && typeof obj.scope === 'string'
|
||||
? obj.scope
|
||||
: undefined,
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
scope: typeof obj['scope'] === 'string' ? obj['scope'] : undefined,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user