feat: GenerateImage tool for built-in image generation

This commit is contained in:
Jack Wotherspoon
2026-03-02 09:54:31 -05:00
parent fb6ff847dd
commit 989c7a9e60
12 changed files with 1495 additions and 0 deletions
+1
View File
@@ -832,6 +832,7 @@ export async function loadCliConfig(
skillsSupport: settings.skills?.enabled ?? true,
disabledSkills: settings.skills?.disabled,
experimentalJitContext: settings.experimental?.jitContext,
imageGeneration: settings.experimental?.imageGeneration,
modelSteering: settings.experimental?.modelSteering,
toolOutputMasking: settings.experimental?.toolOutputMasking,
noBrowser: !!process.env['NO_BROWSER'],
+10
View File
@@ -1811,6 +1811,16 @@ const SETTINGS_SCHEMA = {
description: 'Enable planning features (Plan Mode and tools).',
showInDialog: true,
},
imageGeneration: {
type: 'boolean',
label: 'Image Generation',
category: 'Experimental',
requiresRestart: true,
default: false,
description:
'Enable generating images with Nano Banana (experimental).',
showInDialog: true,
},
modelSteering: {
type: 'boolean',
label: 'Model Steering',
@@ -58,6 +58,7 @@ import { shellsCommand } from '../ui/commands/shellsCommand.js';
import { vimCommand } from '../ui/commands/vimCommand.js';
import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
import { imageCommand } from '../ui/commands/imageCommand.js';
/**
* Loads the core, hard-coded slash commands that are an integral part
@@ -119,6 +120,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
]
: [extensionsCommand(this.config?.getEnableExtensionReloading())]),
helpCommand,
imageCommand,
shortcutsCommand,
...(this.config?.getEnableHooksUI() ? [hooksCommand] : []),
rewindCommand,
@@ -0,0 +1,132 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { parseImageArgs, imageCommand } from './imageCommand.js';
// Covers the `/image` argument parser: free-text prompt first, then optional
// `--flag value`, `--flag=value`, or the bare boolean `--return` flag.
describe('parseImageArgs', () => {
  it('should parse a simple prompt with no flags', () => {
    const { prompt, flags } = parseImageArgs('a sunset over the ocean');

    expect(prompt).toBe('a sunset over the ocean');
    expect(flags).toEqual({});
  });

  it('should parse prompt with a space-separated flag value', () => {
    const { prompt, flags } = parseImageArgs('a sunset --ratio 16:9');

    expect(prompt).toBe('a sunset');
    expect(flags['ratio']).toBe('16:9');
  });

  it('should parse --return as boolean flag', () => {
    const { prompt, flags } = parseImageArgs('a cat --return');

    expect(prompt).toBe('a cat');
    expect(flags['return']).toBe(true);
  });

  it('should parse inline flag values with =', () => {
    const { prompt, flags } = parseImageArgs('a cat --ratio=16:9 --size=4K');

    expect(prompt).toBe('a cat');
    expect(flags['ratio']).toBe('16:9');
    expect(flags['size']).toBe('4K');
  });

  it('should handle multiple flags', () => {
    const input =
      'abstract wallpaper --ratio 21:9 --size 4K --count 2 --return';
    const { prompt, flags } = parseImageArgs(input);

    expect(prompt).toBe('abstract wallpaper');
    expect(flags['ratio']).toBe('21:9');
    expect(flags['size']).toBe('4K');
    // Flag values stay strings at the parser level.
    expect(flags['count']).toBe('2');
    expect(flags['return']).toBe(true);
  });

  it('should return empty prompt when input starts with flags', () => {
    const { prompt, flags } = parseImageArgs('--ratio 16:9');

    expect(prompt).toBe('');
    expect(flags['ratio']).toBe('16:9');
  });

  it('should handle empty input', () => {
    const { prompt, flags } = parseImageArgs('');

    expect(prompt).toBe('');
    expect(flags).toEqual({});
  });
});
// Covers the `/image` slash command: argument validation, flag-to-tool-arg
// mapping, static metadata, and flag completion.
describe('imageCommand', () => {
  type ActionContext = Parameters<NonNullable<typeof imageCommand.action>>[0];

  // Neither handler reads the context, so an empty stub is sufficient.
  const mockContext = {} as ActionContext;

  const runAction = (args: string) => imageCommand.action!(mockContext, args);
  const complete = (partialArg: string) =>
    imageCommand.completion!(mockContext, partialArg);

  it('should return error for empty args', () => {
    const outcome = runAction('');

    expect(outcome).toEqual(
      expect.objectContaining({
        type: 'message',
        messageType: 'error',
      }),
    );
  });

  it('should return error when prompt is empty (only flags)', () => {
    const outcome = runAction('--ratio 16:9');

    expect(outcome).toEqual(
      expect.objectContaining({
        type: 'message',
        messageType: 'error',
        content: expect.stringContaining('No prompt provided'),
      }),
    );
  });

  it('should return tool action for valid prompt', () => {
    const outcome = runAction('a sunset over the ocean');

    expect(outcome).toEqual({
      type: 'tool',
      toolName: 'generate_image',
      toolArgs: { prompt: 'a sunset over the ocean' },
    });
  });

  it('should map all flags to tool args correctly', () => {
    const outcome = runAction(
      'a cat --ratio 16:9 --size 2K --count 3 --model gemini-3-pro-image-preview --edit ./img.png --output ./out --return',
    );

    const expectedToolArgs = {
      prompt: 'a cat',
      aspect_ratio: '16:9',
      size: '2K',
      count: 3,
      model: 'gemini-3-pro-image-preview',
      input_image: './img.png',
      output_path: './out',
      return_to_context: true,
    };
    expect(outcome).toEqual({
      type: 'tool',
      toolName: 'generate_image',
      toolArgs: expectedToolArgs,
    });
  });

  it('should have correct metadata', () => {
    const { name, altNames, kind, autoExecute } = imageCommand;

    expect(name).toBe('image');
    expect(altNames).toContain('img');
    expect(kind).toBe('built-in');
    expect(autoExecute).toBe(false);
  });

  it('should provide flag completions', () => {
    expect(complete('--ra')).toContain('--ratio');
  });

  it('should return empty completions for non-flag input', () => {
    expect(complete('some')).toEqual([]);
  });
});
@@ -0,0 +1,110 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { GENERATE_IMAGE_TOOL_NAME } from '@google/gemini-cli-core';
import type { SlashCommand, SlashCommandActionReturn } from './types.js';
import { CommandKind } from './types.js';
/** Result of splitting raw `/image` arguments into prompt text and flags. */
interface ParsedImageArgs {
  /** Words preceding the first `--flag`, joined with single spaces. */
  prompt: string;
  /** Flag name (without the leading `--`) mapped to its value. */
  flags: Record<string, string | boolean>;
}

/**
 * Splits the raw argument string of the `/image` command into a prompt and a
 * flag map.
 *
 * Grammar:
 * - Everything before the first token starting with `--` is the prompt.
 * - `--name=value` stores `value`; only the first `=` separates name from
 *   value, so values may themselves contain `=`.
 * - `--return` is a boolean flag and never consumes the following token.
 *   `--return=<v>` is true unless `<v>` is empty or `false`.
 * - `--name value` stores the next token when it is not itself a flag.
 * - A value-taking flag with no value is dropped, and stray non-flag tokens
 *   after the first flag are ignored.
 *
 * Tokens are whitespace-separated; quoting is not supported, so a flag value
 * cannot contain spaces.
 *
 * @param input Raw text following the command name.
 * @returns The prompt (possibly empty) and the parsed flags.
 */
export function parseImageArgs(input: string): ParsedImageArgs {
  const flags: Record<string, string | boolean> = {};

  // Drop empty tokens so leading/trailing whitespace neither pads the prompt
  // nor becomes an empty flag value.
  const tokens = input.split(/\s+/).filter((token) => token.length > 0);

  const firstFlagIndex = tokens.findIndex((token) => token.startsWith('--'));
  const promptTokens =
    firstFlagIndex === -1 ? tokens : tokens.slice(0, firstFlagIndex);
  const flagTokens = firstFlagIndex === -1 ? [] : tokens.slice(firstFlagIndex);

  for (let i = 0; i < flagTokens.length; i++) {
    const token = flagTokens[i];
    if (token === undefined || !token.startsWith('--')) {
      // Stray value after the flags section: ignored.
      continue;
    }

    const body = token.slice(2);
    const eqIndex = body.indexOf('=');

    if (eqIndex !== -1) {
      const name = body.slice(0, eqIndex);
      // Keep everything after the first '=' so values containing '=' survive.
      const value = body.slice(eqIndex + 1);
      flags[name] =
        name === 'return'
          ? value !== '' && value.toLowerCase() !== 'false'
          : value;
      continue;
    }

    if (body === 'return') {
      flags[body] = true;
      continue;
    }

    const next = flagTokens[i + 1];
    if (next !== undefined && !next.startsWith('--')) {
      flags[body] = next;
      i++; // The value token has been consumed.
    }
  }

  return { prompt: promptTokens.join(' '), flags };
}
/** Flags offered by `/image` tab completion. */
const IMAGE_FLAG_OPTIONS: readonly string[] = [
  '--ratio',
  '--size',
  '--count',
  '--model',
  '--edit',
  '--output',
  '--return',
];

/**
 * `/image` (alias `/img`) slash command.
 *
 * Parses `<prompt> [--flags]` and returns a `tool` action naming
 * `GENERATE_IMAGE_TOOL_NAME`, with the flags translated to tool arguments:
 *
 * | flag       | tool argument       |
 * | ---------- | ------------------- |
 * | `--ratio`  | `aspect_ratio`      |
 * | `--size`   | `size`              |
 * | `--count`  | `count` (integer)   |
 * | `--model`  | `model`             |
 * | `--edit`   | `input_image`       |
 * | `--output` | `output_path`       |
 * | `--return` | `return_to_context` |
 *
 * An `error` message action is returned instead when the arguments are empty,
 * when no prompt precedes the flags, or when `--count` is not a positive
 * integer.
 */
export const imageCommand: SlashCommand = {
  name: 'image',
  altNames: ['img'],
  description: 'Generate or edit images using Nano Banana',
  kind: CommandKind.BUILT_IN,
  autoExecute: false,
  action: (_context, args): SlashCommandActionReturn | void => {
    // Trim once so surrounding whitespace cannot leak into the prompt.
    const trimmedArgs = args.trim();
    if (!trimmedArgs) {
      return {
        type: 'message',
        messageType: 'error',
        content:
          'Usage: /image <prompt> [--ratio 16:9] [--size 2K] [--count 3] [--edit path/to/image.png]',
      };
    }

    const { prompt, flags } = parseImageArgs(trimmedArgs);
    if (!prompt) {
      return {
        type: 'message',
        messageType: 'error',
        content:
          'Error: No prompt provided. The prompt must come before any --flags.',
      };
    }

    const toolArgs: Record<string, unknown> = { prompt };

    if (flags['ratio']) toolArgs['aspect_ratio'] = flags['ratio'];
    if (flags['size']) toolArgs['size'] = flags['size'];

    const rawCount = flags['count'];
    if (rawCount) {
      const countText = String(rawCount);
      // Require plain digits: parseInt alone would turn "abc" into NaN and
      // silently accept "3abc" as 3.
      const count = /^\d+$/.test(countText)
        ? Number.parseInt(countText, 10)
        : Number.NaN;
      if (!Number.isInteger(count) || count < 1) {
        return {
          type: 'message',
          messageType: 'error',
          content: `Error: --count must be a positive integer (got "${countText}").`,
        };
      }
      toolArgs['count'] = count;
    }

    if (flags['model']) toolArgs['model'] = flags['model'];
    if (flags['edit']) toolArgs['input_image'] = flags['edit'];
    if (flags['output']) toolArgs['output_path'] = flags['output'];
    if (flags['return']) toolArgs['return_to_context'] = true;

    return {
      type: 'tool',
      toolName: GENERATE_IMAGE_TOOL_NAME,
      toolArgs,
    };
  },
  // Completes flag names only; prompt text and flag values get no suggestions.
  completion: (_context, partialArg) => {
    if (!partialArg.startsWith('--')) {
      return [];
    }
    return IMAGE_FLAG_OPTIONS.filter((flag) => flag.startsWith(partialArg));
  },
};