feat: GenerateImage tool for built-in image generation

2026-04-22 11:04:42 -07:00 · 2026-03-02 09:54:31 -05:00
parent fb6ff847dd
commit 989c7a9e60
12 changed files with 1495 additions and 0 deletions
@@ -832,6 +832,7 @@ export async function loadCliConfig(
    skillsSupport: settings.skills?.enabled ?? true,
    disabledSkills: settings.skills?.disabled,
    experimentalJitContext: settings.experimental?.jitContext,
+    imageGeneration: settings.experimental?.imageGeneration,
    modelSteering: settings.experimental?.modelSteering,
    toolOutputMasking: settings.experimental?.toolOutputMasking,
    noBrowser: !!process.env['NO_BROWSER'],
@@ -1811,6 +1811,16 @@ const SETTINGS_SCHEMA = {
        description: 'Enable planning features (Plan Mode and tools).',
        showInDialog: true,
      },
+      imageGeneration: {
+        type: 'boolean',
+        label: 'Image Generation',
+        category: 'Experimental',
+        requiresRestart: true,
+        default: false,
+        description:
+          'Enable generating images with Nano Banana (experimental).',
+        showInDialog: true,
+      },
      modelSteering: {
        type: 'boolean',
        label: 'Model Steering',
@@ -58,6 +58,7 @@ import { shellsCommand } from '../ui/commands/shellsCommand.js';
 import { vimCommand } from '../ui/commands/vimCommand.js';
 import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
 import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
+import { imageCommand } from '../ui/commands/imageCommand.js';

 /**
 * Loads the core, hard-coded slash commands that are an integral part
@@ -119,6 +120,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
          ]
        : [extensionsCommand(this.config?.getEnableExtensionReloading())]),
      helpCommand,
+      imageCommand,
      shortcutsCommand,
      ...(this.config?.getEnableHooksUI() ? [hooksCommand] : []),
      rewindCommand,
@@ -0,0 +1,132 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { parseImageArgs, imageCommand } from './imageCommand.js';
+
+// Covers the whitespace tokenizer behind `/image`: prompt extraction,
+// `--flag value`, `--flag=value`, and the bare `--return` switch.
+describe('parseImageArgs', () => {
+  it('should parse a simple prompt with no flags', () => {
+    const { prompt, flags } = parseImageArgs('a sunset over the ocean');
+
+    expect(prompt).toBe('a sunset over the ocean');
+    expect(flags).toEqual({});
+  });
+
+  it('should parse prompt with a space-separated flag value', () => {
+    const { prompt, flags } = parseImageArgs('a sunset --ratio 16:9');
+
+    expect(prompt).toBe('a sunset');
+    expect(flags['ratio']).toBe('16:9');
+  });
+
+  it('should parse --return as boolean flag', () => {
+    const { prompt, flags } = parseImageArgs('a cat --return');
+
+    expect(prompt).toBe('a cat');
+    expect(flags['return']).toBe(true);
+  });
+
+  it('should parse inline flag values with =', () => {
+    const { prompt, flags } = parseImageArgs('a cat --ratio=16:9 --size=4K');
+
+    expect(prompt).toBe('a cat');
+    expect(flags['ratio']).toBe('16:9');
+    expect(flags['size']).toBe('4K');
+  });
+
+  it('should handle multiple flags', () => {
+    const input =
+      'abstract wallpaper --ratio 21:9 --size 4K --count 2 --return';
+    const { prompt, flags } = parseImageArgs(input);
+
+    expect(prompt).toBe('abstract wallpaper');
+    expect(flags['ratio']).toBe('21:9');
+    expect(flags['size']).toBe('4K');
+    // Values stay strings at this layer; no numeric conversion is asserted.
+    expect(flags['count']).toBe('2');
+    expect(flags['return']).toBe(true);
+  });
+
+  it('should return empty prompt when input starts with flags', () => {
+    const { prompt, flags } = parseImageArgs('--ratio 16:9');
+
+    expect(prompt).toBe('');
+    expect(flags['ratio']).toBe('16:9');
+  });
+
+  it('should handle empty input', () => {
+    const { prompt, flags } = parseImageArgs('');
+
+    expect(prompt).toBe('');
+    expect(flags).toEqual({});
+  });
+});
+
+// Exercises the slash command object: validation errors, flag-to-tool-arg
+// mapping, static metadata, and flag completion.
+describe('imageCommand', () => {
+  type ActionContext = Parameters<
+    NonNullable<typeof imageCommand.action>
+  >[0];
+
+  // Empty stand-in context shared by every call below.
+  const mockContext = {} as ActionContext;
+
+  const runAction = (args: string) => imageCommand.action!(mockContext, args);
+  const complete = (partialArg: string) =>
+    imageCommand.completion!(mockContext, partialArg);
+
+  it('should return error for empty args', () => {
+    const outcome = runAction('');
+
+    expect(outcome).toEqual(
+      expect.objectContaining({
+        type: 'message',
+        messageType: 'error',
+      }),
+    );
+  });
+
+  it('should return error when prompt is empty (only flags)', () => {
+    const outcome = runAction('--ratio 16:9');
+
+    expect(outcome).toEqual(
+      expect.objectContaining({
+        type: 'message',
+        messageType: 'error',
+        content: expect.stringContaining('No prompt provided'),
+      }),
+    );
+  });
+
+  it('should return tool action for valid prompt', () => {
+    const outcome = runAction('a sunset over the ocean');
+
+    expect(outcome).toEqual({
+      type: 'tool',
+      toolName: 'generate_image',
+      toolArgs: { prompt: 'a sunset over the ocean' },
+    });
+  });
+
+  it('should map all flags to tool args correctly', () => {
+    const everyFlag =
+      'a cat --ratio 16:9 --size 2K --count 3 --model gemini-3-pro-image-preview --edit ./img.png --output ./out --return';
+    const expectedToolArgs = {
+      prompt: 'a cat',
+      aspect_ratio: '16:9',
+      size: '2K',
+      count: 3,
+      model: 'gemini-3-pro-image-preview',
+      input_image: './img.png',
+      output_path: './out',
+      return_to_context: true,
+    };
+
+    expect(runAction(everyFlag)).toEqual({
+      type: 'tool',
+      toolName: 'generate_image',
+      toolArgs: expectedToolArgs,
+    });
+  });
+
+  it('should have correct metadata', () => {
+    const { name, altNames, kind, autoExecute } = imageCommand;
+
+    expect(name).toBe('image');
+    expect(altNames).toContain('img');
+    expect(kind).toBe('built-in');
+    expect(autoExecute).toBe(false);
+  });
+
+  it('should provide flag completions', () => {
+    expect(complete('--ra')).toContain('--ratio');
+  });
+
+  it('should return empty completions for non-flag input', () => {
+    expect(complete('some')).toEqual([]);
+  });
+});
@@ -0,0 +1,110 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { GENERATE_IMAGE_TOOL_NAME } from '@google/gemini-cli-core';
+import type { SlashCommand, SlashCommandActionReturn } from './types.js';
+import { CommandKind } from './types.js';
+
+/** Outcome of splitting the raw `/image` argument string. */
+interface ParsedImageArgs {
+  /** Words that precede the first `--flag` token, joined by single spaces. */
+  prompt: string;
+  /**
+   * Flag name (without the leading `--`) mapped to its value: a string for
+   * `--name value` / `--name=value`, or `true` for a bare `--return`.
+   */
+  flags: Record<string, string | boolean>;
+}
+
+/**
+ * Splits `/image` arguments into the prompt text and its `--flag` options.
+ *
+ * Tokens are separated by runs of whitespace; there is no quoting, so a flag
+ * value cannot contain spaces. Parsing rules:
+ * - every token before the first `--` token belongs to the prompt;
+ * - `--name=value` stores everything after the first `=` as the value;
+ * - a bare `--return` is stored as `true`;
+ * - `--name value` consumes the next token unless it starts with `--`;
+ * - any other valueless flag, and any stray non-flag token that follows the
+ *   first flag without being consumed as a value, is ignored.
+ *
+ * @param input Raw argument text typed after the command name.
+ * @returns The prompt (possibly empty) and the parsed flags.
+ */
+export function parseImageArgs(input: string): ParsedImageArgs {
+  const flags: Record<string, string | boolean> = {};
+
+  // Trim first: splitting untrimmed text yields empty edge tokens that would
+  // leak into the prompt as leading/trailing spaces.
+  const trimmed = input.trim();
+  const tokens = trimmed === '' ? [] : trimmed.split(/\s+/);
+
+  const firstFlagIndex = tokens.findIndex((token) => token.startsWith('--'));
+  const promptEnd = firstFlagIndex === -1 ? tokens.length : firstFlagIndex;
+  const prompt = tokens.slice(0, promptEnd).join(' ');
+
+  for (let i = promptEnd; i < tokens.length; i++) {
+    const token = tokens[i];
+    if (!token.startsWith('--')) {
+      continue;
+    }
+
+    // Cut at the first '=' only, so values such as `--output=./a=b.png`
+    // keep their own '=' characters.
+    const equalsIndex = token.indexOf('=');
+    if (equalsIndex !== -1) {
+      flags[token.slice(2, equalsIndex)] = token.slice(equalsIndex + 1);
+      continue;
+    }
+
+    const flagName = token.slice(2);
+    const nextToken = tokens[i + 1];
+    if (flagName === 'return') {
+      flags[flagName] = true;
+    } else if (nextToken !== undefined && !nextToken.startsWith('--')) {
+      flags[flagName] = nextToken;
+      i++; // the value token has been consumed
+    }
+  }
+
+  return { prompt, flags };
+}
+
+/**
+ * `/image` (alias `/img`) slash command.
+ *
+ * The action parses its arguments with `parseImageArgs` and returns either an
+ * error message or a `tool` action addressed to `GENERATE_IMAGE_TOOL_NAME`.
+ * CLI flags map onto tool arguments as follows:
+ * `--ratio` → `aspect_ratio`, `--size` → `size`, `--count` → `count` (integer),
+ * `--model` → `model`, `--edit` → `input_image`, `--output` → `output_path`,
+ * `--return` → `return_to_context`.
+ */
+export const imageCommand: SlashCommand = {
+  name: 'image',
+  altNames: ['img'],
+  description: 'Generate or edit images using Nano Banana',
+  kind: CommandKind.BUILT_IN,
+  autoExecute: false,
+
+  /**
+   * Turns the raw argument string into a tool invocation.
+   *
+   * @param _context Unused.
+   * @param args Text typed after the command name.
+   * @returns An error message when the arguments are empty, the prompt is
+   *   missing, or `--count` is not numeric; otherwise the tool action.
+   */
+  action: (_context, args): SlashCommandActionReturn | void => {
+    if (!args.trim()) {
+      return {
+        type: 'message',
+        messageType: 'error',
+        content:
+          'Usage: /image <prompt> [--ratio 16:9] [--size 2K] [--count 3] [--model <name>] [--edit path/to/image.png] [--output <path>] [--return]',
+      };
+    }
+
+    const { prompt, flags } = parseImageArgs(args);
+
+    if (!prompt) {
+      return {
+        type: 'message',
+        messageType: 'error',
+        content:
+          'Error: No prompt provided. The prompt must come before any --flags.',
+      };
+    }
+
+    const toolArgs: Record<string, unknown> = { prompt };
+    if (flags['ratio']) toolArgs['aspect_ratio'] = flags['ratio'];
+    if (flags['size']) toolArgs['size'] = flags['size'];
+
+    if (flags['count']) {
+      const rawCount = String(flags['count']);
+      const count = parseInt(rawCount, 10);
+      // Reject non-numeric input here instead of forwarding NaN to the tool.
+      if (Number.isNaN(count)) {
+        return {
+          type: 'message',
+          messageType: 'error',
+          content: `Error: --count must be a number (got "${rawCount}").`,
+        };
+      }
+      toolArgs['count'] = count;
+    }
+
+    if (flags['model']) toolArgs['model'] = flags['model'];
+    if (flags['edit']) toolArgs['input_image'] = flags['edit'];
+    if (flags['output']) toolArgs['output_path'] = flags['output'];
+
+    // `--return` is a switch, but the parser also accepts `--return=<value>`;
+    // an explicit negative value must not switch the option on.
+    const returnFlag = flags['return'];
+    const explicitlyOff =
+      typeof returnFlag === 'string' &&
+      ['false', '0', 'no', 'off'].includes(returnFlag.toLowerCase());
+    if (returnFlag && !explicitlyOff) toolArgs['return_to_context'] = true;
+
+    return {
+      type: 'tool',
+      toolName: GENERATE_IMAGE_TOOL_NAME,
+      toolArgs,
+    };
+  },
+
+  /**
+   * Suggests flag names for a partially typed `--flag`.
+   *
+   * @param _context Unused.
+   * @param partialArg The argument fragment being completed.
+   * @returns Flags starting with `partialArg`, or an empty list when the
+   *   fragment does not start with `--`.
+   */
+  completion: (_context, partialArg) => {
+    const flagOptions = [
+      '--ratio',
+      '--size',
+      '--count',
+      '--model',
+      '--edit',
+      '--output',
+      '--return',
+    ];
+    if (partialArg.startsWith('--')) {
+      return flagOptions.filter((f) => f.startsWith(partialArg));
+    }
+    return [];
+  },
+};