From 1d383a4a8e722b529cde21cdb950e0104a4c6752 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 16:57:56 -0700 Subject: [PATCH] feat(cli): add streamlined `gemini gemma` local model setup (#25498) Co-authored-by: Abhijit Balaji Co-authored-by: Samee Zahid Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/cli/settings.md | 24 +- docs/reference/configuration.md | 12 + packages/cli/src/commands/gemma.ts | 33 ++ packages/cli/src/commands/gemma/constants.ts | 45 ++ packages/cli/src/commands/gemma/logs.test.ts | 186 +++++++ packages/cli/src/commands/gemma/logs.ts | 200 +++++++ .../cli/src/commands/gemma/platform.test.ts | 162 ++++++ packages/cli/src/commands/gemma/platform.ts | 316 +++++++++++ packages/cli/src/commands/gemma/setup.test.ts | 60 +++ packages/cli/src/commands/gemma/setup.ts | 504 ++++++++++++++++++ packages/cli/src/commands/gemma/start.ts | 123 +++++ packages/cli/src/commands/gemma/status.ts | 165 ++++++ packages/cli/src/commands/gemma/stop.test.ts | 112 ++++ packages/cli/src/commands/gemma/stop.ts | 155 ++++++ packages/cli/src/config/config.test.ts | 13 + packages/cli/src/config/config.ts | 3 + .../cli/src/config/settingsSchema.test.ts | 24 +- packages/cli/src/config/settingsSchema.ts | 20 + packages/cli/src/gemini.tsx | 17 + .../cli/src/services/BuiltinCommandLoader.ts | 2 + .../src/services/liteRtServerManager.test.ts | 68 +++ .../cli/src/services/liteRtServerManager.ts | 59 ++ .../cli/src/ui/commands/gemmaStatusCommand.ts | 41 ++ .../src/ui/components/HistoryItemDisplay.tsx | 4 + .../src/ui/components/views/GemmaStatus.tsx | 120 +++++ packages/cli/src/ui/types.ts | 15 + packages/core/src/config/config.test.ts | 8 + packages/core/src/config/config.ts | 4 + .../core/src/core/localLiteRtLmClient.test.ts | 10 + packages/core/src/core/localLiteRtLmClient.ts | 2 + schemas/settings.schema.json | 14 + 31 files changed, 2509 insertions(+), 12 deletions(-) create mode 100644 packages/cli/src/commands/gemma.ts create mode 100644 packages/cli/src/commands/gemma/constants.ts create mode 100644 packages/cli/src/commands/gemma/logs.test.ts create mode 100644 packages/cli/src/commands/gemma/logs.ts create mode 100644 packages/cli/src/commands/gemma/platform.test.ts create mode 100644 packages/cli/src/commands/gemma/platform.ts create mode 100644 packages/cli/src/commands/gemma/setup.test.ts create mode 100644 packages/cli/src/commands/gemma/setup.ts create mode 100644 packages/cli/src/commands/gemma/start.ts create mode 100644 packages/cli/src/commands/gemma/status.ts create mode 100644 packages/cli/src/commands/gemma/stop.test.ts create mode 100644 packages/cli/src/commands/gemma/stop.ts create mode 100644 packages/cli/src/services/liteRtServerManager.test.ts create mode 100644 packages/cli/src/services/liteRtServerManager.ts create mode 100644 packages/cli/src/ui/commands/gemmaStatusCommand.ts create mode 100644 packages/cli/src/ui/components/views/GemmaStatus.tsx diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 7f34365bb0..fbe556a370 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -161,17 +161,19 @@ they appear in the UI. ### Experimental -| UI Label | Setting | Description | Default | -| ---------------------------------------------------- | -------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | -| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | -| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | -| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | -| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` | -| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | -| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | +| UI Label | Setting | Description | Default | +| ---------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | +| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | +| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` | +| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | +| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` | +| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | +| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | ### Skills diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index a5a6aa1eb2..c4e18888fb 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1711,6 +1711,18 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.gemmaModelRouter.autoStartServer`** (boolean): + - **Description:** Automatically start the LiteRT-LM server when Gemini CLI + starts and the Gemma router is enabled. + - **Default:** `false` + - **Requires restart:** Yes + +- **`experimental.gemmaModelRouter.binaryPath`** (string): + - **Description:** Custom path to the LiteRT-LM binary. Leave empty to use the + default location (~/.gemini/bin/litert/). + - **Default:** `""` + - **Requires restart:** Yes + - **`experimental.gemmaModelRouter.classifier.host`** (string): - **Description:** The host of the classifier. - **Default:** `"http://localhost:9379"` diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts new file mode 100644 index 0000000000..737bbb069b --- /dev/null +++ b/packages/cli/src/commands/gemma.ts @@ -0,0 +1,33 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule, Argv } from 'yargs'; +import { initializeOutputListenersAndFlush } from '../gemini.js'; +import { defer } from '../deferred.js'; +import { setupCommand } from './gemma/setup.js'; +import { startCommand } from './gemma/start.js'; +import { stopCommand } from './gemma/stop.js'; +import { statusCommand } from './gemma/status.js'; +import { logsCommand } from './gemma/logs.js'; + +export const gemmaCommand: CommandModule = { + command: 'gemma', + describe: 'Manage local Gemma model routing', + builder: (yargs: Argv) => + yargs + .middleware((argv) => { + initializeOutputListenersAndFlush(); + argv['isCommand'] = true; + }) + .command(defer(setupCommand, 'gemma')) + .command(defer(startCommand, 'gemma')) + .command(defer(stopCommand, 'gemma')) + .command(defer(statusCommand, 'gemma')) + .command(defer(logsCommand, 'gemma')) + .demandCommand(1, 'You need at least one command before continuing.') + .version(false), + handler: () => {}, +}; diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts new file mode 100644 index 0000000000..a37326a057 --- /dev/null +++ b/packages/cli/src/commands/gemma/constants.ts @@ -0,0 +1,45 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import path from 'node:path'; +import { Storage } from '@google/gemini-cli-core'; + +export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03'; +export const LITERT_RELEASE_BASE_URL = + 'https://github.com/google-ai-edge/LiteRT-LM/releases/download'; +export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom'; +export const DEFAULT_PORT = 9379; +export const HEALTH_CHECK_TIMEOUT_MS = 5000; +export const LITERT_API_VERSION = 'v1beta'; +export const SERVER_START_WAIT_MS = 3000; + +export const PLATFORM_BINARY_MAP: Record = { + 'darwin-arm64': 'lit.macos_arm64', + 'linux-x64': 'lit.linux_x86_64', + 'win32-x64': 'lit.windows_x86_64.exe', +}; + +// SHA-256 hashes for the official LiteRT-LM v0.9.0-alpha03 release binaries. +export const PLATFORM_BINARY_SHA256: Record = { + 'lit.macos_arm64': + '9e826a2634f2e8b220ad0f1e1b5c139e0b47cb172326e3b7d46d31382f49478e', + 'lit.linux_x86_64': + '66601df8a07f08244b188e9fcab0bf4a16562fe76d8d47e49f40273d57541ee8', + 'lit.windows_x86_64.exe': + 'de82d2829d2fb1cbdb318e2d8a78dc2f9659ff14cb11b2894d1f30e0bfde2bf6', +}; + +export function getLiteRtBinDir(): string { + return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert'); +} + +export function getPidFilePath(): string { + return path.join(Storage.getGlobalTempDir(), 'litert-server.pid'); +} + +export function getLogFilePath(): string { + return path.join(Storage.getGlobalTempDir(), 'litert-server.log'); +} diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts new file mode 100644 index 0000000000..49ab8d43c6 --- /dev/null +++ b/packages/cli/src/commands/gemma/logs.test.ts @@ -0,0 +1,186 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import type { ChildProcess } from 'node:child_process'; +import { EventEmitter } from 'node:events'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { spawn } from 'node:child_process'; +import { exitCli } from '../utils.js'; +import { getLogFilePath } from './constants.js'; +import { logsCommand, readLastLines } from './logs.js'; + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const { mockCoreDebugLogger } = await import( + '../../test-utils/mockDebugLogger.js' + ); + return mockCoreDebugLogger( + await importOriginal(), + { + stripAnsi: false, + }, + ); +}); + +vi.mock('node:child_process', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + spawn: vi.fn(), + }; +}); + +vi.mock('../utils.js', () => ({ + exitCli: vi.fn(), +})); + +vi.mock('./constants.js', () => ({ + getLogFilePath: vi.fn(), +})); + +function createMockChild(): ChildProcess { + return Object.assign(new EventEmitter(), { + kill: vi.fn(), + }) as unknown as ChildProcess; +} + +async function flushMicrotasks() { + await Promise.resolve(); + await Promise.resolve(); +} + +describe('readLastLines', () => { + const tempFiles: string[] = []; + + afterEach(async () => { + await Promise.all( + tempFiles + .splice(0) + .map((filePath) => fs.promises.rm(filePath, { force: true })), + ); + }); + + it('returns only the requested tail lines without reading the whole file eagerly', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`, + ); + tempFiles.push(filePath); + + const content = Array.from({ length: 2000 }, (_, i) => `line-${i + 1}`) + .join('\n') + .concat('\n'); + await fs.promises.writeFile(filePath, content, 'utf-8'); + + await expect(readLastLines(filePath, 3)).resolves.toBe( + 'line-1998\nline-1999\nline-2000\n', + ); + }); + + it('returns an empty string when zero lines are requested', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`, + ); + tempFiles.push(filePath); + await fs.promises.writeFile(filePath, 'line-1\nline-2\n', 'utf-8'); + + await expect(readLastLines(filePath, 0)).resolves.toBe(''); + }); +}); + +describe('logsCommand', () => { + const originalPlatform = process.platform; + + beforeEach(() => { + vi.clearAllMocks(); + Object.defineProperty(process, 'platform', { + value: 'linux', + configurable: true, + }); + vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log'); + vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined); + }); + + afterEach(() => { + Object.defineProperty(process, 'platform', { + value: originalPlatform, + configurable: true, + }); + vi.restoreAllMocks(); + }); + + it('waits for the tail process to close before exiting in follow mode', async () => { + const child = createMockChild(); + vi.mocked(spawn).mockReturnValue(child); + + let resolved = false; + const handlerPromise = ( + logsCommand.handler as (argv: Record) => Promise + )({}).then(() => { + resolved = true; + }); + + await flushMicrotasks(); + + expect(spawn).toHaveBeenCalledWith( + 'tail', + ['-f', '-n', '20', '/tmp/gemma.log'], + { stdio: 'inherit' }, + ); + expect(resolved).toBe(false); + expect(exitCli).not.toHaveBeenCalled(); + + child.emit('close', 0); + await handlerPromise; + + expect(exitCli).toHaveBeenCalledWith(0); + }); + + it('uses one-shot tail output when follow is disabled', async () => { + const child = createMockChild(); + vi.mocked(spawn).mockReturnValue(child); + + const handlerPromise = ( + logsCommand.handler as (argv: Record) => Promise + )({ follow: false }); + + await flushMicrotasks(); + + expect(spawn).toHaveBeenCalledWith('tail', ['-n', '20', '/tmp/gemma.log'], { + stdio: 'inherit', + }); + + child.emit('close', 0); + await handlerPromise; + + expect(exitCli).toHaveBeenCalledWith(0); + }); + + it('follows from the requested line count when both --lines and --follow are set', async () => { + const child = createMockChild(); + vi.mocked(spawn).mockReturnValue(child); + + const handlerPromise = ( + logsCommand.handler as (argv: Record) => Promise + )({ lines: 5, follow: true }); + + await flushMicrotasks(); + + expect(spawn).toHaveBeenCalledWith( + 'tail', + ['-f', '-n', '5', '/tmp/gemma.log'], + { stdio: 'inherit' }, + ); + + child.emit('close', 0); + await handlerPromise; + + expect(exitCli).toHaveBeenCalledWith(0); + }); +}); diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts new file mode 100644 index 0000000000..023b8e6352 --- /dev/null +++ b/packages/cli/src/commands/gemma/logs.ts @@ -0,0 +1,200 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import { spawn, type ChildProcess } from 'node:child_process'; +import { debugLogger } from '@google/gemini-cli-core'; +import { exitCli } from '../utils.js'; +import { getLogFilePath } from './constants.js'; + +export async function readLastLines( + filePath: string, + count: number, +): Promise { + if (count <= 0) { + return ''; + } + + const CHUNK_SIZE = 64 * 1024; + const fileHandle = await fs.promises.open(filePath, fs.constants.O_RDONLY); + + try { + const stats = await fileHandle.stat(); + if (stats.size === 0) { + return ''; + } + + const chunks: Buffer[] = []; + let totalBytes = 0; + let newlineCount = 0; + let position = stats.size; + + while (position > 0 && newlineCount <= count) { + const readSize = Math.min(CHUNK_SIZE, position); + position -= readSize; + + const buffer = Buffer.allocUnsafe(readSize); + const { bytesRead } = await fileHandle.read( + buffer, + 0, + readSize, + position, + ); + + if (bytesRead === 0) { + break; + } + + const chunk = + bytesRead === readSize ? buffer : buffer.subarray(0, bytesRead); + chunks.unshift(chunk); + totalBytes += chunk.length; + + for (const byte of chunk) { + if (byte === 0x0a) { + newlineCount += 1; + } + } + } + + const content = Buffer.concat(chunks, totalBytes).toString('utf-8'); + const lines = content.split('\n'); + + if (position > 0 && lines.length > 0) { + const boundary = Buffer.allocUnsafe(1); + const { bytesRead } = await fileHandle.read(boundary, 0, 1, position - 1); + if (bytesRead === 1 && boundary[0] !== 0x0a) { + lines.shift(); + } + } + + if (lines.length > 0 && lines[lines.length - 1] === '') { + lines.pop(); + } + + if (lines.length === 0) { + return ''; + } + + return lines.slice(-count).join('\n') + '\n'; + } finally { + await fileHandle.close(); + } +} + +interface LogsArgs { + lines?: number; + follow?: boolean; +} + +function waitForChild(child: ChildProcess): Promise { + return new Promise((resolve, reject) => { + child.once('error', reject); + child.once('close', (code) => resolve(code ?? 1)); + }); +} + +async function runTail(logPath: string, lines: number, follow: boolean) { + const tailArgs = follow + ? ['-f', '-n', String(lines), logPath] + : ['-n', String(lines), logPath]; + const child = spawn('tail', tailArgs, { stdio: 'inherit' }); + + if (!follow) { + return waitForChild(child); + } + + const handleSigint = () => { + child.kill('SIGTERM'); + }; + process.once('SIGINT', handleSigint); + + try { + return await waitForChild(child); + } finally { + process.off('SIGINT', handleSigint); + } +} + +export const logsCommand: CommandModule = { + command: 'logs', + describe: 'View LiteRT-LM server logs', + builder: (yargs) => + yargs + .option('lines', { + alias: 'n', + type: 'number', + description: 'Show the last N lines and exit (omit to follow live)', + }) + .option('follow', { + alias: 'f', + type: 'boolean', + description: + 'Follow log output (defaults to true when --lines is omitted)', + }), + handler: async (argv) => { + const logPath = getLogFilePath(); + + try { + await fs.promises.access(logPath, fs.constants.F_OK); + } catch { + debugLogger.log(`No log file found at ${logPath}`); + debugLogger.log( + 'Is the LiteRT server running? Start it with: gemini gemma start', + ); + await exitCli(1); + return; + } + + const lines = argv.lines; + const follow = argv.follow ?? lines === undefined; + const requestedLines = lines ?? 20; + + if (follow && process.platform === 'win32') { + debugLogger.log( + 'Live log following is not supported on Windows. Use --lines N to view recent logs.', + ); + await exitCli(1); + return; + } + + if (process.platform === 'win32') { + process.stdout.write(await readLastLines(logPath, requestedLines)); + await exitCli(0); + return; + } + + try { + if (follow) { + debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`); + } + const exitCode = await runTail(logPath, requestedLines, follow); + await exitCli(exitCode); + } catch (error) { + if ( + error instanceof Error && + 'code' in error && + error.code === 'ENOENT' + ) { + if (!follow) { + process.stdout.write(await readLastLines(logPath, requestedLines)); + await exitCli(0); + } else { + debugLogger.error( + '"tail" command not found. Use --lines N to view recent logs without tail.', + ); + await exitCli(1); + } + } else { + debugLogger.error( + `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`, + ); + await exitCli(1); + } + } + }, +}; diff --git a/packages/cli/src/commands/gemma/platform.test.ts b/packages/cli/src/commands/gemma/platform.test.ts new file mode 100644 index 0000000000..b00549365a --- /dev/null +++ b/packages/cli/src/commands/gemma/platform.test.ts @@ -0,0 +1,162 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { SettingScope } from '../../config/settings.js'; +import { getLiteRtBinDir } from './constants.js'; + +const mockLoadSettings = vi.hoisted(() => vi.fn()); + +vi.mock('../../config/settings.js', () => ({ + loadSettings: mockLoadSettings, + SettingScope: { + User: 'User', + }, +})); + +import { + getBinaryPath, + isExpectedLiteRtServerCommand, + isBinaryInstalled, + readServerProcessInfo, + resolveGemmaConfig, +} from './platform.js'; + +describe('gemma platform helpers', () => { + function createMockSettings( + userGemmaSettings?: object, + mergedGemmaSettings?: object, + ) { + return { + merged: { + experimental: { + gemmaModelRouter: mergedGemmaSettings, + }, + }, + forScope: vi.fn((scope: SettingScope) => { + if (scope !== SettingScope.User) { + throw new Error(`Unexpected scope ${scope}`); + } + return { + settings: { + experimental: { + gemmaModelRouter: userGemmaSettings, + }, + }, + }; + }), + }; + } + + beforeEach(() => { + vi.clearAllMocks(); + mockLoadSettings.mockReturnValue(createMockSettings()); + }); + + it('prefers the configured binary path from settings', () => { + mockLoadSettings.mockReturnValue( + createMockSettings({ binaryPath: '/custom/lit' }), + ); + + expect(getBinaryPath('lit.test')).toBe('/custom/lit'); + }); + + it('ignores workspace overrides for the configured binary path', () => { + mockLoadSettings.mockReturnValue( + createMockSettings( + { binaryPath: '/user/lit' }, + { binaryPath: '/workspace/evil' }, + ), + ); + + expect(getBinaryPath('lit.test')).toBe('/user/lit'); + }); + + it('falls back to the default install location when no custom path is set', () => { + expect(getBinaryPath('lit.test')).toBe( + path.join(getLiteRtBinDir(), 'lit.test'), + ); + }); + + it('resolves the configured port and binary path from settings', () => { + mockLoadSettings.mockReturnValue( + createMockSettings( + { binaryPath: '/custom/lit' }, + { + enabled: true, + classifier: { + host: 'http://localhost:8123/v1beta', + }, + }, + ), + ); + + expect(resolveGemmaConfig(9379)).toEqual({ + settingsEnabled: true, + configuredPort: 8123, + configuredBinaryPath: '/custom/lit', + }); + }); + + it('checks binary installation using the resolved binary path', () => { + mockLoadSettings.mockReturnValue( + createMockSettings({ binaryPath: '/custom/lit' }), + ); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + + expect(isBinaryInstalled()).toBe(true); + expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit'); + }); + + it('parses structured server process info from the pid file', () => { + vi.spyOn(fs, 'readFileSync').mockReturnValue( + JSON.stringify({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }), + ); + + expect(readServerProcessInfo()).toEqual({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + }); + + it('parses legacy pid-only files for backward compatibility', () => { + vi.spyOn(fs, 'readFileSync').mockReturnValue('4321'); + + expect(readServerProcessInfo()).toEqual({ + pid: 4321, + }); + }); + + it('matches only the expected LiteRT serve command', () => { + expect( + isExpectedLiteRtServerCommand('/custom/lit serve --port=8123 --verbose', { + binaryPath: '/custom/lit', + port: 8123, + }), + ).toBe(true); + + expect( + isExpectedLiteRtServerCommand('/custom/lit run --port=8123', { + binaryPath: '/custom/lit', + port: 8123, + }), + ).toBe(false); + + expect( + isExpectedLiteRtServerCommand('/custom/lit serve --port=9000', { + binaryPath: '/custom/lit', + port: 8123, + }), + ).toBe(false); + }); +}); diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts new file mode 100644 index 0000000000..0fdd6e02e1 --- /dev/null +++ b/packages/cli/src/commands/gemma/platform.ts @@ -0,0 +1,316 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { loadSettings, SettingScope } from '../../config/settings.js'; +import fs from 'node:fs'; +import path from 'node:path'; +import { execFileSync } from 'node:child_process'; +import { + PLATFORM_BINARY_MAP, + LITERT_RELEASE_BASE_URL, + LITERT_RELEASE_VERSION, + getLiteRtBinDir, + GEMMA_MODEL_NAME, + HEALTH_CHECK_TIMEOUT_MS, + LITERT_API_VERSION, + getPidFilePath, +} from './constants.js'; + +export interface PlatformInfo { + key: string; + binaryName: string; +} + +export interface GemmaConfigStatus { + settingsEnabled: boolean; + configuredPort: number; + configuredBinaryPath?: string; +} + +export interface LiteRtServerProcessInfo { + pid: number; + binaryPath?: string; + port?: number; +} + +function getUserConfiguredBinaryPath( + workspaceDir = process.cwd(), +): string | undefined { + try { + const userGemmaSettings = loadSettings(workspaceDir).forScope( + SettingScope.User, + ).settings.experimental?.gemmaModelRouter; + return userGemmaSettings?.binaryPath?.trim() || undefined; + } catch { + return undefined; + } +} + +function parsePortFromHost( + host: string | undefined, + fallbackPort: number, +): number { + if (!host) { + return fallbackPort; + } + + try { + const url = new URL(host); + const port = Number(url.port); + return Number.isFinite(port) && port > 0 ? port : fallbackPort; + } catch { + const match = host.match(/:(\d+)/); + if (!match) { + return fallbackPort; + } + const port = parseInt(match[1], 10); + return Number.isFinite(port) && port > 0 ? port : fallbackPort; + } +} + +export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { + let settingsEnabled = false; + let configuredPort = fallbackPort; + const configuredBinaryPath = getUserConfiguredBinaryPath(); + try { + const settings = loadSettings(process.cwd()); + const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; + settingsEnabled = gemmaSettings?.enabled === true; + configuredPort = parsePortFromHost( + gemmaSettings?.classifier?.host, + fallbackPort, + ); + } catch { + // ignore — settings may fail to load outside a workspace + } + return { settingsEnabled, configuredPort, configuredBinaryPath }; +} + +export function detectPlatform(): PlatformInfo | null { + const key = `${process.platform}-${process.arch}`; + const binaryName = PLATFORM_BINARY_MAP[key]; + if (!binaryName) { + return null; + } + return { key, binaryName }; +} + +export function getBinaryPath(binaryName?: string): string | null { + const configuredBinaryPath = getUserConfiguredBinaryPath(); + if (configuredBinaryPath) { + return configuredBinaryPath; + } + + const name = binaryName ?? detectPlatform()?.binaryName; + if (!name) return null; + return path.join(getLiteRtBinDir(), name); +} + +export function getBinaryDownloadUrl(binaryName: string): string { + return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`; +} + +export function isBinaryInstalled(binaryPath = getBinaryPath()): boolean { + if (!binaryPath) return false; + return fs.existsSync(binaryPath); +} + +export function isModelDownloaded(binaryPath: string): boolean { + try { + const output = execFileSync(binaryPath, ['list'], { + encoding: 'utf-8', + timeout: 10000, + }); + return output.includes(GEMMA_MODEL_NAME); + } catch { + return false; + } +} + +export async function isServerRunning(port: number): Promise { + try { + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + HEALTH_CHECK_TIMEOUT_MS, + ); + const response = await fetch( + `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}:generateContent`, + { method: 'POST', signal: controller.signal }, + ); + clearTimeout(timeout); + // A 400 (bad request) confirms the route exists — the server recognises + // the model endpoint. Only a 404 means "wrong server / wrong model". + return response.status !== 404; + } catch { + return false; + } +} + +function isLiteRtServerProcessInfo( + value: unknown, +): value is LiteRtServerProcessInfo { + if (!value || typeof value !== 'object') { + return false; + } + + const isPositiveInteger = (candidate: unknown): candidate is number => + typeof candidate === 'number' && + Number.isInteger(candidate) && + candidate > 0; + const isNonEmptyString = (candidate: unknown): candidate is string => + typeof candidate === 'string' && candidate.length > 0; + + const pid: unknown = Object.getOwnPropertyDescriptor(value, 'pid')?.value; + if (!isPositiveInteger(pid)) { + return false; + } + + const binaryPath: unknown = Object.getOwnPropertyDescriptor( + value, + 'binaryPath', + )?.value; + if (binaryPath !== undefined && !isNonEmptyString(binaryPath)) { + return false; + } + + const port: unknown = Object.getOwnPropertyDescriptor(value, 'port')?.value; + if (port !== undefined && !isPositiveInteger(port)) { + return false; + } + + return true; +} + +export function readServerProcessInfo(): LiteRtServerProcessInfo | null { + const pidPath = getPidFilePath(); + try { + const content = fs.readFileSync(pidPath, 'utf-8').trim(); + if (!content) { + return null; + } + + if (/^\d+$/.test(content)) { + return { pid: parseInt(content, 10) }; + } + + const parsed = JSON.parse(content) as unknown; + return isLiteRtServerProcessInfo(parsed) ? parsed : null; + } catch { + return null; + } +} + +export function writeServerProcessInfo( + processInfo: LiteRtServerProcessInfo, +): void { + fs.writeFileSync(getPidFilePath(), JSON.stringify(processInfo), 'utf-8'); +} + +export function readServerPid(): number | null { + return readServerProcessInfo()?.pid ?? null; +} + +function normalizeProcessValue(value: string): string { + const normalized = value.replace(/\0/g, ' ').trim(); + if (process.platform === 'win32') { + return normalized.replace(/\\/g, '/').replace(/\s+/g, ' ').toLowerCase(); + } + return normalized.replace(/\s+/g, ' '); +} + +function readProcessCommandLine(pid: number): string | null { + try { + if (process.platform === 'linux') { + const output = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8'); + return output.trim() ? output : null; + } + + if (process.platform === 'win32') { + const output = execFileSync( + 'powershell.exe', + [ + '-NoProfile', + '-Command', + `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}").CommandLine`, + ], + { + encoding: 'utf-8', + timeout: 5000, + }, + ); + return output.trim() || null; + } + + const output = execFileSync('ps', ['-p', String(pid), '-o', 'command='], { + encoding: 'utf-8', + timeout: 5000, + }); + return output.trim() || null; + } catch { + return null; + } +} + +export function isExpectedLiteRtServerCommand( + commandLine: string, + options: { + binaryPath?: string | null; + port?: number; + }, +): boolean { + const normalizedCommandLine = normalizeProcessValue(commandLine); + if (!normalizedCommandLine) { + return false; + } + + if (!/(^|\s|")serve(\s|$)/.test(normalizedCommandLine)) { + return false; + } + + if ( + options.port !== undefined && + !normalizedCommandLine.includes(`--port=${options.port}`) + ) { + return false; + } + + if (!options.binaryPath) { + return true; + } + + const normalizedBinaryPath = normalizeProcessValue(options.binaryPath); + const normalizedBinaryName = normalizeProcessValue( + path.basename(options.binaryPath), + ); + return ( + normalizedCommandLine.includes(normalizedBinaryPath) || + normalizedCommandLine.includes(normalizedBinaryName) + ); +} + +export function isExpectedLiteRtServerProcess( + pid: number, + options: { + binaryPath?: string | null; + port?: number; + }, +): boolean { + const commandLine = readProcessCommandLine(pid); + if (!commandLine) { + return false; + } + return isExpectedLiteRtServerCommand(commandLine, options); +} + +export function isProcessRunning(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} diff --git a/packages/cli/src/commands/gemma/setup.test.ts b/packages/cli/src/commands/gemma/setup.test.ts new file mode 100644 index 0000000000..663a5d6e4c --- /dev/null +++ b/packages/cli/src/commands/gemma/setup.test.ts @@ -0,0 +1,60 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { PLATFORM_BINARY_MAP, PLATFORM_BINARY_SHA256 } from './constants.js'; +import { computeFileSha256, verifyFileSha256 } from './setup.js'; + +describe('gemma setup checksum helpers', () => { + const tempFiles: string[] = []; + + afterEach(async () => { + await Promise.all( + tempFiles + .splice(0) + .map((filePath) => fs.promises.rm(filePath, { force: true })), + ); + }); + + it('has a pinned checksum for every supported LiteRT binary', () => { + expect(Object.keys(PLATFORM_BINARY_SHA256).sort()).toEqual( + Object.values(PLATFORM_BINARY_MAP).sort(), + ); + }); + + it('computes the sha256 for a downloaded file', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + tempFiles.push(filePath); + await fs.promises.writeFile(filePath, 'hello world', 'utf-8'); + + await expect(computeFileSha256(filePath)).resolves.toBe( + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9', + ); + }); + + it('verifies whether a file matches the expected sha256', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + tempFiles.push(filePath); + await fs.promises.writeFile(filePath, 'hello world', 'utf-8'); + + await expect( + verifyFileSha256( + filePath, + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9', + ), + ).resolves.toBe(true); + await expect(verifyFileSha256(filePath, 'deadbeef')).resolves.toBe(false); + }); +}); diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts new file mode 100644 index 0000000000..a936462dbf --- /dev/null +++ b/packages/cli/src/commands/gemma/setup.ts @@ -0,0 +1,504 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import { createHash } from 'node:crypto'; +import fs from 'node:fs'; +import path from 'node:path'; +import { execFileSync, spawn as nodeSpawn } from 'node:child_process'; +import chalk from 'chalk'; +import { debugLogger } from '@google/gemini-cli-core'; +import { loadSettings, SettingScope } from '../../config/settings.js'; +import { exitCli } from '../utils.js'; +import { + DEFAULT_PORT, + GEMMA_MODEL_NAME, + PLATFORM_BINARY_SHA256, +} from './constants.js'; +import { + detectPlatform, + getBinaryDownloadUrl, + getBinaryPath, + isBinaryInstalled, + isModelDownloaded, +} from './platform.js'; +import { startServer } from './start.js'; +import readline from 'node:readline'; + +const log = (msg: string) => debugLogger.log(msg); +const logError = (msg: string) => debugLogger.error(msg); + +async function promptYesNo(question: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + return new Promise((resolve) => { + rl.question(`${question} (y/N): `, (answer) => { + rl.close(); + resolve( + answer.trim().toLowerCase() === 'y' || + answer.trim().toLowerCase() === 'yes', + ); + }); + }); +} + +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +function renderProgress(downloaded: number, total: number | null): void { + const barWidth = 30; + if (total && total > 0) { + const pct = Math.min(downloaded / total, 1); + const filled = Math.round(barWidth * pct); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + const pctStr = (pct * 100).toFixed(0).padStart(3); + process.stderr.write( + `\r [${bar}] ${pctStr}% ${formatBytes(downloaded)} / ${formatBytes(total)}`, + ); + } else { + process.stderr.write(`\r Downloaded ${formatBytes(downloaded)}`); + } +} + +async function downloadFile(url: string, destPath: string): Promise { + const tmpPath = destPath + '.downloading'; + if (fs.existsSync(tmpPath)) { + fs.unlinkSync(tmpPath); + } + + const response = await fetch(url, { redirect: 'follow' }); + if (!response.ok) { + throw new Error( + `Download failed: HTTP ${response.status} ${response.statusText}`, + ); + } + if (!response.body) { + throw new Error('Download failed: No response body'); + } + + const contentLength = response.headers.get('content-length'); + const totalBytes = contentLength ? parseInt(contentLength, 10) : null; + let downloadedBytes = 0; + + const fileStream = fs.createWriteStream(tmpPath); + const reader = response.body.getReader(); + + try { + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + const writeOk = fileStream.write(value); + if (!writeOk) { + await new Promise((resolve) => fileStream.once('drain', resolve)); + } + downloadedBytes += value.byteLength; + renderProgress(downloadedBytes, totalBytes); + } + } finally { + fileStream.end(); + process.stderr.write('\r' + ' '.repeat(80) + '\r'); + } + + await new Promise((resolve, reject) => { + fileStream.on('finish', resolve); + fileStream.on('error', reject); + }); + + fs.renameSync(tmpPath, destPath); +} + +export async function computeFileSha256(filePath: string): Promise { + const hash = createHash('sha256'); + const fileStream = fs.createReadStream(filePath); + + return new Promise((resolve, reject) => { + fileStream.on('data', (chunk) => { + hash.update(chunk); + }); + fileStream.on('error', reject); + fileStream.on('end', () => { + resolve(hash.digest('hex')); + }); + }); +} + +export async function verifyFileSha256( + filePath: string, + expectedHash: string, +): Promise { + const actualHash = await computeFileSha256(filePath); + return actualHash === expectedHash; +} + +function spawnInherited(command: string, args: string[]): Promise { + return new Promise((resolve, reject) => { + const child = nodeSpawn(command, args, { + stdio: 'inherit', + }); + child.on('close', (code) => resolve(code ?? 1)); + child.on('error', reject); + }); +} + +interface SetupArgs { + port: number; + skipModel: boolean; + start: boolean; + force: boolean; + consent: boolean; +} + +async function handleSetup(argv: SetupArgs): Promise { + const { port, force } = argv; + let settingsUpdated = false; + let serverStarted = false; + let autoStartServer = true; + + log(''); + log(chalk.bold('Gemma Local Model Routing Setup')); + log(chalk.dim('─'.repeat(40))); + log(''); + + const platform = detectPlatform(); + if (!platform) { + logError( + chalk.red(`Unsupported platform: ${process.platform}-${process.arch}`), + ); + logError( + 'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)', + ); + return 1; + } + log(chalk.dim(` Platform: ${platform.key} → ${platform.binaryName}`)); + + if (!argv.consent) { + log(''); + log('This will download and install the LiteRT-LM runtime and the'); + log( + `Gemma model (${GEMMA_MODEL_NAME}, ~1 GB). By proceeding, you agree to the`, + ); + log('Gemma Terms of Use: https://ai.google.dev/gemma/terms'); + log(''); + + const accepted = await promptYesNo('Do you want to continue?'); + if (!accepted) { + log('Setup cancelled.'); + return 0; + } + } + + const binaryPath = getBinaryPath(platform.binaryName)!; + const alreadyInstalled = isBinaryInstalled(); + + if (alreadyInstalled && !force) { + log(''); + log(chalk.green(' ✓ LiteRT-LM binary already installed at:')); + log(chalk.dim(` ${binaryPath}`)); + } else { + log(''); + log(' Downloading LiteRT-LM binary...'); + const downloadUrl = getBinaryDownloadUrl(platform.binaryName); + debugLogger.log(`Downloading from: ${downloadUrl}`); + + try { + const binDir = path.dirname(binaryPath); + fs.mkdirSync(binDir, { recursive: true }); + await downloadFile(downloadUrl, binaryPath); + log(chalk.green(' ✓ Binary downloaded successfully')); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to download binary: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + logError(' Check your internet connection and try again.'); + return 1; + } + + const expectedHash = PLATFORM_BINARY_SHA256[platform.binaryName]; + if (!expectedHash) { + logError( + chalk.red( + ` ✗ No checksum is configured for ${platform.binaryName}. Refusing to install the binary.`, + ), + ); + try { + fs.rmSync(binaryPath, { force: true }); + } catch { + // ignore + } + return 1; + } + + try { + const checksumVerified = await verifyFileSha256(binaryPath, expectedHash); + if (!checksumVerified) { + logError( + chalk.red( + ' ✗ Downloaded binary checksum did not match the expected release hash.', + ), + ); + try { + fs.rmSync(binaryPath, { force: true }); + } catch { + // ignore + } + return 1; + } + log(chalk.green(' ✓ Binary checksum verified')); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to verify binary checksum: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + try { + fs.rmSync(binaryPath, { force: true }); + } catch { + // ignore + } + return 1; + } + + if (process.platform !== 'win32') { + try { + fs.chmodSync(binaryPath, 0o755); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + return 1; + } + } + + if (process.platform === 'darwin') { + try { + execFileSync('xattr', ['-d', 'com.apple.quarantine', binaryPath], { + stdio: 'ignore', + }); + log(chalk.green(' ✓ macOS quarantine attribute removed')); + } catch { + // Expected if the attribute doesn't exist. + } + } + } + + if (!argv.skipModel) { + const modelAlreadyDownloaded = isModelDownloaded(binaryPath); + if (modelAlreadyDownloaded && !force) { + log(''); + log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} already downloaded`)); + } else { + log(''); + log(` Downloading model ${GEMMA_MODEL_NAME}...`); + log(chalk.dim(' You may be prompted to accept the Gemma Terms of Use.')); + log(''); + + const exitCode = await spawnInherited(binaryPath, [ + 'pull', + GEMMA_MODEL_NAME, + ]); + if (exitCode !== 0) { + logError(''); + logError( + chalk.red(` ✗ Model download failed (exit code ${exitCode})`), + ); + return 1; + } + log(''); + log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} downloaded`)); + } + } + + log(''); + log(' Configuring settings...'); + try { + const settings = loadSettings(process.cwd()); + + // User scope: security-sensitive settings that must not be overridable + // by workspace configs (prevents arbitrary binary execution). + const existingUserGemma = + settings.forScope(SettingScope.User).settings.experimental + ?.gemmaModelRouter ?? {}; + autoStartServer = existingUserGemma.autoStartServer ?? true; + const existingUserExperimental = + settings.forScope(SettingScope.User).settings.experimental ?? {}; + settings.setValue(SettingScope.User, 'experimental', { + ...existingUserExperimental, + gemmaModelRouter: { + autoStartServer, + ...(existingUserGemma.binaryPath !== undefined + ? { binaryPath: existingUserGemma.binaryPath } + : {}), + }, + }); + + // Workspace scope: project-isolated settings so the local model only + // runs for this specific project, saving resources globally. + const existingWorkspaceGemma = + settings.forScope(SettingScope.Workspace).settings.experimental + ?.gemmaModelRouter ?? {}; + const existingWorkspaceExperimental = + settings.forScope(SettingScope.Workspace).settings.experimental ?? {}; + settings.setValue(SettingScope.Workspace, 'experimental', { + ...existingWorkspaceExperimental, + gemmaModelRouter: { + ...existingWorkspaceGemma, + enabled: true, + classifier: { + ...existingWorkspaceGemma.classifier, + host: `http://localhost:${port}`, + model: GEMMA_MODEL_NAME, + }, + }, + }); + + log(chalk.green(' ✓ Settings updated')); + log(chalk.dim(' User (~/.gemini/settings.json): autoStartServer')); + log( + chalk.dim(' Workspace (.gemini/settings.json): enabled, classifier'), + ); + settingsUpdated = true; + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to update settings: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + logError( + ' You can manually add the configuration to ~/.gemini/settings.json', + ); + } + + if (argv.start) { + log(''); + log(' Starting LiteRT server...'); + serverStarted = await startServer(binaryPath, port); + if (serverStarted) { + log(chalk.green(` ✓ Server started on port ${port}`)); + } else { + log( + chalk.yellow( + ` ! Server may not have started correctly. Check: gemini gemma status`, + ), + ); + } + } + + const routingActive = settingsUpdated && serverStarted; + const setupSucceeded = settingsUpdated && (!argv.start || serverStarted); + log(''); + log(chalk.dim('─'.repeat(40))); + if (routingActive) { + log(chalk.bold.green(' Setup complete! Local model routing is active.')); + } else if (settingsUpdated) { + log( + chalk.bold.green(' Setup complete! Local model routing is configured.'), + ); + } else { + log( + chalk.bold.yellow( + ' Setup incomplete. Manual settings changes are still required.', + ), + ); + } + log(''); + log(' How it works: Every request is classified by the local Gemma model.'); + log( + ' Simple tasks (file reads, quick edits) route to ' + + chalk.cyan('Flash') + + ' for speed.', + ); + log( + ' Complex tasks (debugging, architecture) route to ' + + chalk.cyan('Pro') + + ' for quality.', + ); + log(' This happens automatically — just use the CLI as usual.'); + log(''); + if (!settingsUpdated) { + log( + chalk.yellow( + ' Fix the settings update above, then rerun "gemini gemma status".', + ), + ); + log(''); + } else if (!argv.start) { + log(chalk.yellow(' Note: Run "gemini gemma start" to start the server.')); + if (autoStartServer) { + log( + chalk.yellow( + ' Or restart the CLI to auto-start it on the next launch.', + ), + ); + } + log(''); + } else if (!serverStarted) { + log( + chalk.yellow( + ' Review the server logs and rerun "gemini gemma start" after fixing the issue.', + ), + ); + log(''); + } + log(' Useful commands:'); + log(chalk.dim(' gemini gemma status Check routing status')); + log(chalk.dim(' gemini gemma start Start the LiteRT server')); + log(chalk.dim(' gemini gemma stop Stop the LiteRT server')); + log(chalk.dim(' /gemma Check status inside a session')); + log(''); + + return setupSucceeded ? 0 : 1; +} + +export const setupCommand: CommandModule = { + command: 'setup', + describe: 'Download and configure Gemma local model routing', + builder: (yargs) => + yargs + .option('port', { + type: 'number', + default: DEFAULT_PORT, + description: 'Port for the LiteRT server', + }) + .option('skip-model', { + type: 'boolean', + default: false, + description: 'Skip model download (binary only)', + }) + .option('start', { + type: 'boolean', + default: true, + description: 'Start the server after setup', + }) + .option('force', { + type: 'boolean', + default: false, + description: 'Re-download binary and model even if already present', + }) + .option('consent', { + type: 'boolean', + default: false, + description: 'Skip interactive consent prompt (implies acceptance)', + }), + handler: async (argv) => { + const exitCode = await handleSetup({ + port: Number(argv['port']), + skipModel: Boolean(argv['skipModel']), + start: Boolean(argv['start']), + force: Boolean(argv['force']), + consent: Boolean(argv['consent']), + }); + await exitCli(exitCode); + }, +}; diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts new file mode 100644 index 0000000000..badf7b69a5 --- /dev/null +++ b/packages/cli/src/commands/gemma/start.ts @@ -0,0 +1,123 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import path from 'node:path'; +import { spawn } from 'node:child_process'; +import chalk from 'chalk'; +import { debugLogger } from '@google/gemini-cli-core'; +import { exitCli } from '../utils.js'; +import { + DEFAULT_PORT, + getPidFilePath, + getLogFilePath, + getLiteRtBinDir, + SERVER_START_WAIT_MS, +} from './constants.js'; +import { + getBinaryPath, + isBinaryInstalled, + isServerRunning, + resolveGemmaConfig, + writeServerProcessInfo, +} from './platform.js'; + +export async function startServer( + binaryPath: string, + port: number, +): Promise { + const alreadyRunning = await isServerRunning(port); + if (alreadyRunning) { + debugLogger.log(`LiteRT server already running on port ${port}`); + return true; + } + + const logPath = getLogFilePath(); + fs.mkdirSync(getLiteRtBinDir(), { recursive: true }); + const tmpDir = path.dirname(getPidFilePath()); + fs.mkdirSync(tmpDir, { recursive: true }); + + const logFd = fs.openSync(logPath, 'a'); + + try { + const child = spawn(binaryPath, ['serve', `--port=${port}`, '--verbose'], { + detached: true, + stdio: ['ignore', logFd, logFd], + }); + + if (child.pid) { + writeServerProcessInfo({ + pid: child.pid, + binaryPath, + port, + }); + } + + child.unref(); + } finally { + fs.closeSync(logFd); + } + + await new Promise((resolve) => setTimeout(resolve, SERVER_START_WAIT_MS)); + return isServerRunning(port); +} + +export const startCommand: CommandModule = { + command: 'start', + describe: 'Start the LiteRT-LM server', + builder: (yargs) => + yargs.option('port', { + type: 'number', + description: 'Port for the LiteRT server', + }), + handler: async (argv) => { + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + + if (!port) { + const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + port = configuredPort; + } + + const binaryPath = getBinaryPath(); + if (!binaryPath || !isBinaryInstalled(binaryPath)) { + debugLogger.error( + chalk.red( + 'LiteRT-LM binary not found. Run "gemini gemma setup" first.', + ), + ); + await exitCli(1); + return; + } + + const alreadyRunning = await isServerRunning(port); + if (alreadyRunning) { + debugLogger.log( + chalk.green(`LiteRT server is already running on port ${port}.`), + ); + await exitCli(0); + return; + } + + debugLogger.log(`Starting LiteRT server on port ${port}...`); + + const started = await startServer(binaryPath, port); + if (started) { + debugLogger.log(chalk.green(`LiteRT server started on port ${port}.`)); + debugLogger.log(chalk.dim(`Logs: ${getLogFilePath()}`)); + await exitCli(0); + } else { + debugLogger.error( + chalk.red('Server may not have started correctly. Check logs:'), + ); + debugLogger.error(chalk.dim(` ${getLogFilePath()}`)); + await exitCli(1); + } + }, +}; diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts new file mode 100644 index 0000000000..8ce9f006dc --- /dev/null +++ b/packages/cli/src/commands/gemma/status.ts @@ -0,0 +1,165 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import chalk from 'chalk'; +import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js'; +import { + detectPlatform, + getBinaryPath, + isBinaryInstalled, + isModelDownloaded, + isServerRunning, + readServerPid, + isProcessRunning, + resolveGemmaConfig, +} from './platform.js'; +import { exitCli } from '../utils.js'; + +export interface GemmaStatusResult { + binaryInstalled: boolean; + binaryPath: string | null; + modelDownloaded: boolean; + serverRunning: boolean; + serverPid: number | null; + settingsEnabled: boolean; + port: number; + allPassing: boolean; +} + +export async function checkGemmaStatus( + port?: number, +): Promise { + const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + + const effectivePort = port ?? configuredPort; + const binaryPath = getBinaryPath(); + const binaryInstalled = isBinaryInstalled(binaryPath); + const modelDownloaded = + binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false; + const serverRunning = await isServerRunning(effectivePort); + const pid = readServerPid(); + const serverPid = pid && isProcessRunning(pid) ? pid : null; + + const allPassing = + binaryInstalled && modelDownloaded && serverRunning && settingsEnabled; + + return { + binaryInstalled, + binaryPath, + modelDownloaded, + serverRunning, + serverPid, + settingsEnabled, + port: effectivePort, + allPassing, + }; +} + +export function formatGemmaStatus(status: GemmaStatusResult): string { + const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗')); + + const lines: string[] = [ + '', + chalk.bold('Gemma Local Model Routing Status'), + chalk.dim('─'.repeat(40)), + '', + ]; + + if (status.binaryInstalled) { + lines.push(` Binary: ${check(true)} Installed (${status.binaryPath})`); + } else { + const platform = detectPlatform(); + if (platform) { + lines.push(` Binary: ${check(false)} Not installed`); + lines.push(chalk.dim(` Run: gemini gemma setup`)); + } else { + lines.push( + ` Binary: ${check(false)} Unsupported platform (${process.platform}-${process.arch})`, + ); + } + } + + if (status.modelDownloaded) { + lines.push(` Model: ${check(true)} ${GEMMA_MODEL_NAME} downloaded`); + } else { + lines.push(` Model: ${check(false)} ${GEMMA_MODEL_NAME} not found`); + if (status.binaryInstalled) { + lines.push( + chalk.dim( + ` Run: ${status.binaryPath} pull ${GEMMA_MODEL_NAME}`, + ), + ); + } else { + lines.push(chalk.dim(` Run: gemini gemma setup`)); + } + } + + if (status.serverRunning) { + const pidInfo = status.serverPid ? ` (PID ${status.serverPid})` : ''; + lines.push( + ` Server: ${check(true)} Running on port ${status.port}${pidInfo}`, + ); + } else { + lines.push( + ` Server: ${check(false)} Not running on port ${status.port}`, + ); + lines.push(chalk.dim(` Run: gemini gemma start`)); + } + + if (status.settingsEnabled) { + lines.push(` Settings: ${check(true)} Enabled in settings.json`); + } else { + lines.push(` Settings: ${check(false)} Not enabled in settings.json`); + lines.push( + chalk.dim( + ` Run: gemini gemma setup (auto-configures settings)`, + ), + ); + } + + lines.push(''); + + if (status.allPassing) { + lines.push(chalk.green(' Routing is active — no action needed.')); + lines.push(''); + lines.push( + chalk.dim( + ' Simple requests → Flash (fast) | Complex requests → Pro (powerful)', + ), + ); + lines.push(chalk.dim(' This happens automatically on every request.')); + } else { + lines.push( + chalk.yellow( + ' Some checks failed. Run "gemini gemma setup" for guided installation.', + ), + ); + } + + lines.push(''); + return lines.join('\n'); +} + +export const statusCommand: CommandModule = { + command: 'status', + describe: 'Check Gemma local model routing status', + builder: (yargs) => + yargs.option('port', { + type: 'number', + description: 'Port to check for the LiteRT server', + }), + handler: async (argv) => { + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + const status = await checkGemmaStatus(port); + const output = formatGemmaStatus(status); + process.stdout.write(output); + await exitCli(status.allPassing ? 0 : 1); + }, +}; diff --git a/packages/cli/src/commands/gemma/stop.test.ts b/packages/cli/src/commands/gemma/stop.test.ts new file mode 100644 index 0000000000..64eaf6d5fc --- /dev/null +++ b/packages/cli/src/commands/gemma/stop.test.ts @@ -0,0 +1,112 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockGetBinaryPath = vi.hoisted(() => vi.fn()); +const mockIsExpectedLiteRtServerProcess = vi.hoisted(() => vi.fn()); +const mockIsProcessRunning = vi.hoisted(() => vi.fn()); +const mockIsServerRunning = vi.hoisted(() => vi.fn()); +const mockReadServerPid = vi.hoisted(() => vi.fn()); +const mockReadServerProcessInfo = vi.hoisted(() => vi.fn()); +const mockResolveGemmaConfig = vi.hoisted(() => vi.fn()); + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const { mockCoreDebugLogger } = await import( + '../../test-utils/mockDebugLogger.js' + ); + return mockCoreDebugLogger( + await importOriginal(), + { + stripAnsi: false, + }, + ); +}); + +vi.mock('./constants.js', () => ({ + DEFAULT_PORT: 9379, + getPidFilePath: vi.fn(() => '/tmp/litert-server.pid'), +})); + +vi.mock('./platform.js', () => ({ + getBinaryPath: mockGetBinaryPath, + isExpectedLiteRtServerProcess: mockIsExpectedLiteRtServerProcess, + isProcessRunning: mockIsProcessRunning, + isServerRunning: mockIsServerRunning, + readServerPid: mockReadServerPid, + readServerProcessInfo: mockReadServerProcessInfo, + resolveGemmaConfig: mockResolveGemmaConfig, +})); + +vi.mock('../utils.js', () => ({ + exitCli: vi.fn(), +})); + +import { stopServer } from './stop.js'; + +describe('gemma stop command', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.useFakeTimers(); + mockGetBinaryPath.mockReturnValue('/custom/lit'); + mockResolveGemmaConfig.mockReturnValue({ configuredPort: 9379 }); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + it('refuses to signal a pid that does not match the expected LiteRT server', async () => { + mockReadServerProcessInfo.mockReturnValue({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + mockIsProcessRunning.mockReturnValue(true); + mockIsExpectedLiteRtServerProcess.mockReturnValue(false); + + const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true); + + await expect(stopServer(8123)).resolves.toBe('unexpected-process'); + expect(killSpy).not.toHaveBeenCalled(); + }); + + it('stops the verified LiteRT server and removes the pid file', async () => { + mockReadServerProcessInfo.mockReturnValue({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + mockIsProcessRunning.mockReturnValueOnce(true).mockReturnValueOnce(false); + mockIsExpectedLiteRtServerProcess.mockReturnValue(true); + + const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {}); + const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true); + + const stopPromise = stopServer(8123); + await vi.runAllTimersAsync(); + + await expect(stopPromise).resolves.toBe('stopped'); + expect(killSpy).toHaveBeenCalledWith(1234, 'SIGTERM'); + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid'); + }); + + it('cleans up a stale pid file when the recorded process is no longer running', async () => { + mockReadServerProcessInfo.mockReturnValue({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + mockIsProcessRunning.mockReturnValue(false); + + const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {}); + + await expect(stopServer(8123)).resolves.toBe('not-running'); + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid'); + }); +}); diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts new file mode 100644 index 0000000000..c51269c579 --- /dev/null +++ b/packages/cli/src/commands/gemma/stop.ts @@ -0,0 +1,155 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import chalk from 'chalk'; +import { debugLogger } from '@google/gemini-cli-core'; +import { exitCli } from '../utils.js'; +import { DEFAULT_PORT, getPidFilePath } from './constants.js'; +import { + getBinaryPath, + isExpectedLiteRtServerProcess, + isProcessRunning, + isServerRunning, + readServerPid, + readServerProcessInfo, + resolveGemmaConfig, +} from './platform.js'; + +export type StopServerResult = + | 'stopped' + | 'not-running' + | 'unexpected-process' + | 'failed'; + +export async function stopServer( + expectedPort?: number, +): Promise { + const processInfo = readServerProcessInfo(); + const pidPath = getPidFilePath(); + + if (!processInfo) { + return 'not-running'; + } + + const { pid } = processInfo; + if (!isProcessRunning(pid)) { + debugLogger.log( + `Stale PID file found (PID ${pid} is not running), removing ${pidPath}`, + ); + try { + fs.unlinkSync(pidPath); + } catch { + // ignore + } + return 'not-running'; + } + + const binaryPath = processInfo.binaryPath ?? getBinaryPath(); + const port = processInfo.port ?? expectedPort; + if (!isExpectedLiteRtServerProcess(pid, { binaryPath, port })) { + debugLogger.warn( + `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`, + ); + return 'unexpected-process'; + } + + try { + process.kill(pid, 'SIGTERM'); + } catch { + return 'failed'; + } + + await new Promise((resolve) => setTimeout(resolve, 1000)); + + if (isProcessRunning(pid)) { + try { + process.kill(pid, 'SIGKILL'); + } catch { + // ignore + } + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(pid)) { + return 'failed'; + } + } + + try { + fs.unlinkSync(pidPath); + } catch { + // ignore + } + + return 'stopped'; +} + +export const stopCommand: CommandModule = { + command: 'stop', + describe: 'Stop the LiteRT-LM server', + builder: (yargs) => + yargs.option('port', { + type: 'number', + description: 'Port where the LiteRT server is running', + }), + handler: async (argv) => { + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + + if (!port) { + const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + port = configuredPort; + } + + const processInfo = readServerProcessInfo(); + const pid = processInfo?.pid ?? readServerPid(); + + if (pid !== null && isProcessRunning(pid)) { + debugLogger.log(`Stopping LiteRT server (PID ${pid})...`); + const result = await stopServer(port); + if (result === 'stopped') { + debugLogger.log(chalk.green('LiteRT server stopped.')); + await exitCli(0); + } else if (result === 'unexpected-process') { + debugLogger.error( + chalk.red( + `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`, + ), + ); + debugLogger.error( + chalk.dim( + 'Remove the stale pid file after verifying the process, or stop the process manually.', + ), + ); + await exitCli(1); + } else { + debugLogger.error(chalk.red('Failed to stop LiteRT server.')); + await exitCli(1); + } + return; + } + + const running = await isServerRunning(port); + if (running) { + debugLogger.log( + chalk.yellow( + `A server is responding on port ${port}, but it was not started by "gemini gemma start".`, + ), + ); + debugLogger.log( + chalk.dim( + 'If you started it manually, stop it from the terminal where it is running.', + ), + ); + await exitCli(1); + } else { + debugLogger.log('No LiteRT server is currently running.'); + await exitCli(0); + } + }, +}; diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 04df366a98..180f461749 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -338,6 +338,7 @@ describe('parseArguments', () => { { cmd: 'skill list', expected: true }, { cmd: 'hooks migrate', expected: true }, { cmd: 'hook migrate', expected: true }, + { cmd: 'gemma status', expected: true }, { cmd: 'some query', expected: undefined }, { cmd: 'hello world', expected: undefined }, ])( @@ -758,6 +759,12 @@ describe('parseArguments', () => { const argv = await parseArguments(settings); expect(argv.isCommand).toBe(true); }); + + it('should set isCommand to true for gemma command', async () => { + process.argv = ['node', 'script.js', 'gemma', 'status']; + const argv = await parseArguments(createTestMergedSettings()); + expect(argv.isCommand).toBe(true); + }); }); describe('loadCliConfig', () => { @@ -3030,6 +3037,8 @@ describe('loadCliConfig gemmaModelRouter', () => { experimental: { gemmaModelRouter: { enabled: true, + autoStartServer: false, + binaryPath: '/custom/lit', classifier: { host: 'http://custom:1234', model: 'custom-gemma', @@ -3040,6 +3049,8 @@ describe('loadCliConfig gemmaModelRouter', () => { const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getGemmaModelRouterEnabled()).toBe(true); const gemmaSettings = config.getGemmaModelRouterSettings(); + expect(gemmaSettings.autoStartServer).toBe(false); + expect(gemmaSettings.binaryPath).toBe('/custom/lit'); expect(gemmaSettings.classifier?.host).toBe('http://custom:1234'); expect(gemmaSettings.classifier?.model).toBe('custom-gemma'); }); @@ -3057,6 +3068,8 @@ describe('loadCliConfig gemmaModelRouter', () => { const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getGemmaModelRouterEnabled()).toBe(true); const gemmaSettings = config.getGemmaModelRouterSettings(); + expect(gemmaSettings.autoStartServer).toBe(false); + expect(gemmaSettings.binaryPath).toBe(''); expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379'); expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom'); }); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index d3b807f991..213c22120e 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -13,6 +13,7 @@ import { mcpCommand } from '../commands/mcp.js'; import { extensionsCommand } from '../commands/extensions.js'; import { skillsCommand } from '../commands/skills.js'; import { hooksCommand } from '../commands/hooks.js'; +import { gemmaCommand } from '../commands/gemma.js'; import { setGeminiMdFilename as setServerGeminiMdFilename, getCurrentGeminiMdFilename, @@ -181,6 +182,7 @@ export async function parseArguments( extensionsCommand, skillsCommand, hooksCommand, + gemmaCommand, ]; const subcommands = commandModules.flatMap((mod) => { @@ -260,6 +262,7 @@ export async function parseArguments( yargsInstance.command(extensionsCommand); yargsInstance.command(skillsCommand); yargsInstance.command(hooksCommand); + yargsInstance.command(gemmaCommand); yargsInstance .command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) => diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index 27639fa031..81e5f32ff0 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -471,11 +471,33 @@ describe('SettingsSchema', () => { expect(enabled.category).toBe('Experimental'); expect(enabled.default).toBe(false); expect(enabled.requiresRestart).toBe(true); - expect(enabled.showInDialog).toBe(false); + expect(enabled.showInDialog).toBe(true); expect(enabled.description).toBe( 'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', ); + const autoStartServer = gemmaModelRouter.properties.autoStartServer; + expect(autoStartServer).toBeDefined(); + expect(autoStartServer.type).toBe('boolean'); + expect(autoStartServer.category).toBe('Experimental'); + expect(autoStartServer.default).toBe(false); + expect(autoStartServer.requiresRestart).toBe(true); + expect(autoStartServer.showInDialog).toBe(true); + expect(autoStartServer.description).toBe( + 'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.', + ); + + const binaryPath = gemmaModelRouter.properties.binaryPath; + expect(binaryPath).toBeDefined(); + expect(binaryPath.type).toBe('string'); + expect(binaryPath.category).toBe('Experimental'); + expect(binaryPath.default).toBe(''); + expect(binaryPath.requiresRestart).toBe(true); + expect(binaryPath.showInDialog).toBe(false); + expect(binaryPath.description).toBe( + 'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).', + ); + const classifier = gemmaModelRouter.properties.classifier; expect(classifier).toBeDefined(); expect(classifier.type).toBe('object'); diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 93ac53ada3..7e7de80132 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2169,6 +2169,26 @@ const SETTINGS_SCHEMA = { default: false, description: 'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', + showInDialog: true, + }, + autoStartServer: { + type: 'boolean', + label: 'Auto-start LiteRT Server', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.', + showInDialog: true, + }, + binaryPath: { + type: 'string', + label: 'LiteRT Binary Path', + category: 'Experimental', + requiresRestart: true, + default: '', + description: + 'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).', showInDialog: false, }, classifier: { diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index eedfcc950a..6e257270d7 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -612,6 +612,23 @@ export async function main() { const initializationResult = await initializeApp(config, settings); initAppHandle?.end(); + import('./services/liteRtServerManager.js') + .then(({ LiteRtServerManager }) => { + const mergedGemma = settings.merged.experimental?.gemmaModelRouter; + if (!mergedGemma) return; + // Security: binaryPath and autoStartServer must come from user-scoped + // settings only to prevent workspace configs from triggering arbitrary + // binary execution. + const userGemma = settings.forScope(SettingScope.User).settings + .experimental?.gemmaModelRouter; + return LiteRtServerManager.ensureRunning({ + ...mergedGemma, + binaryPath: userGemma?.binaryPath, + autoStartServer: userGemma?.autoStartServer, + }); + }) + .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e)); + if ( settings.merged.security.auth.selectedType === AuthType.LOGIN_WITH_GOOGLE && diff --git a/packages/cli/src/services/BuiltinCommandLoader.ts b/packages/cli/src/services/BuiltinCommandLoader.ts index c1cbd5621e..94b5986eb3 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.ts @@ -61,6 +61,7 @@ import { vimCommand } from '../ui/commands/vimCommand.js'; import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js'; import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js'; import { upgradeCommand } from '../ui/commands/upgradeCommand.js'; +import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js'; /** * Loads the core, hard-coded slash commands that are an integral part @@ -221,6 +222,7 @@ export class BuiltinCommandLoader implements ICommandLoader { : [skillsCommand] : []), settingsCommand, + gemmaStatusCommand, tasksCommand, vimCommand, setupGithubCommand, diff --git a/packages/cli/src/services/liteRtServerManager.test.ts b/packages/cli/src/services/liteRtServerManager.test.ts new file mode 100644 index 0000000000..f1af5c800a --- /dev/null +++ b/packages/cli/src/services/liteRtServerManager.test.ts @@ -0,0 +1,68 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { GemmaModelRouterSettings } from '@google/gemini-cli-core'; + +const mockGetBinaryPath = vi.hoisted(() => vi.fn()); +const mockIsServerRunning = vi.hoisted(() => vi.fn()); +const mockStartServer = vi.hoisted(() => vi.fn()); + +vi.mock('../commands/gemma/platform.js', () => ({ + getBinaryPath: mockGetBinaryPath, + isServerRunning: mockIsServerRunning, +})); + +vi.mock('../commands/gemma/start.js', () => ({ + startServer: mockStartServer, +})); + +import { LiteRtServerManager } from './liteRtServerManager.js'; + +describe('LiteRtServerManager', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + mockIsServerRunning.mockResolvedValue(false); + mockStartServer.mockResolvedValue(true); + }); + + it('uses the configured custom binary path when auto-starting', async () => { + mockGetBinaryPath.mockReturnValue('/user/lit'); + + const settings: GemmaModelRouterSettings = { + enabled: true, + binaryPath: '/workspace/evil', + classifier: { + host: 'http://localhost:8123', + }, + }; + + await LiteRtServerManager.ensureRunning(settings); + + expect(mockGetBinaryPath).toHaveBeenCalledTimes(1); + expect(fs.existsSync).toHaveBeenCalledWith('/user/lit'); + expect(mockStartServer).toHaveBeenCalledWith('/user/lit', 8123); + }); + + it('falls back to the default binary path when no custom path is configured', async () => { + mockGetBinaryPath.mockReturnValue('/default/lit'); + + const settings: GemmaModelRouterSettings = { + enabled: true, + classifier: { + host: 'http://localhost:9379', + }, + }; + + await LiteRtServerManager.ensureRunning(settings); + + expect(mockGetBinaryPath).toHaveBeenCalledTimes(1); + expect(fs.existsSync).toHaveBeenCalledWith('/default/lit'); + expect(mockStartServer).toHaveBeenCalledWith('/default/lit', 9379); + }); +}); diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts new file mode 100644 index 0000000000..e72d321f9d --- /dev/null +++ b/packages/cli/src/services/liteRtServerManager.ts @@ -0,0 +1,59 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import { debugLogger } from '@google/gemini-cli-core'; +import type { GemmaModelRouterSettings } from '@google/gemini-cli-core'; +import { getBinaryPath, isServerRunning } from '../commands/gemma/platform.js'; +import { DEFAULT_PORT } from '../commands/gemma/constants.js'; + +export class LiteRtServerManager { + static async ensureRunning( + gemmaSettings: GemmaModelRouterSettings | undefined, + ): Promise { + if (!gemmaSettings?.enabled) return; + if (gemmaSettings.autoStartServer === false) return; + const binaryPath = getBinaryPath(); + if (!binaryPath || !fs.existsSync(binaryPath)) { + debugLogger.log( + '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".', + ); + return; + } + + const port = + parseInt( + gemmaSettings.classifier?.host?.match(/:(\d+)/)?.[1] ?? '', + 10, + ) || DEFAULT_PORT; + + const running = await isServerRunning(port); + if (running) { + debugLogger.log( + `[LiteRtServerManager] Server already running on port ${port}`, + ); + return; + } + + debugLogger.log( + `[LiteRtServerManager] Auto-starting LiteRT server on port ${port}...`, + ); + + try { + const { startServer } = await import('../commands/gemma/start.js'); + const started = await startServer(binaryPath, port); + if (started) { + debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`); + } else { + debugLogger.warn( + `[LiteRtServerManager] Server may not have started correctly on port ${port}`, + ); + } + } catch (error) { + debugLogger.warn('[LiteRtServerManager] Auto-start failed:', error); + } + } +} diff --git a/packages/cli/src/ui/commands/gemmaStatusCommand.ts b/packages/cli/src/ui/commands/gemmaStatusCommand.ts new file mode 100644 index 0000000000..2c581b31a1 --- /dev/null +++ b/packages/cli/src/ui/commands/gemmaStatusCommand.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { CommandKind, type SlashCommand } from './types.js'; +import { MessageType, type HistoryItemGemmaStatus } from '../types.js'; +import { checkGemmaStatus } from '../../commands/gemma/status.js'; +import { GEMMA_MODEL_NAME } from '../../commands/gemma/constants.js'; + +export const gemmaStatusCommand: SlashCommand = { + name: 'gemma', + description: 'Check local Gemma model routing status', + kind: CommandKind.BUILT_IN, + autoExecute: true, + isSafeConcurrent: true, + action: async (context) => { + const port = + parseInt( + context.services.settings.merged.experimental?.gemmaModelRouter?.classifier?.host?.match( + /:(\d+)/, + )?.[1] ?? '', + 10, + ) || undefined; + const status = await checkGemmaStatus(port); + const item: Omit = { + type: MessageType.GEMMA_STATUS, + binaryInstalled: status.binaryInstalled, + binaryPath: status.binaryPath, + modelName: GEMMA_MODEL_NAME, + modelDownloaded: status.modelDownloaded, + serverRunning: status.serverRunning, + serverPid: status.serverPid, + serverPort: status.port, + settingsEnabled: status.settingsEnabled, + allPassing: status.allPassing, + }; + context.ui.addItem(item); + }, +}; diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index c1bdc02c75..081a206272 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -32,6 +32,7 @@ import { ToolsList } from './views/ToolsList.js'; import { SkillsList } from './views/SkillsList.js'; import { AgentsStatus } from './views/AgentsStatus.js'; import { McpStatus } from './views/McpStatus.js'; +import { GemmaStatus } from './views/GemmaStatus.js'; import { ChatList } from './views/ChatList.js'; import { ModelMessage } from './messages/ModelMessage.js'; import { ThinkingMessage } from './messages/ThinkingMessage.js'; @@ -228,6 +229,9 @@ export const HistoryItemDisplay: React.FC = ({ {itemForDisplay.type === 'mcp_status' && ( )} + {itemForDisplay.type === 'gemma_status' && ( + + )} {itemForDisplay.type === 'chat_list' && ( )} diff --git a/packages/cli/src/ui/components/views/GemmaStatus.tsx b/packages/cli/src/ui/components/views/GemmaStatus.tsx new file mode 100644 index 0000000000..160689ebea --- /dev/null +++ b/packages/cli/src/ui/components/views/GemmaStatus.tsx @@ -0,0 +1,120 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Box, Text } from 'ink'; +import type React from 'react'; +import { theme } from '../../semantic-colors.js'; +import type { HistoryItemGemmaStatus } from '../../types.js'; + +type GemmaStatusProps = Omit; + +const StatusDot: React.FC<{ ok: boolean }> = ({ ok }) => ( + + {ok ? '\u25CF' : '\u25CB'} + +); + +export const GemmaStatus: React.FC = ({ + binaryInstalled, + binaryPath, + modelName, + modelDownloaded, + serverRunning, + serverPid, + serverPort, + settingsEnabled, + allPassing, +}) => ( + + Gemma Local Model Routing + + + + + + {' '} + Binary: + {binaryInstalled ? ( + {binaryPath} + ) : ( + Not installed + )} + + + + + + + {' '} + Model: + {modelDownloaded ? ( + {modelName} + ) : ( + {modelName} not found + )} + + + + + + + {' '} + Server: + {serverRunning ? ( + + port {serverPort} + {serverPid ? ( + (PID {serverPid}) + ) : null} + + ) : ( + + not running on port {serverPort} + + )} + + + + + + + {' '} + Settings: + {settingsEnabled ? ( + enabled + ) : ( + not enabled + )} + + + + + Active for: + {allPassing ? ( + [routing] + ) : ( + none + )} + + + + {allPassing ? ( + + + Simple requests route to Flash, complex requests to Pro. + + + This happens automatically on every request. + + + ) : ( + + Run "gemini gemma setup" to install and configure. + + )} + + +); diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 1ded2ae643..2808d716b7 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -355,6 +355,19 @@ export interface JsonMcpResource { description?: string; } +export type HistoryItemGemmaStatus = HistoryItemBase & { + type: 'gemma_status'; + binaryInstalled: boolean; + binaryPath: string | null; + modelName: string; + modelDownloaded: boolean; + serverRunning: boolean; + serverPid: number | null; + serverPort: number; + settingsEnabled: boolean; + allPassing: boolean; +}; + export type HistoryItemMcpStatus = HistoryItemBase & { type: 'mcp_status'; servers: Record; @@ -404,6 +417,7 @@ export type HistoryItemWithoutId = | HistoryItemSkillsList | HistoryItemAgentsList | HistoryItemMcpStatus + | HistoryItemGemmaStatus | HistoryItemChatList | HistoryItemThinking | HistoryItemHint @@ -430,6 +444,7 @@ export enum MessageType { SKILLS_LIST = 'skills_list', AGENTS_LIST = 'agents_list', MCP_STATUS = 'mcp_status', + GEMMA_STATUS = 'gemma_status', CHAT_LIST = 'chat_list', HINT = 'hint', } diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 97531a5190..fd97d67eda 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -1975,6 +1975,8 @@ describe('GemmaModelRouterSettings', () => { const config = new Config(baseParams); const settings = config.getGemmaModelRouterSettings(); expect(settings.enabled).toBe(false); + expect(settings.autoStartServer).toBe(true); + expect(settings.binaryPath).toBe(''); expect(settings.classifier?.host).toBe('http://localhost:9379'); expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom'); }); @@ -1984,6 +1986,8 @@ describe('GemmaModelRouterSettings', () => { ...baseParams, gemmaModelRouter: { enabled: true, + autoStartServer: false, + binaryPath: '/custom/lit', classifier: { host: 'http://custom:1234', model: 'custom-gemma', @@ -1993,6 +1997,8 @@ describe('GemmaModelRouterSettings', () => { const config = new Config(params); const settings = config.getGemmaModelRouterSettings(); expect(settings.enabled).toBe(true); + expect(settings.autoStartServer).toBe(false); + expect(settings.binaryPath).toBe('/custom/lit'); expect(settings.classifier?.host).toBe('http://custom:1234'); expect(settings.classifier?.model).toBe('custom-gemma'); }); @@ -2007,6 +2013,8 @@ describe('GemmaModelRouterSettings', () => { const config = new Config(params); const settings = config.getGemmaModelRouterSettings(); expect(settings.enabled).toBe(true); + expect(settings.autoStartServer).toBe(true); + expect(settings.binaryPath).toBe(''); expect(settings.classifier?.host).toBe('http://localhost:9379'); expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom'); }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 76c571e29e..e3220eb9ef 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -219,6 +219,8 @@ export interface OutputSettings { export interface GemmaModelRouterSettings { enabled?: boolean; + autoStartServer?: boolean; + binaryPath?: string; classifier?: { host?: string; model?: string; @@ -1323,6 +1325,8 @@ export class Config implements McpContext, AgentLoopContext { }; this.gemmaModelRouter = { enabled: params.gemmaModelRouter?.enabled ?? false, + autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true, + binaryPath: params.gemmaModelRouter?.binaryPath ?? '', classifier: { host: params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379', diff --git a/packages/core/src/core/localLiteRtLmClient.test.ts b/packages/core/src/core/localLiteRtLmClient.test.ts index c4398b5b9c..6c64143ec3 100644 --- a/packages/core/src/core/localLiteRtLmClient.test.ts +++ b/packages/core/src/core/localLiteRtLmClient.test.ts @@ -7,6 +7,8 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { LocalLiteRtLmClient } from './localLiteRtLmClient.js'; import type { Config } from '../config/config.js'; +import { GoogleGenAI } from '@google/genai'; + const mockGenerateContent = vi.fn(); vi.mock('@google/genai', () => { @@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => { const result = await client.generateJson([], 'test-instruction'); expect(result).toEqual({ key: 'value' }); + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiVersion: 'v1beta', + httpOptions: expect.objectContaining({ + baseUrl: 'http://test-host:1234', + }), + }), + ); expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemma:latest', diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts index 798dcb5765..82fa44e87b 100644 --- a/packages/core/src/core/localLiteRtLmClient.ts +++ b/packages/core/src/core/localLiteRtLmClient.ts @@ -25,6 +25,8 @@ export class LocalLiteRtLmClient { this.client = new GoogleGenAI({ // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication. apiKey: 'no-api-key-needed', + apiVersion: 'v1beta', + vertexai: false, httpOptions: { baseUrl: this.host, // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds). diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 491db887a4..d30a6f4b0a 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2920,6 +2920,20 @@ "default": false, "type": "boolean" }, + "autoStartServer": { + "title": "Auto-start LiteRT Server", + "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.", + "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + "type": "boolean" + }, + "binaryPath": { + "title": "LiteRT Binary Path", + "description": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).", + "markdownDescription": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: ``", + "default": "", + "type": "string" + }, "classifier": { "title": "Classifier", "description": "Classifier configuration.",