feat(cli): add streamlined gemini gemma local model setup (#25498)

Co-authored-by: Abhijit Balaji <abhijitbalaji@google.com>
Co-authored-by: Samee Zahid <sameez@google.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-04-22 02:54:31 -07:00 · 2026-04-20 16:57:56 -07:00
parent 6afc47f81c
commit 1d383a4a8e
31 changed files with 2509 additions and 12 deletions
@@ -1975,6 +1975,8 @@ describe('GemmaModelRouterSettings', () => {
    const config = new Config(baseParams);
    const settings = config.getGemmaModelRouterSettings();
    expect(settings.enabled).toBe(false);
+    expect(settings.autoStartServer).toBe(true);
+    expect(settings.binaryPath).toBe('');
    expect(settings.classifier?.host).toBe('http://localhost:9379');
    expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
  });
@@ -1984,6 +1986,8 @@ describe('GemmaModelRouterSettings', () => {
      ...baseParams,
      gemmaModelRouter: {
        enabled: true,
+        autoStartServer: false,
+        binaryPath: '/custom/lit',
        classifier: {
          host: 'http://custom:1234',
          model: 'custom-gemma',
@@ -1993,6 +1997,8 @@ describe('GemmaModelRouterSettings', () => {
    const config = new Config(params);
    const settings = config.getGemmaModelRouterSettings();
    expect(settings.enabled).toBe(true);
+    expect(settings.autoStartServer).toBe(false);
+    expect(settings.binaryPath).toBe('/custom/lit');
    expect(settings.classifier?.host).toBe('http://custom:1234');
    expect(settings.classifier?.model).toBe('custom-gemma');
  });
@@ -2007,6 +2013,8 @@ describe('GemmaModelRouterSettings', () => {
    const config = new Config(params);
    const settings = config.getGemmaModelRouterSettings();
    expect(settings.enabled).toBe(true);
+    expect(settings.autoStartServer).toBe(true);
+    expect(settings.binaryPath).toBe('');
    expect(settings.classifier?.host).toBe('http://localhost:9379');
    expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
  });
@@ -219,6 +219,8 @@ export interface OutputSettings {

 export interface GemmaModelRouterSettings {
  enabled?: boolean;
+  autoStartServer?: boolean;
+  binaryPath?: string;
  classifier?: {
    host?: string;
    model?: string;
@@ -1323,6 +1325,8 @@ export class Config implements McpContext, AgentLoopContext {
    };
    this.gemmaModelRouter = {
      enabled: params.gemmaModelRouter?.enabled ?? false,
+      autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true,
+      binaryPath: params.gemmaModelRouter?.binaryPath ?? '',
      classifier: {
        host:
          params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379',
@@ -7,6 +7,8 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { LocalLiteRtLmClient } from './localLiteRtLmClient.js';
 import type { Config } from '../config/config.js';
+import { GoogleGenAI } from '@google/genai';
+
 const mockGenerateContent = vi.fn();

 vi.mock('@google/genai', () => {
@@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => {
    const result = await client.generateJson([], 'test-instruction');

    expect(result).toEqual({ key: 'value' });
+    expect(GoogleGenAI).toHaveBeenCalledWith(
+      expect.objectContaining({
+        apiVersion: 'v1beta',
+        httpOptions: expect.objectContaining({
+          baseUrl: 'http://test-host:1234',
+        }),
+      }),
+    );
    expect(mockGenerateContent).toHaveBeenCalledWith(
      expect.objectContaining({
        model: 'gemma:latest',
@@ -25,6 +25,8 @@ export class LocalLiteRtLmClient {
    this.client = new GoogleGenAI({
      // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication.
      apiKey: 'no-api-key-needed',
+      apiVersion: 'v1beta',
+      vertexai: false,
      httpOptions: {
        baseUrl: this.host,
        // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).