From 7b9a8294ee78746ff92f63927d3b7690ecb4af19 Mon Sep 17 00:00:00 2001
From: mkorwel
Date: Sun, 15 Mar 2026 13:16:28 -0700
Subject: [PATCH] feat(offload): inject SSH keys and add disk expansion logic to provisioning

---
Review notes (text between the "---" line and the first diff is ignored by `git am`):
 * fleet.ts calls os.homedir() but the patch never imported `os`; the first
   hunk now adds the import so provisioning does not throw a ReferenceError.
 * The `docker run` runcmd entry is now a YAML literal block (`- |`). Written
   as a plain multi-line scalar, its line breaks fold into spaces, leaving
   `\ -v` sequences that the shell treats as an escaped space instead of a
   line continuation.
 * Hunk headers and the diffstat were adjusted for the two extra added lines;
   the post-image blob id on the fleet.ts `index` line was not regenerated.
 * TODO(review): `enable-oslogin=TRUE` is sent together with `ssh-keys`.
   Compute Engine is documented to ignore metadata SSH keys while OS Login is
   enabled - confirm which access mechanism is intended.

 .gemini/skills/offload/GEMINI.md        | 25 +++++++++++
 .gemini/skills/offload/scripts/fleet.ts | 62 ++++++++++++++++++-------
 2 files changed, 71 insertions(+), 16 deletions(-)
 create mode 100644 .gemini/skills/offload/GEMINI.md

diff --git a/.gemini/skills/offload/GEMINI.md b/.gemini/skills/offload/GEMINI.md
new file mode 100644
index 0000000000..1b63882b18
--- /dev/null
+++ b/.gemini/skills/offload/GEMINI.md
@@ -0,0 +1,25 @@
+# Architectural Mandate: High-Performance Offload System
+
+## Infrastructure Strategy
+- **Base OS**: Always use **Container-Optimized OS (COS)** (`cos-stable` family). It is security-hardened and has Docker pre-installed.
+- **Provisioning**: Use the **Cloud-Init (`user-data`)** pattern.
+  - *Note*: Avoid `gcloud compute instances create-with-container` on standard Linux images as it uses a deprecated startup agent. On COS, use native `user-data` for the cleanest execution.
+- **Performance**: Provision with a minimum of **200GB PD-Balanced** disk to ensure high I/O throughput for Node.js builds and to satisfy GCP disk performance requirements.
+
+## Container Isolation
+- **Image**: `us-docker.pkg.dev/gemini-code-dev/gemini-cli/maintainer:latest`.
+- **Identity**: The container must be named **`maintainer-worker`**.
+- **Mounts**: Standardize on these host-to-container mappings:
+  - `~/dev` -> `/home/node/dev` (Persistence for worktrees)
+  - `~/.gemini` -> `/home/node/.gemini` (Shared credentials)
+  - `~/.offload` -> `/home/node/.offload` (Shared scripts/logs)
+- **Runtime**: The container runs as a persistent service (`--restart always`) acting as a "Remote Workstation" rather than an ephemeral task.
+
+## Orchestration Logic
+- **Fast-Path SSH**: Land on the VM Host via standard SSH (using an alias like `gcli-worker`).
+- **Context Execution**: Use `docker exec -it maintainer-worker ...` for interactive tasks and `tmux` sessions. This provides persistence against connection drops while keeping the host OS "invisible."
+- **Path Resolution**: Both Host and Container must share identical tilde (`~`) paths to avoid mapping confusion in automation scripts.
+
+## Maintenance
+- **Rebuilds**: If the environment drifts or the image updates, delete the VM and re-run the `provision` action.
+- **Status**: The Mission Control dashboard derives state by scanning host `tmux` sessions and container filesystem logs.
diff --git a/.gemini/skills/offload/scripts/fleet.ts b/.gemini/skills/offload/scripts/fleet.ts
index 1912fdae27..4cfe292240 100644
--- a/.gemini/skills/offload/scripts/fleet.ts
+++ b/.gemini/skills/offload/scripts/fleet.ts
@@ -4,6 +4,9 @@
  * Manages dynamic GCP workers for offloading tasks.
  */
 import { spawnSync } from 'child_process';
+import os from 'os';
+import path from 'path';
+import fs from 'fs';
 
 const PROJECT_ID = 'gemini-cli-team-quota';
 const USER = process.env.USER || 'mattkorwel';
@@ -41,33 +44,52 @@ async function provisionWorker() {
     return;
   }
 
-  console.log(`🚀 Provisioning "Invisible VM" (Container-Optimized OS): ${name}...`);
-  console.log(`   - Image: ${imageUri}`);
-  console.log(`   - OS: Container-Optimized OS (Docker Pre-installed)`);
+  console.log(`🚀 Provisioning modern container worker (COS + Cloud-Init): ${name}...`);
 
+  // Get local public key for native SSH access
+  const pubKeyPath = path.join(os.homedir(), '.ssh/google_compute_engine.pub');
+  const pubKey = fs.existsSync(pubKeyPath) ? fs.readFileSync(pubKeyPath, 'utf8').trim() : '';
+  const sshKeyMetadata = pubKey ? `${USER}:${pubKey}` : '';
+
+  // Modern Cloud-Init (user-data) configuration for COS
+  const cloudConfig = `#cloud-config
+runcmd:
+  - |
+    # Expand the root partition to use the full 200GB for high performance
+    /usr/bin/growpart /dev/sda 1
+    /usr/sbin/resize2fs /dev/sda1
+  - |
+    docker run -d --name maintainer-worker --restart always \\
+      -v /home/node/dev:/home/node/dev:rw \\
+      -v /home/node/.gemini:/home/node/.gemini:rw \\
+      -v /home/node/.offload:/home/node/.offload:rw \\
+      ${imageUri} /bin/bash -c "while true; do sleep 1000; done"
+`;
+
+  const tempPath = path.join(process.env.TMPDIR || '/tmp', `cloud-init-${name}.yaml`);
+  fs.writeFileSync(tempPath, cloudConfig);
+
   const result = spawnSync('gcloud', [
-    'compute', 'instances', 'create-with-container', name,
+    'compute', 'instances', 'create', name,
     '--project', PROJECT_ID,
-    '--zone', zone,
+    '--zone', 'us-west1-a',
     '--machine-type', 'n2-standard-8',
     '--image-family', 'cos-stable',
     '--image-project', 'cos-cloud',
     '--boot-disk-size', '200GB',
     '--boot-disk-type', 'pd-balanced',
-    '--container-image', imageUri,
-    '--container-name', 'maintainer-worker',
-    '--container-restart-policy', 'always',
-    '--container-mount-host-path', 'host-path=/home/$(whoami)/dev,mount-path=/home/node/dev,mode=rw',
-    '--container-mount-host-path', 'host-path=/home/$(whoami)/.gemini,mount-path=/home/node/.gemini,mode=rw',
-    '--container-mount-host-path', 'host-path=/home/$(whoami)/.offload,mount-path=/home/node/.offload,mode=rw',
+    '--metadata-from-file', `user-data=${tempPath}`,
+    '--metadata', `enable-oslogin=TRUE${sshKeyMetadata ? `,ssh-keys=${sshKeyMetadata}` : ''}`,
     '--labels', `owner=${USER.replace(/[^a-z0-9_-]/g, '_')},type=offload-worker`,
     '--tags', `gcli-offload-${USER}`,
     '--scopes', 'https://www.googleapis.com/auth/cloud-platform'
-  ], { stdio: 'inherit', shell: true });
+  ], { stdio: 'inherit' });
+
+  fs.unlinkSync(tempPath);
 
   if (result.status === 0) {
-    console.log(`\n✅ Container worker ${name} is being provisioned.`);
-    console.log(`👉 The container will start automatically on boot.`);
+    console.log(`\n✅ Worker ${name} is being provisioned.`);
+    console.log(`👉 Container 'maintainer-worker' will start natively via Cloud-Init.`);
   }
 }
 
@@ -120,12 +142,17 @@ async function stopWorker() {
 
 async function remoteStatus() {
   const name = INSTANCE_PREFIX;
-  const zone = 'us-west1-a';
-
   console.log(`📡 Fetching remote status from ${name}...`);
   spawnSync('ssh', ['gcli-worker', 'tsx .offload/scripts/status.ts'], { stdio: 'inherit', shell: true });
 }
 
+async function rebuildWorker() {
+  const name = INSTANCE_PREFIX;
+  console.log(`🔄 Rebuilding worker ${name}...`);
+  spawnSync('gcloud', ['compute', 'instances', 'delete', name, '--project', PROJECT_ID, '--zone', 'us-west1-a', '--quiet'], { stdio: 'inherit' });
+  await provisionWorker();
+}
+
 async function main() {
   const action = process.argv[2] || 'list';
 
@@ -136,6 +163,9 @@ async function main() {
     case 'provision':
       await provisionWorker();
       break;
+    case 'rebuild':
+      await rebuildWorker();
+      break;
     case 'stop':
       await stopWorker();
       break;