From b9b94e57ca47a77ad8ad6549be6fcabe013ead46 Mon Sep 17 00:00:00 2001 From: mkorwel Date: Sun, 15 Mar 2026 16:20:31 -0700 Subject: [PATCH] feat(offload): implement high-performance internal routing with IAP fallback --- .gemini/settings.json | 5 +- .../skills/offload/plan.workerabstraction.md | 46 +++++++++++++++++++ .../skills/offload/scripts/orchestrator.ts | 4 +- .gemini/skills/offload/scripts/setup.ts | 12 +++-- 4 files changed, 61 insertions(+), 6 deletions(-) create mode 100644 .gemini/skills/offload/plan.workerabstraction.md diff --git a/.gemini/settings.json b/.gemini/settings.json index 1020c4a033..055942be20 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -12,8 +12,9 @@ "projectId": "gemini-cli-team-quota", "zone": "us-west1-a", "remoteHost": "gcli-worker", - "remoteHome": "/home/mattkorwel_google_com", - "remoteWorkDir": "/home/mattkorwel_google_com/dev/main", + "remoteHome": "/home/node", + "remoteWorkDir": "/home/node/dev/main", + "useContainer": true, "terminalType": "iterm2" } } diff --git a/.gemini/skills/offload/plan.workerabstraction.md b/.gemini/skills/offload/plan.workerabstraction.md new file mode 100644 index 0000000000..f0a9cc1d95 --- /dev/null +++ b/.gemini/skills/offload/plan.workerabstraction.md @@ -0,0 +1,46 @@ +# Plan: Worker Provider Abstraction for Offload System + +## Objective +Abstract the remote execution infrastructure (GCE COS, GCE Linux, Cloud Workstations) behind a common `WorkerProvider` interface. This eliminates infrastructure-specific prompts (like "use container mode") and makes the system extensible to new backends. + +## Architectural Changes + +### 1. New Provider Abstraction +Create a modular provider system where each infrastructure type implements a standard interface. +- **Base Interface**: `WorkerProvider` (methods for `exec`, `sync`, `provision`, `getStatus`). +- **Implementations**: + - `GceCosProvider`: Handles COS with Cloud-Init and `docker exec` wrapping. 
+ - `GceLinuxProvider`: Handles standard Linux VMs with direct execution. + - `LocalDockerProvider`: (Future) Runs offload tasks in a local container. + - `WorkstationProvider`: (Future) Integrates with Google Cloud Workstations. + +### 2. Auto-Discovery +Modify `setup.ts` to: +- Prompt for a high-level "Provider Type" (e.g., "Google Cloud (COS)", "Google Cloud (Linux)"). +- Auto-detect environment details where possible (e.g., fetching internal IPs, identifying container names). + +### 3. Clean Orchestration +Refactor `orchestrator.ts` to be provider-agnostic: +- It asks the provider to "Ensure Ready" (wake VM). +- It asks the provider to "Prepare Environment" (worktree setup). +- It asks the provider to "Launch Task" (tmux initialization). + +## Implementation Steps + +### Phase 1: Infrastructure Cleanup +- Move existing procedural logic from `fleet.ts`, `setup.ts`, and `orchestrator.ts` into a new `providers/` directory. +- Create `ProviderFactory` to instantiate the correct implementation based on `settings.json`. + +### Phase 2: Refactor Scripts +- **`fleet.ts`**: Proxy all actions (`provision`, `rebuild`, `stop`) to the provider. +- **`orchestrator.ts`**: Use the provider for the entire lifecycle of a job. +- **`status.ts`**: Use the provider's `getStatus()` method to derive state. + +### Phase 3: Validation +- Verify that the `gcli-worker` SSH alias and IAP tunneling remain functional. +- Ensure "Fast-Path SSH" is still the primary interactive gateway. + +## Verification +- Run `npm run offload:fleet provision` and ensure it creates a COS-native worker. +- Run `npm run offload:setup` and verify it no longer asks cryptic infrastructure questions. +- Launch a review and verify it uses `docker exec` internally for the COS provider. 
diff --git a/.gemini/skills/offload/scripts/orchestrator.ts b/.gemini/skills/offload/scripts/orchestrator.ts index 9b683bbb71..1689fe4ca7 100644 --- a/.gemini/skills/offload/scripts/orchestrator.ts +++ b/.gemini/skills/offload/scripts/orchestrator.ts @@ -70,7 +70,9 @@ export async function runOrchestrator(args: string[], env: NodeJS.ProcessEnv = p } const sshInternal = `tmux attach-session -t ${sessionName} 2>/dev/null || tmux new-session -s ${sessionName} -n 'offload' ${q(tmuxCmd)}`; - const finalSSH = `ssh -t ${remoteHost} ${q(sshInternal)}`; + + // High-performance primary SSH with IAP fallback + const finalSSH = `ssh -o ConnectTimeout=5 -t ${remoteHost} ${q(sshInternal)} || gcloud compute ssh ${targetVM} --project ${projectId} --zone ${zone} --tunnel-through-iap --command ${q(sshInternal)}`; // 5. Open in iTerm2 const isWithinGemini = !!env.GEMINI_CLI || !!env.GEMINI_SESSION_ID || !!env.GCLI_SESSION_ID; diff --git a/.gemini/skills/offload/scripts/setup.ts b/.gemini/skills/offload/scripts/setup.ts index eea19757b7..f33174c0e5 100644 --- a/.gemini/skills/offload/scripts/setup.ts +++ b/.gemini/skills/offload/scripts/setup.ts @@ -62,19 +62,25 @@ export async function runSetup(env: NodeJS.ProcessEnv = process.env) { spawnSync(`gcloud compute instances start ${targetVM} --project ${projectId} --zone ${zone}`, { shell: true, stdio: 'inherit' }); } - // 1. Configure Fast-Path SSH Alias - console.log(`\n🚀 Configuring Fast-Path SSH Alias (Internal IP)...`); + // 1. Configure Fast-Path SSH Alias (Direct Internal Hostname) + console.log(`\n🚀 Configuring Fast-Path SSH Alias (Internal Hostname)...`); + const dnsSuffix = await prompt('Internal DNS Suffix (e.g. 
.internal or .internal.gcpnode.com)', '.internal'); + + // Construct the high-performance direct hostname + const internalHostname = `${targetVM}.${zone}.c.${projectId}${dnsSuffix}`; const sshAlias = 'gcli-worker'; const sshConfigPath = path.join(os.homedir(), '.ssh/config'); + const sshEntry = ` Host ${sshAlias} - HostName ${internalIp} + HostName ${internalHostname} IdentityFile ~/.ssh/google_compute_engine User ${env.USER || 'mattkorwel'}_google_com CheckHostIP no StrictHostKeyChecking no `; + let currentConfig = ''; if (fs.existsSync(sshConfigPath)) currentConfig = fs.readFileSync(sshConfigPath, 'utf8');