From e937681cd3b51882c38e462111d774864aa7e15a Mon Sep 17 00:00:00 2001 From: mkorwel Date: Tue, 17 Mar 2026 13:03:40 -0700 Subject: [PATCH] docs(offload): document persistence and fix status script orchestrator --- .gemini/skills/offload/README.md | 16 +++- .gemini/skills/offload/scripts/status.ts | 97 +++++++++++------------- 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/.gemini/skills/offload/README.md b/.gemini/skills/offload/README.md index 6425fc45cb..8e54c7d60d 100644 --- a/.gemini/skills/offload/README.md +++ b/.gemini/skills/offload/README.md @@ -64,7 +64,21 @@ The offload system uses a **Hybrid VM + Docker** architecture designed for maxim ### Getting Started (Onboarding) For a complete guide on setting up your remote environment, see the [Maintainer Onboarding Guide](../../../MAINTAINER_ONBOARDING.md). -### Technical details +### Persistence and Job Recovery + +The offload system is designed for high reliability and persistence. Jobs use a nested execution model to ensure they continue running even if your local terminal is closed or the connection is lost. + +### How it Works +1. **Host-Level Persistence**: The orchestrator launches each job in a named **`tmux`** session on the remote VM. +2. **Container Isolation**: The actual work is performed inside the persistent `maintainer-worker` Docker container. + +### Re-attaching to a Job +If you lose your connection, you can easily resume your session: + +- **Automatic**: Simply run the exact same command you started with (e.g., `npm run offload 123 review`). The system will automatically detect the existing session and re-attach you. +- **Manual**: Use `npm run offload:status` to find the session name, then use `ssh gcli-worker` to jump into the VM and `tmux attach -t SESSION_NAME` to resume. + +## Technical details This skill uses a **Worker Provider** abstraction (`GceCosProvider`) to manage the remote lifecycle. 
It uses an isolated Gemini profile on the remote host (`~/.offload/gemini-cli-config`) to ensure that verification tasks do not interfere with your primary configuration. diff --git a/.gemini/skills/offload/scripts/status.ts b/.gemini/skills/offload/scripts/status.ts index 1275b31640..4e64689b96 100644 --- a/.gemini/skills/offload/scripts/status.ts +++ b/.gemini/skills/offload/scripts/status.ts @@ -1,66 +1,61 @@ /** - * Offload Status Inspector (Remote) + * Offload Status Inspector (Local) * - * Scans tmux sessions (host) and logs (container) to provide job status. + * Orchestrates remote status retrieval via the WorkerProvider. */ -import { spawnSync } from 'child_process'; -import fs from 'fs'; import path from 'path'; -import os from 'os'; +import fs from 'fs'; +import { fileURLToPath } from 'url'; +import { ProviderFactory } from './providers/ProviderFactory.ts'; -const WORKTREE_BASE = '/home/node/dev/worktrees'; +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = path.resolve(__dirname, '../../../..'); -function getStatus() { - console.log('\n๐Ÿ›ฐ๏ธ Offload Mission Control Status (Container Mode):'); - console.log(''.padEnd(100, '-')); - console.log(`${'JOB ID'.padEnd(10)} | ${'ACTION'.padEnd(10)} | ${'STATE'.padEnd(12)} | ${'SESSION'.padEnd(25)}`); - console.log(''.padEnd(100, '-')); - - // 1. Get active tmux sessions on the HOST - const tmux = spawnSync('tmux', ['ls', '-F', '#{session_name}']); - const activeSessions = tmux.stdout.toString().split('\n').filter(s => s.startsWith('offload-')); - - // 2. 
Scan worktrees inside the CONTAINER - const findJobs = spawnSync('docker', ['exec', 'maintainer-worker', 'ls', WORKTREE_BASE], { stdio: 'pipe' }); - const jobs = findJobs.stdout.toString().split('\n').filter(d => d.startsWith('offload-')); - - if (jobs.length === 0 && activeSessions.length === 0) { - console.log(' No jobs found.'); - return; +async function runStatus(env: NodeJS.ProcessEnv = process.env) { + const settingsPath = path.join(REPO_ROOT, '.gemini/offload/settings.json'); + if (!fs.existsSync(settingsPath)) { + console.error('โŒ Settings not found. Run "npm run offload:setup" first.'); + return 1; + } + const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); + const config = settings.deepReview; + if (!config) { + console.error('โŒ Deep Review configuration not found.'); + return 1; } - const allJobIds = Array.from(new Set([...jobs, ...activeSessions])); + const { projectId, zone } = config; + const targetVM = `gcli-offload-${env.USER || 'mattkorwel'}`; + const provider = ProviderFactory.getProvider({ projectId, zone, instanceName: targetVM }); - allJobIds.forEach(id => { - if (!id) return; - const parts = id.split('-'); // offload-123-review - const pr = parts[1] || '???'; - const action = parts[2] || '???'; + console.log(`\n๐Ÿ›ฐ๏ธ Offload Mission Control: ${targetVM}`); + console.log(`--------------------------------------------------------------------------------`); + + const status = await provider.getStatus(); + console.log(` - VM State: ${status.status}`); + console.log(` - Internal IP: ${status.internalIp || 'N/A'}`); + + if (status.status === 'RUNNING') { + console.log(`\n๐Ÿงต Active Sessions (tmux):`); + // We fetch the list of sessions from the host + const tmuxRes = await provider.getExecOutput('tmux list-sessions -F "#S" 2>/dev/null'); - let state = '๐Ÿ’ค IDLE'; - if (activeSessions.includes(id)) { - state = '๐Ÿƒ RUNNING'; + if (tmuxRes.status === 0 && tmuxRes.stdout.trim()) { + const sessions = 
tmuxRes.stdout.trim().split('\n'); + sessions.forEach(s => { + if (s.startsWith('offload-')) { + console.log(` โœ… ${s}`); + } else { + console.log(` ๐Ÿ”น ${s} (Non-offload)`); + } + }); } else { - // Check logs inside the container - const logCheck = spawnSync('docker', ['exec', 'maintainer-worker', 'sh', '-c', `ls ${WORKTREE_BASE}/${id}/.gemini/logs/*.log 2>/dev/null | tail -n 1`], { stdio: 'pipe' }); - const lastLogFile = logCheck.stdout.toString().trim(); - - if (lastLogFile) { - const logContent = spawnSync('docker', ['exec', 'maintainer-worker', 'cat', lastLogFile], { stdio: 'pipe' }).stdout.toString(); - if (logContent.includes('SUCCESS')) state = 'โœ… SUCCESS'; - else if (logContent.includes('FAILED')) state = 'โŒ FAILED'; - else state = '๐Ÿ FINISHED'; - } + console.log(' - No active sessions'); } + } - console.log(`${pr.padEnd(10)} | ${action.padEnd(10)} | ${state.padEnd(12)} | ${id.padEnd(25)}`); - if (state === '๐Ÿƒ RUNNING') { - console.log(` โ”œโ”€ Attach: npm run offload:attach ${pr} ${action} [--local]`); - console.log(` โ”œโ”€ Logs: npm run offload:logs ${pr} ${action}`); - } - console.log(` โ””โ”€ Remove: npm run offload:remove ${pr} ${action}`); - }); - console.log(''.padEnd(100, '-')); + console.log(`--------------------------------------------------------------------------------\n`); + return 0; } -getStatus(); +runStatus().catch(console.error);