mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
docs(offload): document persistence and fix status script orchestrator
This commit is contained in:
@@ -64,7 +64,21 @@ The offload system uses a **Hybrid VM + Docker** architecture designed for maxim
|
|||||||
### Getting Started (Onboarding)
|
### Getting Started (Onboarding)
|
||||||
For a complete guide on setting up your remote environment, see the [Maintainer Onboarding Guide](../../../MAINTAINER_ONBOARDING.md).
|
For a complete guide on setting up your remote environment, see the [Maintainer Onboarding Guide](../../../MAINTAINER_ONBOARDING.md).
|
||||||
|
|
||||||
### Technical details
|
### Persistence and Job Recovery
|
||||||
|
|
||||||
|
The offload system is designed for high reliability and persistence. Jobs use a nested execution model to ensure they continue running even if your local terminal is closed or the connection is lost.
|
||||||
|
|
||||||
|
#### How it Works
|
||||||
|
1. **Host-Level Persistence**: The orchestrator launches each job in a named **`tmux`** session on the remote VM.
|
||||||
|
2. **Container Isolation**: The actual work is performed inside the persistent `maintainer-worker` Docker container.
|
||||||
|
|
||||||
|
#### Re-attaching to a Job
|
||||||
|
If you lose your connection, you can easily resume your session:
|
||||||
|
|
||||||
|
- **Automatic**: Simply run the exact same command you started with (e.g., `npm run offload 123 review`). The system will automatically detect the existing session and re-attach you.
|
||||||
|
- **Manual**: Run `npm run offload:status` to find the session name, then run `ssh gcli-worker` to connect to the VM and `tmux attach -t <session>` to resume the session.
|
||||||
|
|
||||||
|
### Technical details
|
||||||
|
|
||||||
This skill uses a **Worker Provider** abstraction (`GceCosProvider`) to manage the remote lifecycle. It uses an isolated Gemini profile on the remote host (`~/.offload/gemini-cli-config`) to ensure that verification tasks do not interfere with your primary configuration.
|
This skill uses a **Worker Provider** abstraction (`GceCosProvider`) to manage the remote lifecycle. It uses an isolated Gemini profile on the remote host (`~/.offload/gemini-cli-config`) to ensure that verification tasks do not interfere with your primary configuration.
|
||||||
|
|
||||||
|
|||||||
@@ -1,66 +1,61 @@
|
|||||||
/**
|
/**
|
||||||
* Offload Status Inspector (Remote)
|
* Offload Status Inspector (Local)
|
||||||
*
|
*
|
||||||
* Scans tmux sessions (host) and logs (container) to provide job status.
|
* Orchestrates remote status retrieval via the WorkerProvider.
|
||||||
*/
|
*/
|
||||||
import { spawnSync } from 'child_process';
|
|
||||||
import fs from 'fs';
|
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import os from 'os';
|
import fs from 'fs';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import { ProviderFactory } from './providers/ProviderFactory.ts';
|
||||||
|
|
||||||
const WORKTREE_BASE = '/home/node/dev/worktrees';
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||||
|
const REPO_ROOT = path.resolve(__dirname, '../../../..');
|
||||||
|
|
||||||
function getStatus() {
|
async function runStatus(env: NodeJS.ProcessEnv = process.env) {
|
||||||
console.log('\n🛰️ Offload Mission Control Status (Container Mode):');
|
const settingsPath = path.join(REPO_ROOT, '.gemini/offload/settings.json');
|
||||||
console.log(''.padEnd(100, '-'));
|
if (!fs.existsSync(settingsPath)) {
|
||||||
console.log(`${'JOB ID'.padEnd(10)} | ${'ACTION'.padEnd(10)} | ${'STATE'.padEnd(12)} | ${'SESSION'.padEnd(25)}`);
|
console.error('❌ Settings not found. Run "npm run offload:setup" first.');
|
||||||
console.log(''.padEnd(100, '-'));
|
return 1;
|
||||||
|
}
|
||||||
// 1. Get active tmux sessions on the HOST
|
const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
|
||||||
const tmux = spawnSync('tmux', ['ls', '-F', '#{session_name}']);
|
const config = settings.deepReview;
|
||||||
const activeSessions = tmux.stdout.toString().split('\n').filter(s => s.startsWith('offload-'));
|
if (!config) {
|
||||||
|
console.error('❌ Deep Review configuration not found.');
|
||||||
// 2. Scan worktrees inside the CONTAINER
|
return 1;
|
||||||
const findJobs = spawnSync('docker', ['exec', 'maintainer-worker', 'ls', WORKTREE_BASE], { stdio: 'pipe' });
|
|
||||||
const jobs = findJobs.stdout.toString().split('\n').filter(d => d.startsWith('offload-'));
|
|
||||||
|
|
||||||
if (jobs.length === 0 && activeSessions.length === 0) {
|
|
||||||
console.log(' No jobs found.');
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const allJobIds = Array.from(new Set([...jobs, ...activeSessions]));
|
const { projectId, zone } = config;
|
||||||
|
const targetVM = `gcli-offload-${env.USER || 'mattkorwel'}`;
|
||||||
|
const provider = ProviderFactory.getProvider({ projectId, zone, instanceName: targetVM });
|
||||||
|
|
||||||
allJobIds.forEach(id => {
|
console.log(`\n🛰️ Offload Mission Control: ${targetVM}`);
|
||||||
if (!id) return;
|
console.log(`--------------------------------------------------------------------------------`);
|
||||||
const parts = id.split('-'); // offload-123-review
|
|
||||||
const pr = parts[1] || '???';
|
const status = await provider.getStatus();
|
||||||
const action = parts[2] || '???';
|
console.log(` - VM State: ${status.status}`);
|
||||||
|
console.log(` - Internal IP: ${status.internalIp || 'N/A'}`);
|
||||||
|
|
||||||
|
if (status.status === 'RUNNING') {
|
||||||
|
console.log(`\n🧵 Active Sessions (tmux):`);
|
||||||
|
// We fetch the list of sessions from the host
|
||||||
|
const tmuxRes = await provider.getExecOutput('tmux list-sessions -F "#S" 2>/dev/null');
|
||||||
|
|
||||||
let state = '💤 IDLE';
|
if (tmuxRes.status === 0 && tmuxRes.stdout.trim()) {
|
||||||
if (activeSessions.includes(id)) {
|
const sessions = tmuxRes.stdout.trim().split('\n');
|
||||||
state = '🏃 RUNNING';
|
sessions.forEach(s => {
|
||||||
|
if (s.startsWith('offload-')) {
|
||||||
|
console.log(` ✅ ${s}`);
|
||||||
|
} else {
|
||||||
|
console.log(` 🔹 ${s} (Non-offload)`);
|
||||||
|
}
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
// Check logs inside the container
|
console.log(' - No active sessions');
|
||||||
const logCheck = spawnSync('docker', ['exec', 'maintainer-worker', 'sh', '-c', `ls ${WORKTREE_BASE}/${id}/.gemini/logs/*.log 2>/dev/null | tail -n 1`], { stdio: 'pipe' });
|
|
||||||
const lastLogFile = logCheck.stdout.toString().trim();
|
|
||||||
|
|
||||||
if (lastLogFile) {
|
|
||||||
const logContent = spawnSync('docker', ['exec', 'maintainer-worker', 'cat', lastLogFile], { stdio: 'pipe' }).stdout.toString();
|
|
||||||
if (logContent.includes('SUCCESS')) state = '✅ SUCCESS';
|
|
||||||
else if (logContent.includes('FAILED')) state = '❌ FAILED';
|
|
||||||
else state = '🏁 FINISHED';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
console.log(`${pr.padEnd(10)} | ${action.padEnd(10)} | ${state.padEnd(12)} | ${id.padEnd(25)}`);
|
console.log(`--------------------------------------------------------------------------------\n`);
|
||||||
if (state === '🏃 RUNNING') {
|
return 0;
|
||||||
console.log(` ├─ Attach: npm run offload:attach ${pr} ${action} [--local]`);
|
|
||||||
console.log(` ├─ Logs: npm run offload:logs ${pr} ${action}`);
|
|
||||||
}
|
|
||||||
console.log(` └─ Remove: npm run offload:remove ${pr} ${action}`);
|
|
||||||
});
|
|
||||||
console.log(''.padEnd(100, '-'));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getStatus();
|
runStatus().catch(console.error);
|
||||||
|
|||||||
Reference in New Issue
Block a user