fix(offload): implement auto-update for stale containers

This commit is contained in:
mkorwel
2026-03-17 15:25:39 -07:00
parent ff56c33f64
commit bdf00c1d8c
2 changed files with 27 additions and 11 deletions
@@ -70,11 +70,11 @@ export async function runOrchestrator(args: string[], env: NodeJS.ProcessEnv = p
// 4. Execution Logic (Persistent Workstation Mode) // 4. Execution Logic (Persistent Workstation Mode)
const remoteWorker = `tsx ${persistentScripts}/entrypoint.ts ${prNumber} remote-branch ${remotePolicyPath} ${action}`; const remoteWorker = `tsx ${persistentScripts}/entrypoint.ts ${prNumber} remote-branch ${remotePolicyPath} ${action}`;
// We launch a tmux session INSIDE the container for persistence and tool access // We MUST ensure this entire block is interpreted as a SINGLE string passed to the container's shell
const tmuxCmd = `cd /home/node/dev/worktrees/${sessionName} && ${remoteWorker}; exec $SHELL`; const remoteTmuxCmd = `tmux attach-session -t ${sessionName} 2>/dev/null || tmux new-session -s ${sessionName} -n 'offload' 'cd /home/node/dev/worktrees/${sessionName} && ${remoteWorker}; exec $SHELL'`;
const containerTmux = `sudo docker exec -it maintainer-worker sh -c ${q(`tmux attach-session -t ${sessionName} 2>/dev/null || tmux new-session -s ${sessionName} -n 'offload' ${q(tmuxCmd)}`)}`; const containerWrap = `sudo docker exec -it maintainer-worker sh -c ${q(remoteTmuxCmd)}`;
const finalSSH = provider.getRunCommand(containerTmux, { interactive: true }); const finalSSH = provider.getRunCommand(containerWrap, { interactive: true });
const isWithinGemini = !!env.GEMINI_CLI || !!env.GEMINI_SESSION_ID || !!env.GCLI_SESSION_ID; const isWithinGemini = !!env.GEMINI_CLI || !!env.GEMINI_SESSION_ID || !!env.GCLI_SESSION_ID;
const terminalTarget = config.terminalTarget || 'tab'; const terminalTarget = config.terminalTarget || 'tab';
@@ -78,7 +78,7 @@ export class GceCosProvider implements WorkerProvider {
-v ~/.offload:/home/node/.offload:rw \\ -v ~/.offload:/home/node/.offload:rw \\
-v ~/dev:/home/node/dev:rw \\ -v ~/dev:/home/node/dev:rw \\
-v ~/.gemini:/home/node/.gemini:rw \\ -v ~/.gemini:/home/node/.gemini:rw \\
${imageUri} /bin/bash -c "while true; do sleep 1000; done" ${imageUri} /bin/bash -c "apt-get update && apt-get install -y tmux && while true; do sleep 1000; done"
fi fi
echo "✅ Maintainer Worker is active." echo "✅ Maintainer Worker is active."
`; `;
@@ -121,20 +121,36 @@ export class GceCosProvider implements WorkerProvider {
await new Promise(r => setTimeout(r, 20000)); await new Promise(r => setTimeout(r, 20000));
} }
// NEW: Verify the container is actually running // NEW: Verify the container is actually running AND up to date
console.log(' - Verifying remote container health...'); console.log(' - Verifying remote container health and image version...');
const containerCheck = await this.getExecOutput('sudo docker ps -q --filter "name=maintainer-worker"'); const containerCheck = await this.getExecOutput('sudo docker ps -q --filter "name=maintainer-worker"');
if (containerCheck.status !== 0 || !containerCheck.stdout.trim()) { let needsUpdate = false;
console.log(' ⚠️ Container missing or stopped. Attempting emergency restart...'); if (containerCheck.status === 0 && containerCheck.stdout.trim()) {
// Check if the running image is stale
const imageUri = 'us-docker.pkg.dev/gemini-code-dev/gemini-cli/maintainer:latest';
const remoteDigest = await this.getExecOutput(`sudo docker inspect --format='{{index .Config.Labels "org.opencontainers.image.revision"}}' maintainer-worker || sudo docker inspect --format='{{.Image}}' maintainer-worker`);
// We'll pull the latest tag to see if it's different (or just force pull if it's been a while)
// For simplicity in this environment, we'll just check if tmux is missing as a proxy for "stale image"
const tmuxCheck = await this.getExecOutput('sudo docker exec maintainer-worker which tmux');
if (tmuxCheck.status !== 0) {
console.log(' ⚠️ Remote container is stale (missing tmux). Triggering update...');
needsUpdate = true;
}
} else {
needsUpdate = true;
}
if (needsUpdate) {
console.log(' ⚠️ Container missing or stale. Attempting refresh...');
const imageUri = 'us-docker.pkg.dev/gemini-code-dev/gemini-cli/maintainer:latest'; const imageUri = 'us-docker.pkg.dev/gemini-code-dev/gemini-cli/maintainer:latest';
const recoverCmd = `sudo docker pull ${imageUri} && (sudo docker rm -f maintainer-worker || true) && sudo docker run -d --name maintainer-worker --restart always -v ~/.offload:/home/node/.offload:rw -v ~/dev:/home/node/dev:rw -v ~/.gemini:/home/node/.gemini:rw ${imageUri} /bin/bash -c "while true; do sleep 1000; done"`; const recoverCmd = `sudo docker pull ${imageUri} && (sudo docker rm -f maintainer-worker || true) && sudo docker run -d --name maintainer-worker --restart always -v ~/.offload:/home/node/.offload:rw -v ~/dev:/home/node/dev:rw -v ~/.gemini:/home/node/.gemini:rw ${imageUri} /bin/bash -c "while true; do sleep 1000; done"`;
const recoverRes = await this.exec(recoverCmd); const recoverRes = await this.exec(recoverCmd);
if (recoverRes !== 0) { if (recoverRes !== 0) {
console.error(' ❌ Critical: Failed to recover maintainer container.'); console.error(' ❌ Critical: Failed to refresh maintainer container.');
return 1; return 1;
} }
console.log(' ✅ Container recovered.'); console.log(' ✅ Container refreshed.');
} }
return 0; return 0;