feat(offload): modularize playbooks with TaskRunner and integrate agentic fix-pr loop

This commit is contained in:
mkorwel
2026-03-13 19:31:32 -07:00
parent 3649059a28
commit 8692b347f0
11 changed files with 458 additions and 304 deletions
+101
View File
@@ -0,0 +1,101 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { spawnSync, spawn } from 'child_process';
import fs from 'fs';
import readline from 'readline';
import { runOrchestrator } from '../scripts/orchestrator.ts';
import { runWorker } from '../scripts/worker.ts';
vi.mock('child_process');
vi.mock('fs');
vi.mock('readline');
/**
 * Matrix tests that drive the offload entry points (`runOrchestrator`,
 * `runWorker`) against fully mocked `child_process`, `fs` and `readline`
 * modules, and assert on the commands they attempt to spawn.
 */
describe('Offload Tooling Matrix', () => {
  // Every mocked fs.readFileSync call returns this object serialized as JSON.
  const mockSettings = {
    maintainer: {
      deepReview: {
        remoteHost: 'test-host',
        remoteWorkDir: '~/test-dir',
        terminalType: 'none',
        syncAuth: false,
        geminiSetup: 'preexisting',
        ghSetup: 'preexisting'
      }
    }
  };

  beforeEach(() => {
    // Start each test from a clean slate, then install permissive defaults.
    vi.resetAllMocks();
    vi.mocked(fs.existsSync).mockReturnValue(true);
    vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify(mockSettings));
    vi.mocked(fs.mkdirSync).mockReturnValue(undefined as any);
    vi.mocked(fs.writeFileSync).mockReturnValue(undefined as any);
    vi.mocked(fs.createWriteStream).mockReturnValue({ pipe: vi.fn() } as any);
    vi.spyOn(process, 'chdir').mockImplementation(() => {});

    // Default: every synchronous command succeeds and prints 'test-meta'.
    vi.mocked(spawnSync).mockImplementation(() => {
      return { status: 0, stdout: Buffer.from('test-meta\n'), stderr: Buffer.from('') } as any;
    });

    // Default: every spawned child reports exit code 0 as soon as a
    // 'close' listener is attached.
    vi.mocked(spawn).mockImplementation(() => {
      return {
        stdout: { pipe: vi.fn(), on: vi.fn() },
        stderr: { pipe: vi.fn(), on: vi.fn() },
        on: vi.fn((event, cb) => { if (event === 'close') cb(0); }),
        pid: 1234
      } as any;
    });
  });

  describe('Implement Playbook', () => {
    it('should create a branch and run research/implementation', async () => {
      await runOrchestrator(['456', 'implement'], {});
      const spawnCalls = vi.mocked(spawnSync).mock.calls;

      // Calls are matched on their JSON form so the check does not depend on
      // whether the command was passed as a single string or as an args array.
      const ghCall = spawnCalls.find(call => {
        const cmdStr = JSON.stringify(call);
        return cmdStr.includes('issue') && cmdStr.includes('view') && cmdStr.includes('456');
      });
      expect(ghCall).toBeDefined();

      const sshCall = spawnCalls.find(call => {
        const cmdStr = JSON.stringify(call);
        return cmdStr.includes('implement') && cmdStr.includes('offload-456-impl-456');
      });
      expect(sshCall).toBeDefined();
    });
  });

  describe('Fix Loop', () => {
    it('should iterate until CI passes', async () => {
      let checkAttempts = 0;
      vi.mocked(spawnSync).mockImplementation((cmd: any, args: any) => {
        // Recognize the CI-check command whether it arrives as one string
        // or as an args array.
        const isCheck = (typeof cmd === 'string' && cmd.includes('pr checks')) ||
                        (Array.isArray(args) && args.includes('checks'));
        if (isCheck) {
          checkAttempts++;
          // First poll reports failure, every later poll reports success.
          return { status: 0, stdout: Buffer.from(checkAttempts === 1 ? 'fail' : 'success') } as any;
        }
        return { status: 0, stdout: Buffer.from('test-branch\n') } as any;
      });

      vi.useFakeTimers();
      try {
        const workerPromise = runWorker(['123', 'test-branch', '/path/policy', 'fix']);

        // Multi-stage timer flush to get through TaskRunner cycles and the polling loop
        for (let i = 0; i < 10; i++) {
          await vi.advanceTimersByTimeAsync(2000);
        }
        await vi.advanceTimersByTimeAsync(40000); // 1st fail
        for (let i = 0; i < 10; i++) {
          await vi.advanceTimersByTimeAsync(2000);
        }
        await vi.advanceTimersByTimeAsync(40000); // 2nd pass

        await workerPromise;
        expect(checkAttempts).toBe(2);
      } finally {
        // Always restore real timers, even when the worker rejects or the
        // assertion fails, so fake timers never leak into later tests.
        vi.useRealTimers();
      }
    });
  });
});
@@ -12,7 +12,7 @@ vi.mock('child_process');
vi.mock('fs');
vi.mock('readline');
describe('Deep Review Orchestration', () => {
describe('Offload Orchestration', () => {
const mockSettings = {
maintainer: {
deepReview: {
@@ -36,7 +36,7 @@ describe('Deep Review Orchestration', () => {
vi.mocked(fs.writeFileSync).mockReturnValue(undefined as any);
vi.mocked(fs.createWriteStream).mockReturnValue({ pipe: vi.fn() } as any);
// Mock process methods to avoid real side effects
// Mock process methods
vi.spyOn(process, 'chdir').mockImplementation(() => {});
vi.spyOn(process, 'cwd').mockReturnValue('/test-cwd');
@@ -48,83 +48,38 @@ describe('Deep Review Orchestration', () => {
return { status: 0, stdout: Buffer.from(''), stderr: Buffer.from('') } as any;
});
// Default mock for spawn (used in worker.ts)
// Default mock for spawn
vi.mocked(spawn).mockImplementation(() => {
const mockProc = {
return {
stdout: { pipe: vi.fn(), on: vi.fn() },
stderr: { pipe: vi.fn(), on: vi.fn() },
on: vi.fn((event, cb) => { if (event === 'close') cb(0); }),
pid: 1234
};
return mockProc as any;
} as any;
});
});
describe('review.ts', () => {
describe('orchestrator.ts', () => {
it('should default to review action and pass it to remote', async () => {
await runOrchestrator(['123'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const sshCall = spawnCalls.find(call => typeof call[0] === 'string' && call[0].includes('entrypoint.ts 123'));
expect(sshCall![0]).toContain('review');
});
it('should pass explicit actions (like fix) to remote', async () => {
await runOrchestrator(['123', 'fix'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const sshCall = spawnCalls.find(call => typeof call[0] === 'string' && call[0].includes('entrypoint.ts 123'));
expect(sshCall![0]).toContain('fix');
});
it('should construct the correct tmux session name from branch', async () => {
await runOrchestrator(['123'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const sshCall = spawnCalls.find(call =>
(typeof call[0] === 'string' && call[0].includes('tmux new-session')) ||
(Array.isArray(call[1]) && call[1].some(arg => typeof arg === 'string' && arg.includes('tmux new-session')))
);
expect(sshCall).toBeDefined();
const cmdStr = typeof sshCall![0] === 'string' ? sshCall![0] : (sshCall![1] as string[]).join(' ');
expect(cmdStr).toContain('test-host');
expect(cmdStr).toContain('tmux new-session -s 123-test_branch');
});
it('should use isolated config path when setupType is isolated', async () => {
const isolatedSettings = {
...mockSettings,
maintainer: {
...mockSettings.maintainer,
deepReview: {
...mockSettings.maintainer.deepReview,
geminiSetup: 'isolated'
}
}
};
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify(isolatedSettings));
await runOrchestrator(['123'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const sshCall = spawnCalls.find(call => {
const cmdStr = typeof call[0] === 'string' ? call[0] : (Array.isArray(call[1]) ? call[1].join(' ') : '');
return cmdStr.includes('GEMINI_CLI_HOME=~/.gemini-deep-review');
});
expect(sshCall).toBeDefined();
});
it('should launch in current terminal when NOT within a Gemini session', async () => {
await runOrchestrator(['123'], {}); // No session IDs in env
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const terminalCall = spawnCalls.find(call => {
const cmdStr = typeof call[0] === 'string' ? call[0] : '';
// In Direct Shell Mode, spawnSync(sshCmd, { stdio: 'inherit', ... })
// Options are in the second argument (index 1)
const options = call[1] as any;
return cmdStr.includes('ssh -t test-host') &&
cmdStr.includes('tmux attach-session') &&
options?.stdio === 'inherit';
});
expect(terminalCall).toBeDefined();
});
it('should launch in background mode when --background flag is provided', async () => {
await runOrchestrator(['123', '--background'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const backgroundCall = spawnCalls.find(call => {
const cmdStr = typeof call[0] === 'string' ? call[0] : (Array.isArray(call[1]) ? call[1].join(' ') : '');
return cmdStr.includes('>') && cmdStr.includes('background.log');
});
expect(backgroundCall).toBeDefined();
const sshCall = spawnCalls.find(call => typeof call[0] === 'string' && call[0].includes('tmux new-session'));
// Match the new 'offload-123-test-branch' format
expect(sshCall![0]).toContain('offload-123-test-branch');
});
});
@@ -138,18 +93,12 @@ describe('Deep Review Orchestration', () => {
vi.mocked(readline.createInterface).mockReturnValue(mockInterface as any);
});
it('should correctly detect pre-existing setup when everything is present on remote', async () => {
it('should correctly detect pre-existing setup', async () => {
vi.mocked(spawnSync).mockImplementation((cmd: any, args: any) => {
if (cmd === 'ssh') {
const remoteCmd = args[1];
// Mock .git folder existence check
if (remoteCmd.includes('[ -d ~/test-dir/.git ]')) return { status: 0 } as any;
// Mock successful dependency checks (gh, tmux)
if (remoteCmd.includes('command -v')) return { status: 0 } as any;
// Mock successful gh auth check
if (remoteCmd.includes('gh auth status')) return { status: 0 } as any;
// Mock gemini auth presence
if (remoteCmd.includes('google_accounts.json')) return { status: 0 } as any;
}
return { status: 0, stdout: Buffer.from(''), stderr: Buffer.from('') } as any;
});
@@ -157,120 +106,32 @@ describe('Deep Review Orchestration', () => {
mockInterface.question
.mockImplementationOnce((q, cb) => cb('test-host'))
.mockImplementationOnce((q, cb) => cb('~/test-dir'))
.mockImplementationOnce((q, cb) => cb('p')) // gemini preexisting
.mockImplementationOnce((q, cb) => cb('p')) // gh preexisting
.mockImplementationOnce((q, cb) => cb('p'))
.mockImplementationOnce((q, cb) => cb('p'))
.mockImplementationOnce((q, cb) => cb('none'));
await runSetup({ HOME: '/test-home' });
const writeCall = vi.mocked(fs.writeFileSync).mock.calls.find(call =>
call[0].toString().includes('.gemini/settings.json')
);
const writeCall = vi.mocked(fs.writeFileSync).mock.calls.find(call => call[0].toString().includes('.gemini/settings.json'));
expect(writeCall).toBeDefined();
const savedSettings = JSON.parse(writeCall![1] as string);
expect(savedSettings.maintainer.deepReview.geminiSetup).toBe('preexisting');
expect(savedSettings.maintainer.deepReview.ghSetup).toBe('preexisting');
});
});
describe('worker.ts (playbooks)', () => {
it('should launch the review playbook by default', async () => {
vi.mocked(fs.existsSync).mockReturnValue(true);
await runWorker(['123', 'test-branch', '/test-policy.toml', 'review']);
const spawnCalls = vi.mocked(spawn).mock.calls;
expect(spawnCalls.some(c => c[0].includes('/review-frontend'))).toBe(true);
});
it('should offer to provision missing requirements (gh, tmux) on a net-new machine', async () => {
vi.mocked(spawnSync).mockImplementation((cmd: any, args: any) => {
if (cmd === 'ssh') {
const remoteCmd = Array.isArray(args) ? args[args.length - 1] : args;
// Mock missing dependencies
if (remoteCmd.includes('command -v gh')) return { status: 1 } as any;
if (remoteCmd.includes('command -v tmux')) return { status: 1 } as any;
if (remoteCmd.includes('[ -d ~/test-dir/.git ]')) return { status: 1 } as any;
if (remoteCmd.includes('uname -s')) return { status: 0, stdout: Buffer.from('Linux\n') } as any;
}
return { status: 0, stdout: Buffer.from(''), stderr: Buffer.from('') } as any;
});
mockInterface.question
.mockImplementationOnce((q, cb) => cb('test-host'))
.mockImplementationOnce((q, cb) => cb('~/test-dir'))
.mockImplementationOnce((q, cb) => cb('i')) // gemini isolated
.mockImplementationOnce((q, cb) => cb('i')) // gh isolated
.mockImplementationOnce((q, cb) => cb('y')) // provision requirements
.mockImplementationOnce((q, cb) => cb('none'));
await runSetup({ HOME: '/test-home' });
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const installCall = spawnCalls.find(call => {
const cmdStr = JSON.stringify(call);
return cmdStr.includes('apt install -y gh tmux');
});
expect(installCall).toBeDefined();
it('should launch the fix playbook when requested', async () => {
vi.mocked(fs.existsSync).mockReturnValue(true);
await runWorker(['123', 'test-branch', '/test-policy.toml', 'fix']);
const spawnCalls = vi.mocked(spawn).mock.calls;
// Match the updated prompt string in fix.ts
expect(spawnCalls.some(c => c[0].toLowerCase().includes('analyze current failures'))).toBe(true);
});
it('should handle preexisting repo but missing tool auth', async () => {
vi.mocked(spawnSync).mockImplementation((cmd: any, args: any) => {
if (cmd === 'ssh') {
const remoteCmd = args[1];
if (remoteCmd.includes('[ -d ~/test-dir/.git ]')) return { status: 0 } as any;
if (remoteCmd.includes('gh auth status')) return { status: 1 } as any; // GH not auth'd
if (remoteCmd.includes('google_accounts.json')) return { status: 1 } as any; // Gemini not auth'd
if (remoteCmd.includes('command -v')) return { status: 0 } as any; // dependencies present
}
return { status: 0, stdout: Buffer.from(''), stderr: Buffer.from('') } as any;
});
vi.mocked(fs.existsSync).mockImplementation((p) => p.toString().includes('google_accounts.json'));
mockInterface.question
.mockImplementationOnce((q, cb) => cb('test-host'))
.mockImplementationOnce((q, cb) => cb('~/test-dir'))
.mockImplementationOnce((q, cb) => cb('i')) // user chooses isolated gemini despite existing repo
.mockImplementationOnce((q, cb) => cb('p')) // user chooses preexisting gh
.mockImplementationOnce((q, cb) => cb('y')) // sync gemini auth
.mockImplementationOnce((q, cb) => cb('none'));
await runSetup({ HOME: '/test-home' });
const writeCall = vi.mocked(fs.writeFileSync).mock.calls.find(call =>
call[0].toString().includes('.gemini/settings.json')
);
const savedSettings = JSON.parse(writeCall![1] as string);
expect(savedSettings.maintainer.deepReview.geminiSetup).toBe('isolated');
expect(savedSettings.maintainer.deepReview.ghSetup).toBe('preexisting');
describe('orchestrator.ts (offload)', () => {
it('should default to review action and pass it to remote', async () => {
await runOrchestrator(['123'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const sshCall = spawnCalls.find(call => typeof call[0] === 'string' && call[0].includes('entrypoint.ts 123'));
expect(sshCall![0]).toContain('review'); // Default action
});
it('should pass explicit actions (like fix) to remote', async () => {
await runOrchestrator(['123', 'fix'], {});
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const sshCall = spawnCalls.find(call => typeof call[0] === 'string' && call[0].includes('entrypoint.ts 123'));
expect(sshCall![0]).toContain('fix');
});
it('should construct the correct tmux session name from branch', async () => {
...
describe('worker.ts (playbooks)', () => {
it('should launch the review playbook by default', async () => {
vi.mocked(fs.existsSync).mockReturnValue(true);
await runWorker(['123', 'test-branch', '/test-policy.toml', 'review']);
const spawnCalls = vi.mocked(spawn).mock.calls;
const analysisCall = spawnCalls.find(call => call[0].includes('/review-frontend'));
expect(analysisCall).toBeDefined();
});
it('should launch the fix playbook when requested', async () => {
vi.mocked(fs.existsSync).mockReturnValue(true);
await runWorker(['123', 'test-branch', '/test-policy.toml', 'fix']);
const spawnCalls = vi.mocked(spawn).mock.calls;
const fixCall = spawnCalls.find(call => call[0].includes('Address review comments'));
expect(fixCall).toBeDefined();
});
});
});
describe('check.ts', () => {
@@ -282,33 +143,22 @@ describe('Deep Review Orchestration', () => {
}
return { status: 0, stdout: Buffer.from('') } as any;
});
const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
await runChecker(['123']);
expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('✅ build : SUCCESS'));
expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('✨ All remote tasks complete'));
consoleSpy.mockRestore();
});
});
describe('clean.ts', () => {
it('should kill tmux server and remove directories', async () => {
it('should kill tmux server', async () => {
vi.mocked(readline.createInterface).mockReturnValue({
question: vi.fn((q, cb) => cb('n')), // Don't wipe everything
question: vi.fn((q, cb) => cb('n')),
close: vi.fn()
} as any);
await runCleanup();
const spawnCalls = vi.mocked(spawnSync).mock.calls;
const killCall = spawnCalls.find(call => Array.isArray(call[1]) && call[1].some(arg => arg === 'tmux kill-server'));
expect(killCall).toBeDefined();
const rmCall = spawnCalls.find(call => Array.isArray(call[1]) && call[1].some(arg => arg.includes('rm -rf')));
expect(rmCall).toBeDefined();
expect(spawnCalls.some(call => Array.isArray(call[1]) && call[1].some(arg => arg === 'tmux kill-server'))).toBe(true);
});
});
});
@@ -0,0 +1,34 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { spawnSync, spawn } from 'child_process';
import fs from 'fs';
import { runFixPlaybook } from '../../scripts/playbooks/fix.ts';
vi.mock('child_process');
vi.mock('fs');
/**
 * Checks which commands the fix playbook spawns immediately after it is
 * started, with `child_process` and `fs` fully mocked.
 */
describe('Fix Playbook', () => {
  // Fake child process: reports exit code 0 as soon as a 'close' listener
  // is registered, and ignores every other event.
  const makeFakeChild = () => {
    const onEvent = vi.fn((event, cb) => {
      if (event !== 'close') return;
      cb(0);
    });
    return {
      stdout: { pipe: vi.fn(), on: vi.fn() },
      stderr: { pipe: vi.fn(), on: vi.fn() },
      on: onEvent
    } as any;
  };

  // True when any recorded spawn() call has a command containing `needle`.
  const wasSpawned = (needle: string) =>
    vi.mocked(spawn).mock.calls.some(([command]) => command.includes(needle));

  beforeEach(() => {
    vi.resetAllMocks();
    vi.mocked(fs.mkdirSync).mockReturnValue(undefined as any);
    vi.mocked(fs.writeFileSync).mockReturnValue(undefined as any);
    vi.mocked(fs.createWriteStream).mockReturnValue({ pipe: vi.fn() } as any);
    vi.mocked(spawn).mockImplementation(() => makeFakeChild());
  });

  // NOTE(review): the title mentions the fixer, but the last assertion expects
  // it NOT to have been spawned yet — confirm which one is intended.
  it('should register and run initial build, failure analysis, and fixer', async () => {
    // Deliberately not awaited: only the synchronously issued spawns are inspected.
    runFixPlaybook('123', '/tmp/target', '/path/policy', '/path/gemini');

    expect(wasSpawned('npm ci')).toBe(true);
    expect(wasSpawned('gh run view --log-failed')).toBe(true);
    expect(wasSpawned('Gemini Fixer')).toBe(false); // Should wait for build
  });
});
@@ -0,0 +1,33 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { spawnSync, spawn } from 'child_process';
import fs from 'fs';
import { runReadyPlaybook } from '../../scripts/playbooks/ready.ts';
vi.mock('child_process');
vi.mock('fs');
/**
 * Checks which commands the ready playbook spawns immediately after it is
 * started, with `child_process` and `fs` fully mocked.
 */
describe('Ready Playbook', () => {
  // Fake child process: reports exit code 0 as soon as a 'close' listener
  // is registered, and ignores every other event.
  const makeFakeChild = () => {
    const onEvent = vi.fn((event, cb) => {
      if (event !== 'close') return;
      cb(0);
    });
    return {
      stdout: { pipe: vi.fn(), on: vi.fn() },
      stderr: { pipe: vi.fn(), on: vi.fn() },
      on: onEvent
    } as any;
  };

  // True when any recorded spawn() call has a command containing `needle`.
  const wasSpawned = (needle: string) =>
    vi.mocked(spawn).mock.calls.some(([command]) => command.includes(needle));

  beforeEach(() => {
    vi.resetAllMocks();
    vi.mocked(fs.mkdirSync).mockReturnValue(undefined as any);
    vi.mocked(fs.writeFileSync).mockReturnValue(undefined as any);
    vi.mocked(fs.createWriteStream).mockReturnValue({ pipe: vi.fn() } as any);
    vi.mocked(spawn).mockImplementation(() => makeFakeChild());
  });

  it('should register and run clean, preflight, and conflict checks', async () => {
    // Deliberately not awaited: only the synchronously issued spawns are inspected.
    runReadyPlaybook('123', '/tmp/target', '/path/policy', '/path/gemini');

    expect(wasSpawned('npm run clean')).toBe(true);
    expect(wasSpawned('git fetch origin main')).toBe(true);
  });
});
@@ -0,0 +1,37 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { spawnSync, spawn } from 'child_process';
import fs from 'fs';
import { runReviewPlaybook } from '../../scripts/playbooks/review.ts';
vi.mock('child_process');
vi.mock('fs');
/**
 * Checks which commands the review playbook spawns immediately after it is
 * started, with `child_process` and `fs` fully mocked.
 */
describe('Review Playbook', () => {
  beforeEach(() => {
    vi.resetAllMocks();
    vi.mocked(fs.mkdirSync).mockReturnValue(undefined as any);
    vi.mocked(fs.writeFileSync).mockReturnValue(undefined as any);
    vi.mocked(fs.createWriteStream).mockReturnValue({ pipe: vi.fn() } as any);

    // Every spawned child reports exit code 0 as soon as a 'close'
    // listener is attached.
    vi.mocked(spawn).mockImplementation(() => {
      return {
        stdout: { pipe: vi.fn(), on: vi.fn() },
        stderr: { pipe: vi.fn(), on: vi.fn() },
        on: vi.fn((event, cb) => { if (event === 'close') cb(0); })
      } as any;
    });
  });

  it('should register and run build, ci, analysis, and verification', async () => {
    // The worker uses setInterval(1500) to check for completion, so we need to wait
    // or mock the timer. For simplicity in this POC, we'll just verify spawn calls.
    // `void` marks the call as intentionally un-awaited; the result was
    // previously bound to a local that was never read.
    void runReviewPlaybook('123', '/tmp/target', '/path/policy', '/path/gemini');

    const spawnCalls = vi.mocked(spawn).mock.calls;

    // These should start immediately (no deps)
    expect(spawnCalls.some(c => c[0].includes('npm ci'))).toBe(true);
    expect(spawnCalls.some(c => c[0].includes('gh pr checks'))).toBe(true);
    expect(spawnCalls.some(c => c[0].includes('/review-frontend'))).toBe(true);
  });
});
});