diff --git a/.gemini/skills/ci/SKILL.md b/.gemini/skills/ci/SKILL.md new file mode 100644 index 0000000000..b55aa4d233 --- /dev/null +++ b/.gemini/skills/ci/SKILL.md @@ -0,0 +1,66 @@ +--- +name: ci +description: + A specialized skill for Gemini CLI that provides high-performance, fail-fast + monitoring of GitHub Actions workflows and automated local verification of CI + failures. It handles run discovery automaticallyโ€”simply provide the branch name. +--- + +# CI Replicate & Status + +This skill enables the agent to efficiently monitor GitHub Actions, triage +failures, and bridge remote CI errors to local development. It defaults to +**automatic replication** of failures to streamline the fix cycle. + +## Core Capabilities + +- **Automatic Replication**: Automatically monitors CI and immediately executes + suggested test or lint commands locally upon failure. +- **Real-time Monitoring**: Aggregated status line for all concurrent workflows + on the current branch. +- **Fail-Fast Triage**: Immediately stops on the first job failure to provide a + structured report. + +## Workflow + +### 1. CI Replicate (`replicate`) - DEFAULT +Use this as the primary path to monitor CI and **automatically** replicate +failures locally for immediate triage and fixing. +- **Behavior**: When this workflow is triggered, the agent will monitor the CI + and **immediately and automatically execute** all suggested test or lint + commands (marked with ๐Ÿš€) as soon as a failure is detected. +- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch]` +- **Discovery**: The script **automatically** finds the latest active or recent + run for the branch. Do NOT manually search for run IDs. +- **Goal**: Reproduce the failure locally without manual intervention, then + proceed to analyze and fix the code. + +### 2. CI Status (`status`) +Use this when you have pushed changes and need to monitor the CI and reproduce +any failures locally. 
+- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch] [run_id]` +- **Discovery**: The script **automatically** finds the latest active or recent + run for the branch. You should NOT manually search for \`run_id\` using \`gh run list\` + unless a specific historical run is requested. Simply provide the branch name. +- **Step 1 (Monitor)**: Execute the tool with the branch name. +- **Step 2 (Extract)**: Extract suggested \`npm test\` or \`npm run lint\` commands + from the output (marked with ๐Ÿš€). +- **Step 3 (Reproduce)**: Execute those commands locally to confirm the failure. +- **Behavior**: It will poll every 15 seconds. If it detects a failure, it will + exit with a structured report and provide the exact commands to run locally. + +## Failure Categories & Actions + +- **Test Failures**: Agent should run the specific `npm test -w <package> -- <file>` + command suggested. +- **Lint Errors**: Agent should run `npm run lint:all` or the specific package + lint command. +- **Build Errors**: Agent should check `tsc` output or build logs to resolve + compilation issues. +- **Job Errors**: Investigate `gh run view --job <job_id> --log` for + infrastructure or setup failures. + +## Noise Filtering +The underlying scripts automatically filter noise (Git logs, NPM warnings, stack +trace overhead). The agent should focus on the "Structured Failure Report" +provided by the tool. 
diff --git a/.gemini/skills/ci/scripts/ci.mjs b/.gemini/skills/ci/scripts/ci.mjs new file mode 100755 index 0000000000..9073285231 --- /dev/null +++ b/.gemini/skills/ci/scripts/ci.mjs @@ -0,0 +1,281 @@ +#!/usr/bin/env node + +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { execSync } from 'node:child_process'; + +const BRANCH = + process.argv[2] || execSync('git branch --show-current').toString().trim(); +const RUN_ID_OVERRIDE = process.argv[3]; + +let REPO; +try { + const remoteUrl = execSync('git remote get-url origin').toString().trim(); + REPO = remoteUrl + .replace(/.*github\.com[\/:]/, '') + .replace(/\.git$/, '') + .trim(); +} catch (e) { + REPO = 'google-gemini/gemini-cli'; +} + +const FAILED_FILES = new Set(); + +function runGh(args) { + try { + return execSync(`gh ${args}`, { + stdio: ['ignore', 'pipe', 'ignore'], + }).toString(); + } catch (e) { + return null; + } +} + +function fetchFailuresViaApi(jobId) { + try { + const cmd = `gh api repos/${REPO}/actions/jobs/${jobId}/logs | grep -iE " FAIL |โŒ|ERROR|Lint failed|Build failed|Exception|failed with exit code"`; + return execSync(cmd, { + stdio: ['ignore', 'pipe', 'ignore'], + maxBuffer: 10 * 1024 * 1024, + }).toString(); + } catch (e) { + return ''; + } +} + +function isNoise(line) { + const lower = line.toLowerCase(); + return ( + lower.includes('* [new branch]') || + lower.includes('npm warn') || + lower.includes('fetching updates') || + lower.includes('node:internal/errors') || + lower.includes('at ') || // Stack traces + lower.includes('checkexecsyncerror') || + lower.includes('node_modules') + ); +} + +function extractTestFile(failureText) { + const cleanLine = failureText + .replace(/[|#\[\]()]/g, ' ') + .replace(/<[^>]*>/g, ' ') + .trim(); + const fileMatch = cleanLine.match(/([\w\/._-]+\.test\.[jt]sx?)/); + if (fileMatch) return fileMatch[1]; + return null; +} + +function generateTestCommand(failedFilesMap) { + const workspaceToFiles = 
new Map(); + for (const [file, info] of failedFilesMap.entries()) { + if ( + ['Job Error', 'Unknown File', 'Build Error', 'Lint Error'].includes(file) + ) + continue; + let workspace = '@google/gemini-cli'; + let relPath = file; + if (file.startsWith('packages/core/')) { + workspace = '@google/gemini-cli-core'; + relPath = file.replace('packages/core/', ''); + } else if (file.startsWith('packages/cli/')) { + workspace = '@google/gemini-cli'; + relPath = file.replace('packages/cli/', ''); + } + relPath = relPath.replace(/^.*packages\/[^\/]+\//, ''); + if (!workspaceToFiles.has(workspace)) + workspaceToFiles.set(workspace, new Set()); + workspaceToFiles.get(workspace).add(relPath); + } + const commands = []; + for (const [workspace, files] of workspaceToFiles.entries()) { + commands.push(`npm test -w ${workspace} -- ${Array.from(files).join(' ')}`); + } + return commands.join(' && '); +} + +async function monitor() { + let targetRunIds = []; + if (RUN_ID_OVERRIDE) { + targetRunIds = [RUN_ID_OVERRIDE]; + } else { + // 1. Get runs directly associated with the branch + const runListOutput = runGh( + `run list --branch "${BRANCH}" --limit 10 --json databaseId,status,workflowName,createdAt`, + ); + if (runListOutput) { + const runs = JSON.parse(runListOutput); + const activeRuns = runs.filter((r) => r.status !== 'completed'); + if (activeRuns.length > 0) { + targetRunIds = activeRuns.map((r) => r.databaseId); + } else if (runs.length > 0) { + const latestTime = new Date(runs[0].createdAt).getTime(); + targetRunIds = runs + .filter((r) => latestTime - new Date(r.createdAt).getTime() < 60000) + .map((r) => r.databaseId); + } + } + + // 2. 
Get runs associated with commit statuses (handles chained/indirect runs) + try { + const headSha = execSync(`git rev-parse "${BRANCH}"`).toString().trim(); + const statusOutput = runGh( + `api repos/${REPO}/commits/${headSha}/status -q '.statuses[] | select(.target_url | contains("actions/runs/")) | .target_url'`, + ); + if (statusOutput) { + const statusRunIds = statusOutput + .split('\n') + .filter(Boolean) + .map((url) => { + const match = url.match(/actions\/runs\/(\d+)/); + return match ? parseInt(match[1], 10) : null; + }) + .filter(Boolean); + + for (const runId of statusRunIds) { + if (!targetRunIds.includes(runId)) { + targetRunIds.push(runId); + } + } + } + } catch (e) { + // Ignore if branch/SHA not found or API fails + } + + if (targetRunIds.length > 0) { + const runNames = []; + for (const runId of targetRunIds) { + const runInfo = runGh(`run view "${runId}" --json workflowName`); + if (runInfo) { + runNames.push(JSON.parse(runInfo).workflowName); + } + } + console.log(`Monitoring workflows: ${[...new Set(runNames)].join(', ')}`); + } + } + + if (targetRunIds.length === 0) { + console.log(`No runs found for branch ${BRANCH}.`); + process.exit(0); + } + + while (true) { + let allPassed = 0, + allFailed = 0, + allRunning = 0, + allQueued = 0, + totalJobs = 0; + let anyRunInProgress = false; + const fileToTests = new Map(); + let failuresFoundInLoop = false; + + for (const runId of targetRunIds) { + const runOutput = runGh( + `run view "${runId}" --json databaseId,status,conclusion,workflowName`, + ); + if (!runOutput) continue; + const run = JSON.parse(runOutput); + if (run.status !== 'completed') anyRunInProgress = true; + + const jobsOutput = runGh(`run view "${runId}" --json jobs`); + if (jobsOutput) { + const { jobs } = JSON.parse(jobsOutput); + totalJobs += jobs.length; + const failedJobs = jobs.filter((j) => j.conclusion === 'failure'); + if (failedJobs.length > 0) { + failuresFoundInLoop = true; + for (const job of failedJobs) { + const failures = 
fetchFailuresViaApi(job.databaseId); + if (failures.trim()) { + failures.split('\n').forEach((line) => { + if (!line.trim() || isNoise(line)) return; + const file = extractTestFile(line); + const filePath = + file || + (line.toLowerCase().includes('lint') + ? 'Lint Error' + : line.toLowerCase().includes('build') + ? 'Build Error' + : 'Unknown File'); + let testName = line; + if (line.includes(' > ')) { + testName = line.split(' > ').slice(1).join(' > ').trim(); + } + if (!fileToTests.has(filePath)) + fileToTests.set(filePath, new Set()); + fileToTests.get(filePath).add(testName); + }); + } else { + const step = + job.steps?.find((s) => s.conclusion === 'failure')?.name || + 'unknown'; + const category = step.toLowerCase().includes('lint') + ? 'Lint Error' + : step.toLowerCase().includes('build') + ? 'Build Error' + : 'Job Error'; + if (!fileToTests.has(category)) + fileToTests.set(category, new Set()); + fileToTests + .get(category) + .add(`${job.name}: Failed at step "${step}"`); + } + } + } + for (const job of jobs) { + if (job.status === 'in_progress') allRunning++; + else if (job.status === 'queued') allQueued++; + else if (job.conclusion === 'success') allPassed++; + else if (job.conclusion === 'failure') allFailed++; + } + } + } + + if (failuresFoundInLoop) { + console.log( + `\n\nโŒ Failures detected across ${allFailed} job(s). Stopping monitor...`, + ); + console.log('\n--- Structured Failure Report (Noise Filtered) ---'); + for (const [file, tests] of fileToTests.entries()) { + console.log(`\nCategory/File: ${file}`); + // Limit output per file if it's too large + const testsArr = Array.from(tests).map((t) => + t.length > 500 ? t.substring(0, 500) + '... [TRUNCATED]' : t, + ); + testsArr.slice(0, 10).forEach((t) => console.log(` - ${t}`)); + if (testsArr.length > 10) + console.log(` ... 
and ${testsArr.length - 10} more`); + } + const testCmd = generateTestCommand(fileToTests); + if (testCmd) { + console.log('\n๐Ÿš€ Run this to verify fixes:'); + console.log(testCmd); + } else if ( + Array.from(fileToTests.keys()).some((k) => k.includes('Lint')) + ) { + console.log('\n๐Ÿš€ Run this to verify lint fixes:\nnpm run lint:all'); + } + console.log('---------------------------------'); + process.exit(1); + } + + const completed = allPassed + allFailed; + process.stdout.write( + `\rโณ Monitoring ${targetRunIds.length} runs... ${completed}/${totalJobs} jobs (${allPassed} passed, ${allFailed} failed, ${allRunning} running, ${allQueued} queued) `, + ); + if (!anyRunInProgress) { + console.log('\nโœ… All workflows passed!'); + process.exit(0); + } + await new Promise((r) => setTimeout(r, 15000)); + } +} + +monitor().catch((err) => { + console.error('\nMonitor error:', err.message); + process.exit(1); +}); diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 89f7502502..f8382ee28c 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1215,6 +1215,11 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Disable user input on browser window during automation. - **Default:** `true` +- **`agents.browser.maxActionsPerTask`** (number): + - **Description:** The maximum number of tool calls allowed per browser task. + Enforcement is hard: the agent will be terminated when the limit is reached. + - **Default:** `100` + - **`agents.browser.confirmSensitiveActions`** (boolean): - **Description:** Require manual confirmation for sensitive browser actions (e.g., fill_form, evaluate_script). 
diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts index 3a7d8fa44f..140925964b 100644 --- a/evals/subagents.eval.ts +++ b/evals/subagents.eval.ts @@ -9,27 +9,7 @@ import path from 'node:path'; import { describe, expect } from 'vitest'; -import { evalTest } from './test-helper.js'; - -const DOCS_AGENT_DEFINITION = `--- -name: docs-agent -description: An agent with expertise in updating documentation. -tools: - - read_file - - write_file ---- -You are the docs agent. Update documentation clearly and accurately. -`; - -const TEST_AGENT_DEFINITION = `--- -name: test-agent -description: An agent with expertise in writing and updating tests. -tools: - - read_file - - write_file ---- -You are the test agent. Add or update tests. -`; +import { evalTest, TEST_AGENTS } from './test-helper.js'; const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n'; @@ -62,12 +42,12 @@ describe('subagent eval test cases', () => { }, prompt: 'Please update README.md with a description of this library.', files: { - '.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION, + ...TEST_AGENTS.DOCS_AGENT.asFile(), 'index.ts': INDEX_TS, 'README.md': 'TODO: update the README.\n', }, assert: async (rig, _result) => { - await rig.expectToolCallSuccess(['docs-agent']); + await rig.expectToolCallSuccess([TEST_AGENTS.DOCS_AGENT.name]); }, }); @@ -92,7 +72,7 @@ describe('subagent eval test cases', () => { prompt: 'Rename the exported function in index.ts from add to sum and update the file directly.', files: { - '.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION, + ...TEST_AGENTS.DOCS_AGENT.asFile(), 'index.ts': INDEX_TS, }, assert: async (rig, _result) => { @@ -102,9 +82,11 @@ describe('subagent eval test cases', () => { }>; expect(updatedIndex).toContain('export const sum ='); - expect(toolLogs.some((l) => l.toolRequest.name === 'docs-agent')).toBe( - false, - ); + expect( + toolLogs.some( + (l) => l.toolRequest.name === TEST_AGENTS.DOCS_AGENT.name, + ), + ).toBe(false); 
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( false, ); @@ -133,7 +115,7 @@ describe('subagent eval test cases', () => { }, prompt: 'Please add a small test file that verifies add(1, 2) returns 3.', files: { - '.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION, + ...TEST_AGENTS.TESTING_AGENT.asFile(), 'index.ts': INDEX_TS, 'package.json': JSON.stringify( { @@ -150,7 +132,7 @@ describe('subagent eval test cases', () => { toolRequest: { name: string }; }>; - await rig.expectToolCallSuccess(['test-agent']); + await rig.expectToolCallSuccess([TEST_AGENTS.TESTING_AGENT.name]); expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( false, ); @@ -178,8 +160,8 @@ describe('subagent eval test cases', () => { prompt: 'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.', files: { - '.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION, - '.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION, + ...TEST_AGENTS.DOCS_AGENT.asFile(), + ...TEST_AGENTS.TESTING_AGENT.asFile(), 'index.ts': INDEX_TS, 'README.md': 'TODO: update the README.\n', 'package.json': JSON.stringify( @@ -198,7 +180,10 @@ describe('subagent eval test cases', () => { }>; const readme = readProjectFile(rig, 'README.md'); - await rig.expectToolCallSuccess(['docs-agent', 'test-agent']); + await rig.expectToolCallSuccess([ + TEST_AGENTS.DOCS_AGENT.name, + TEST_AGENTS.TESTING_AGENT.name, + ]); expect(readme).not.toContain('TODO: update the README.'); expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( false, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 0d0672a227..c0f2395110 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1208,6 +1208,16 @@ const SETTINGS_SCHEMA = { 'Disable user input on browser window during automation.', showInDialog: false, }, + maxActionsPerTask: { + type: 
'number', + label: 'Max Actions Per Task', + category: 'Advanced', + requiresRestart: false, + default: 100, + description: + 'The maximum number of tool calls allowed per browser task. Enforcement is hard: the agent will be terminated when the limit is reached.', + showInDialog: false, + }, confirmSensitiveActions: { type: 'boolean', label: 'Confirm Sensitive Actions', diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 8c199c9387..ce5fc7c872 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -1393,9 +1393,22 @@ Logging in with Google... Restarting Gemini CLI to continue. (streamingState === StreamingState.Idle || streamingState === StreamingState.Responding || streamingState === StreamingState.WaitingForConfirmation) && - !proQuotaRequest; + !proQuotaRequest && + !copyModeEnabled; const [controlsHeight, setControlsHeight] = useState(0); + const [lastNonCopyControlsHeight, setLastNonCopyControlsHeight] = useState(0); + + useLayoutEffect(() => { + if (!copyModeEnabled && controlsHeight > 0) { + setLastNonCopyControlsHeight(controlsHeight); + } + }, [copyModeEnabled, controlsHeight]); + + const stableControlsHeight = + copyModeEnabled && lastNonCopyControlsHeight > 0 + ? lastNonCopyControlsHeight + : controlsHeight; useLayoutEffect(() => { if (mainControlsRef.current) { @@ -1407,10 +1420,10 @@ Logging in with Google... Restarting Gemini CLI to continue. } }, [buffer, terminalWidth, terminalHeight, controlsHeight, isInputActive]); - // Compute available terminal height based on controls measurement + // Compute available terminal height based on stable controls measurement const availableTerminalHeight = Math.max( 0, - terminalHeight - controlsHeight - backgroundShellHeight - 1, + terminalHeight - stableControlsHeight - backgroundShellHeight - 1, ); config.setShellExecutionConfig({ @@ -2269,6 +2282,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
contextFileNames, errorCount, availableTerminalHeight, + stableControlsHeight, mainAreaWidth, staticAreaMaxItemHeight, staticExtraHeight, @@ -2390,6 +2404,7 @@ Logging in with Google... Restarting Gemini CLI to continue. contextFileNames, errorCount, availableTerminalHeight, + stableControlsHeight, mainAreaWidth, staticAreaMaxItemHeight, staticExtraHeight, diff --git a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap index 1d1ebbb3d1..f145eadfff 100644 --- a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap @@ -34,12 +34,11 @@ Tips for getting started: - - Notifications + Composer " `; @@ -100,12 +99,11 @@ exports[`App > Snapshots > renders with dialogs visible 1`] = ` - - Notifications + DialogManager " `; @@ -147,9 +145,8 @@ HistoryItemDisplay - - Notifications + Composer " `; diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg index be799c5d80..97b01f3025 100644 --- a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg @@ -1,271 +1,266 @@ - + - + - 3. Ask coding questions, edit code or run commands - 4. 
Be specific for the best results + + โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€ + + + > + + Can you edit InputPrompt.tsx for me? + - โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€ - - - > - - Can you edit InputPrompt.tsx for me? - - - โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„ - โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ + โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„ + โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ + โ”‚ + Action Required + โ”‚ + โ”‚ + โ”‚ โ”‚ - 
Action Required + ? + Edit + packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto + โ€ฆ โ”‚ โ”‚ โ”‚ โ”‚ - ? - Edit - packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto - โ€ฆ + ... first 44 lines hidden (Ctrl+O to show) ... โ”‚ โ”‚ + 45 + const + line45 + = + true + ; โ”‚ โ”‚ - ... first 44 lines hidden (Ctrl+O to show) ... + 46 + const + line46 + = + true + ; โ”‚ - โ–ˆ โ”‚ - 45 + 47 const - line45 + line47 = true ; โ”‚ โ–ˆ โ”‚ - 46 + 48 const - line46 + line48 = true ; โ”‚ โ–ˆ โ”‚ - 47 + 49 const - line47 + line49 = true ; โ”‚ โ–ˆ โ”‚ - 48 + 50 const - line48 + line50 = true ; โ”‚ โ–ˆ โ”‚ - 49 + 51 const - line49 + line51 = true ; โ”‚ โ–ˆ โ”‚ - 50 + 52 const - line50 + line52 = true ; โ”‚ โ–ˆ โ”‚ - 51 + 53 const - line51 + line53 = true ; โ”‚ โ–ˆ โ”‚ - 52 + 54 const - line52 + line54 = true ; โ”‚ โ–ˆ โ”‚ - 53 + 55 const - line53 + line55 = true ; โ”‚ โ–ˆ โ”‚ - 54 + 56 const - line54 + line56 = true ; โ”‚ โ–ˆ โ”‚ - 55 + 57 const - line55 + line57 = true ; โ”‚ โ–ˆ โ”‚ - 56 + 58 const - line56 + line58 = true ; โ”‚ โ–ˆ โ”‚ - 57 + 59 const - line57 + line59 = true ; โ”‚ โ–ˆ โ”‚ - 58 + 60 const - line58 + line60 = true ; โ”‚ โ–ˆ โ”‚ - 59 - const - line59 - = - true - ; + + 61 + + + - + + + + return + + kittyProtocolSupporte...; โ”‚ โ–ˆ โ”‚ - 60 - const - line60 - = - true - ; + + 61 + + + + + + + + return + + kittyProtocolSupporte...; โ”‚ โ–ˆ โ”‚ - - 61 - - - - - - - - return - - kittyProtocolSupporte...; + 62 + buffer: TextBuffer; โ”‚ โ–ˆ โ”‚ - - 61 - - - + - - - - return - - kittyProtocolSupporte...; + 63 + onSubmit + : ( + value + : + string + ) => + void + ; โ”‚ โ–ˆ โ”‚ - 62 - buffer: TextBuffer; + Apply this change? โ”‚ โ–ˆ โ”‚ - 63 - onSubmit - : ( - value - : - string - ) => - void - ; โ”‚ โ–ˆ โ”‚ - Apply this change? + + โ— + + + 1. + + + Allow once + โ”‚ โ–ˆ โ”‚ + 2. + Allow for this session โ”‚ โ–ˆ โ”‚ - - โ— - - - 1. - - - Allow once - + 3. 
+ Allow for this file in all future sessions โ”‚ โ–ˆ โ”‚ - 2. - Allow for this session + 4. + Modify with external editor โ”‚ โ–ˆ โ”‚ - 3. - Allow for this file in all future sessions + 5. + No, suggest changes (esc) โ”‚ โ–ˆ โ”‚ - 4. - Modify with external editor โ”‚ โ–ˆ - โ”‚ - 5. - No, suggest changes (esc) - โ”‚ + โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ–ˆ - โ”‚ - โ”‚ - โ–ˆ - โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ - โ–ˆ \ No newline at end of file diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap index 202f814c05..98853434df 100644 --- a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap @@ -1,9 +1,7 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[`Full Terminal Tool Confirmation Snapshot > renders tool confirmation box in the frame of the entire terminal 1`] = ` -"3. Ask coding questions, edit code or run commands -4. 
Be specific for the best results -โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€ +"โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€ > Can you edit InputPrompt.tsx for me? โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ @@ -11,9 +9,9 @@ exports[`Full Terminal Tool Confirmation Snapshot > renders tool confirmation bo โ”‚ โ”‚ โ”‚ ? Edit packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProtoโ€ฆ โ”‚ โ”‚ โ”‚ -โ”‚ ... first 44 lines hidden (Ctrl+O to show) ... โ”‚โ–ˆ -โ”‚ 45 const line45 = true; โ”‚โ–ˆ -โ”‚ 46 const line46 = true; โ”‚โ–ˆ +โ”‚ ... first 44 lines hidden (Ctrl+O to show) ... 
โ”‚ +โ”‚ 45 const line45 = true; โ”‚ +โ”‚ 46 const line46 = true; โ”‚ โ”‚ 47 const line47 = true; โ”‚โ–ˆ โ”‚ 48 const line48 = true; โ”‚โ–ˆ โ”‚ 49 const line49 = true; โ”‚โ–ˆ diff --git a/packages/cli/src/ui/components/AppHeader.tsx b/packages/cli/src/ui/components/AppHeader.tsx index 704b094663..7d0ef75a36 100644 --- a/packages/cli/src/ui/components/AppHeader.tsx +++ b/packages/cli/src/ui/components/AppHeader.tsx @@ -108,7 +108,7 @@ export const AppHeader = ({ version, showDetails = true }: AppHeaderProps) => { Gemini CLI v{version} - {updateInfo && ( + {updateInfo?.isUpdating && ( Updating diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 593b4e2a6a..af6d3b32da 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -588,12 +588,15 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { streamingState={uiState.streamingState} suggestionsPosition={suggestionsPosition} onSuggestionsVisibilityChange={setSuggestionsVisible} + copyModeEnabled={uiState.copyModeEnabled} /> )} {showUiDetails && !settings.merged.ui.hideFooter && - !isScreenReaderEnabled &&
} + !isScreenReaderEnabled && ( +
+ )}
); }; diff --git a/packages/cli/src/ui/components/CopyModeWarning.tsx b/packages/cli/src/ui/components/CopyModeWarning.tsx index 4b6328274b..eb5c1f6d78 100644 --- a/packages/cli/src/ui/components/CopyModeWarning.tsx +++ b/packages/cli/src/ui/components/CopyModeWarning.tsx @@ -12,16 +12,14 @@ import { theme } from '../semantic-colors.js'; export const CopyModeWarning: React.FC = () => { const { copyModeEnabled } = useUIState(); - if (!copyModeEnabled) { - return null; - } - return ( - - - In Copy Mode. Use Page Up/Down to scroll. Press Ctrl+S or any other key - to exit. - + + {copyModeEnabled && ( + + In Copy Mode. Use Page Up/Down to scroll. Press Ctrl+S or any other + key to exit. + + )} ); }; diff --git a/packages/cli/src/ui/components/Footer.tsx b/packages/cli/src/ui/components/Footer.tsx index c6816339f5..696cc5e417 100644 --- a/packages/cli/src/ui/components/Footer.tsx +++ b/packages/cli/src/ui/components/Footer.tsx @@ -175,12 +175,18 @@ interface FooterColumn { isHighPriority: boolean; } -export const Footer: React.FC = () => { +export const Footer: React.FC<{ copyModeEnabled?: boolean }> = ({ + copyModeEnabled = false, +}) => { const uiState = useUIState(); const config = useConfig(); const settings = useSettings(); const { vimEnabled, vimMode } = useVimMode(); + if (copyModeEnabled) { + return ; + } + const { model, targetDir, @@ -353,7 +359,17 @@ export const Footer: React.FC = () => { break; } case 'memory-usage': { - addCol(id, header, () => , 10); + addCol( + id, + header, + () => ( + + ), + 10, + ); break; } case 'session-id': { diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 0deb0c40d2..35cf7ef656 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -119,6 +119,7 @@ export interface InputPromptProps { popAllMessages?: () => string | undefined; suggestionsPosition?: 'above' | 'below'; setBannerVisible: (visible: boolean) => 
void; + copyModeEnabled?: boolean; } // The input content, input container, and input suggestions list may have different widths @@ -212,6 +213,7 @@ export const InputPrompt: React.FC = ({ popAllMessages, suggestionsPosition = 'below', setBannerVisible, + copyModeEnabled = false, }) => { const isHelpDismissKey = useIsHelpDismissKey(); const keyMatchers = useKeyMatchers(); @@ -331,7 +333,8 @@ export const InputPrompt: React.FC = ({ isShellSuggestionsVisible, } = completion; - const showCursor = focus && isShellFocused && !isEmbeddedShellFocused; + const showCursor = + focus && isShellFocused && !isEmbeddedShellFocused && !copyModeEnabled; // Notify parent component about escape prompt state changes useEffect(() => { diff --git a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx index 7941a9cb1d..709f76baf3 100644 --- a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx +++ b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx @@ -11,13 +11,18 @@ import { theme } from '../semantic-colors.js'; import process from 'node:process'; import { formatBytes } from '../utils/formatters.js'; -export const MemoryUsageDisplay: React.FC<{ color?: string }> = ({ - color = theme.text.primary, -}) => { +export const MemoryUsageDisplay: React.FC<{ + color?: string; + isActive?: boolean; +}> = ({ color = theme.text.primary, isActive = true }) => { const [memoryUsage, setMemoryUsage] = useState(''); const [memoryUsageColor, setMemoryUsageColor] = useState(color); useEffect(() => { + if (!isActive) { + return; + } + const updateMemory = () => { const usage = process.memoryUsage().rss; setMemoryUsage(formatBytes(usage)); @@ -25,10 +30,11 @@ export const MemoryUsageDisplay: React.FC<{ color?: string }> = ({ usage >= 2 * 1024 * 1024 * 1024 ? 
theme.status.error : color, ); }; + const intervalId = setInterval(updateMemory, 2000); updateMemory(); // Initial update return () => clearInterval(intervalId); - }, [color]); + }, [color, isActive]); return ( diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index b77a56bbc3..e4d95a79af 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -180,6 +180,7 @@ export interface UIState { contextFileNames: string[]; errorCount: number; availableTerminalHeight: number | undefined; + stableControlsHeight: number; mainAreaWidth: number; staticAreaMaxItemHeight: number; staticExtraHeight: number; diff --git a/packages/cli/src/ui/layouts/DefaultAppLayout.tsx b/packages/cli/src/ui/layouts/DefaultAppLayout.tsx index 74c02c1d9a..8370b78085 100644 --- a/packages/cli/src/ui/layouts/DefaultAppLayout.tsx +++ b/packages/cli/src/ui/layouts/DefaultAppLayout.tsx @@ -31,6 +31,7 @@ export const DefaultAppLayout: React.FC = () => { flexDirection="column" width={uiState.terminalWidth} height={isAlternateBuffer ? terminalHeight : undefined} + paddingBottom={isAlternateBuffer ? 1 : undefined} flexShrink={0} flexGrow={0} overflow="hidden" @@ -62,6 +63,9 @@ export const DefaultAppLayout: React.FC = () => { flexShrink={0} flexGrow={0} width={uiState.terminalWidth} + height={ + uiState.copyModeEnabled ? 
uiState.stableControlsHeight : undefined + } > diff --git a/packages/cli/src/ui/utils/updateCheck.ts b/packages/cli/src/ui/utils/updateCheck.ts index 21dc0f836e..9f80beee08 100644 --- a/packages/cli/src/ui/utils/updateCheck.ts +++ b/packages/cli/src/ui/utils/updateCheck.ts @@ -27,6 +27,7 @@ export interface UpdateInfo { export interface UpdateObject { message: string; update: UpdateInfo; + isUpdating?: boolean; } /** diff --git a/packages/cli/src/utils/handleAutoUpdate.test.ts b/packages/cli/src/utils/handleAutoUpdate.test.ts index 94795bf94e..6035c1e6d1 100644 --- a/packages/cli/src/utils/handleAutoUpdate.test.ts +++ b/packages/cli/src/utils/handleAutoUpdate.test.ts @@ -197,7 +197,9 @@ describe('handleAutoUpdate', () => { expect(updateEventEmitter.emit).toHaveBeenCalledTimes(1); expect(updateEventEmitter.emit).toHaveBeenCalledWith('update-received', { + ...mockUpdateInfo, message: 'An update is available!\nPlease update manually.', + isUpdating: false, }); expect(mockSpawn).not.toHaveBeenCalled(); }); @@ -236,7 +238,9 @@ describe('handleAutoUpdate', () => { expect(updateEventEmitter.emit).toHaveBeenCalledTimes(1); expect(updateEventEmitter.emit).toHaveBeenCalledWith('update-received', { + ...mockUpdateInfo, message: 'An update is available!\nCannot determine update command.', + isUpdating: false, }); expect(mockSpawn).not.toHaveBeenCalled(); }); @@ -253,7 +257,9 @@ describe('handleAutoUpdate', () => { expect(updateEventEmitter.emit).toHaveBeenCalledTimes(1); expect(updateEventEmitter.emit).toHaveBeenCalledWith('update-received', { + ...mockUpdateInfo, message: 'An update is available!\nThis is an additional message.', + isUpdating: false, }); }); diff --git a/packages/cli/src/utils/handleAutoUpdate.ts b/packages/cli/src/utils/handleAutoUpdate.ts index bd0effa53b..4f8ca69ed3 100644 --- a/packages/cli/src/utils/handleAutoUpdate.ts +++ b/packages/cli/src/utils/handleAutoUpdate.ts @@ -102,17 +102,22 @@ export function handleAutoUpdate( combinedMessage += 
`\n${installationInfo.updateMessage}`; } - updateEventEmitter.emit('update-received', { - message: combinedMessage, - }); - if ( !installationInfo.updateCommand || !settings.merged.general.enableAutoUpdate ) { + updateEventEmitter.emit('update-received', { + ...info, + message: combinedMessage, + isUpdating: false, + }); return; } - + updateEventEmitter.emit('update-received', { + ...info, + message: combinedMessage, + isUpdating: true, + }); if (_updateInProgress) { return; } diff --git a/packages/core/src/agents/a2a-client-manager.test.ts b/packages/core/src/agents/a2a-client-manager.test.ts index f4a39c1d36..60c9d66035 100644 --- a/packages/core/src/agents/a2a-client-manager.test.ts +++ b/packages/core/src/agents/a2a-client-manager.test.ts @@ -128,7 +128,10 @@ describe('A2AClientManager', () => { describe('getInstance / dispatcher initialization', () => { it('should use UndiciAgent when no proxy is configured', async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); const resolverOptions = vi.mocked(DefaultAgentCardResolver).mock .calls[0][0]; @@ -153,7 +156,10 @@ describe('A2AClientManager', () => { } as Config; manager = new A2AClientManager(mockConfigWithProxy); - await manager.loadAgent('TestProxyAgent', 'http://test.proxy.agent/card'); + await manager.loadAgent('TestProxyAgent', { + type: 'url', + url: 'http://test.proxy.agent/card', + }); const resolverOptions = vi.mocked(DefaultAgentCardResolver).mock .calls[0][0]; @@ -172,28 +178,40 @@ describe('A2AClientManager', () => { describe('loadAgent', () => { it('should create and cache an A2AClient', async () => { - const agentCard = await manager.loadAgent( - 'TestAgent', - 'http://test.agent/card', - ); + const agentCard = await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); expect(manager.getAgentCard('TestAgent')).toBe(agentCard); 
expect(manager.getClient('TestAgent')).toBeDefined(); }); it('should configure ClientFactory with REST, JSON-RPC, and gRPC transports', async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); expect(ClientFactoryOptions.createFrom).toHaveBeenCalled(); }); it('should throw an error if an agent with the same name is already loaded', async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); await expect( - manager.loadAgent('TestAgent', 'http://test.agent/card'), + manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }), ).rejects.toThrow("Agent with name 'TestAgent' is already loaded."); }); it('should use native fetch by default', async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); expect(createAuthenticatingFetchWithRetry).not.toHaveBeenCalled(); }); @@ -204,7 +222,7 @@ describe('A2AClientManager', () => { }; await manager.loadAgent( 'TestAgent', - 'http://test.agent/card', + { type: 'url', url: 'http://test.agent/card' }, customAuthHandler as unknown as AuthenticationHandler, ); @@ -221,7 +239,7 @@ describe('A2AClientManager', () => { }; await manager.loadAgent( 'AuthCardAgent', - 'http://authcard.agent/card', + { type: 'url', url: 'http://authcard.agent/card' }, customAuthHandler as unknown as AuthenticationHandler, ); @@ -252,7 +270,7 @@ describe('A2AClientManager', () => { await manager.loadAgent( 'AuthCardAgent401', - 'http://authcard.agent/card', + { type: 'url', url: 'http://authcard.agent/card' }, customAuthHandler as unknown as AuthenticationHandler, ); @@ -267,19 +285,65 @@ describe('A2AClientManager', () => { }); it('should log a debug message upon loading an agent', async 
() => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); expect(debugLogger.debug).toHaveBeenCalledWith( expect.stringContaining("Loaded agent 'TestAgent'"), ); }); it('should clear the cache', async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); manager.clearCache(); expect(manager.getAgentCard('TestAgent')).toBeUndefined(); expect(manager.getClient('TestAgent')).toBeUndefined(); }); + it('should load an agent from inline JSON without calling resolver', async () => { + const inlineJson = JSON.stringify(mockAgentCard); + const agentCard = await manager.loadAgent('JsonAgent', { + type: 'json', + json: inlineJson, + }); + expect(agentCard).toBeDefined(); + expect(agentCard.name).toBe('test-agent'); + expect(manager.getAgentCard('JsonAgent')).toBe(agentCard); + expect(manager.getClient('JsonAgent')).toBeDefined(); + // Resolver should not have been called for inline JSON + const resolverInstance = vi.mocked(DefaultAgentCardResolver).mock + .results[0]?.value; + if (resolverInstance) { + expect(resolverInstance.resolve).not.toHaveBeenCalled(); + } + }); + + it('should throw a descriptive error for invalid inline JSON', async () => { + await expect( + manager.loadAgent('BadJsonAgent', { + type: 'json', + json: 'not valid json {{', + }), + ).rejects.toThrow( + /Failed to parse inline agent card JSON for agent 'BadJsonAgent'/, + ); + }); + + it('should log "inline JSON" for JSON-loaded agents', async () => { + const inlineJson = JSON.stringify(mockAgentCard); + await manager.loadAgent('JsonLogAgent', { + type: 'json', + json: inlineJson, + }); + expect(debugLogger.debug).toHaveBeenCalledWith( + expect.stringContaining('inline JSON'), + ); + }); + it('should throw if resolveAgentCard fails', async () => { const resolverInstance = { resolve: 
vi.fn().mockRejectedValue(new Error('Resolution failed')), @@ -289,7 +353,10 @@ describe('A2AClientManager', () => { ); await expect( - manager.loadAgent('FailAgent', 'http://fail.agent'), + manager.loadAgent('FailAgent', { + type: 'url', + url: 'http://fail.agent', + }), ).rejects.toThrow('Resolution failed'); }); @@ -304,7 +371,10 @@ describe('A2AClientManager', () => { ); await expect( - manager.loadAgent('FailAgent', 'http://fail.agent'), + manager.loadAgent('FailAgent', { + type: 'url', + url: 'http://fail.agent', + }), ).rejects.toThrow('Factory failed'); }); }); @@ -318,7 +388,10 @@ describe('A2AClientManager', () => { describe('sendMessageStream', () => { beforeEach(async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); }); it('should send a message and return a stream', async () => { @@ -433,7 +506,10 @@ describe('A2AClientManager', () => { describe('getTask', () => { beforeEach(async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); }); it('should get a task from the correct agent', async () => { @@ -462,7 +538,10 @@ describe('A2AClientManager', () => { describe('cancelTask', () => { beforeEach(async () => { - await manager.loadAgent('TestAgent', 'http://test.agent/card'); + await manager.loadAgent('TestAgent', { + type: 'url', + url: 'http://test.agent/card', + }); }); it('should cancel a task on the correct agent', async () => { diff --git a/packages/core/src/agents/a2a-client-manager.ts b/packages/core/src/agents/a2a-client-manager.ts index c15d34179c..a40e39f2f4 100644 --- a/packages/core/src/agents/a2a-client-manager.ts +++ b/packages/core/src/agents/a2a-client-manager.ts @@ -26,6 +26,7 @@ import * as grpc from '@grpc/grpc-js'; import { v4 as uuidv4 } from 'uuid'; import { Agent as UndiciAgent, ProxyAgent } from 
'undici'; import { normalizeAgentCard } from './a2aUtils.js'; +import type { AgentCardLoadOptions } from './types.js'; import type { Config } from '../config/config.js'; import { debugLogger } from '../utils/debugLogger.js'; import { classifyAgentError } from './a2a-errors.js'; @@ -85,7 +86,7 @@ export class A2AClientManager { */ async loadAgent( name: string, - agentCardUrl: string, + options: AgentCardLoadOptions, authHandler?: AuthenticationHandler, ): Promise { if (this.clients.has(name) && this.agentCards.has(name)) { @@ -119,7 +120,24 @@ export class A2AClientManager { }; const resolver = new DefaultAgentCardResolver({ fetchImpl: cardFetch }); - const rawCard = await resolver.resolve(agentCardUrl, ''); + + let rawCard: unknown; + let urlIdentifier = 'inline JSON'; + + if (options.type === 'json') { + try { + rawCard = JSON.parse(options.json); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + throw new Error( + `Failed to parse inline agent card JSON for agent '${name}': ${msg}`, + ); + } + } else { + urlIdentifier = options.url; + rawCard = await resolver.resolve(options.url, ''); + } + // TODO: Remove normalizeAgentCard once @a2a-js/sdk handles // proto field name aliases (supportedInterfaces โ†’ additionalInterfaces, // protocolBinding โ†’ transport). 
@@ -153,12 +171,12 @@ export class A2AClientManager { this.agentCards.set(name, agentCard); debugLogger.debug( - `[A2AClientManager] Loaded agent '${name}' from ${agentCardUrl}`, + `[A2AClientManager] Loaded agent '${name}' from ${urlIdentifier}`, ); return agentCard; } catch (error: unknown) { - throw classifyAgentError(name, agentCardUrl, error); + throw classifyAgentError(name, urlIdentifier, error); } } diff --git a/packages/core/src/agents/agentLoader.test.ts b/packages/core/src/agents/agentLoader.test.ts index 917628f7e7..ca2b2be78b 100644 --- a/packages/core/src/agents/agentLoader.test.ts +++ b/packages/core/src/agents/agentLoader.test.ts @@ -19,6 +19,9 @@ import { DEFAULT_MAX_TIME_MINUTES, DEFAULT_MAX_TURNS, type LocalAgentDefinition, + type RemoteAgentDefinition, + getAgentCardLoadOptions, + getRemoteAgentTargetUrl, } from './types.js'; describe('loader', () => { @@ -232,6 +235,75 @@ agent_card_url: https://example.com/card }); }); + it('should parse a remote agent with agent_card_json', async () => { + const cardJson = JSON.stringify({ + name: 'json-agent', + url: 'https://example.com/agent', + version: '1.0', + }); + const filePath = await writeAgentMarkdown(`--- +kind: remote +name: json-remote +description: A JSON-based remote agent +agent_card_json: '${cardJson}' +--- +`); + const result = await parseAgentMarkdown(filePath); + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ + kind: 'remote', + name: 'json-remote', + description: 'A JSON-based remote agent', + agent_card_json: cardJson, + }); + // Should NOT have agent_card_url + expect(result[0]).not.toHaveProperty('agent_card_url'); + }); + + it('should reject agent_card_json that is not valid JSON', async () => { + const filePath = await writeAgentMarkdown(`--- +kind: remote +name: invalid-json-remote +agent_card_json: "not valid json {{" +--- +`); + await expect(parseAgentMarkdown(filePath)).rejects.toThrow( + /agent_card_json must be valid JSON/, + ); + }); + + it('should 
reject a remote agent with both agent_card_url and agent_card_json', async () => { + const filePath = await writeAgentMarkdown(`--- +kind: remote +name: both-fields +agent_card_url: https://example.com/card +agent_card_json: '{"name":"test"}' +--- +`); + await expect(parseAgentMarkdown(filePath)).rejects.toThrow( + /Validation failed/, + ); + }); + + it('should infer remote kind from agent_card_json', async () => { + const cardJson = JSON.stringify({ + name: 'test', + url: 'https://example.com', + }); + const filePath = await writeAgentMarkdown(`--- +name: inferred-json-remote +agent_card_json: '${cardJson}' +--- +`); + const result = await parseAgentMarkdown(filePath); + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ + kind: 'remote', + name: 'inferred-json-remote', + agent_card_json: cardJson, + }); + }); + it('should throw AgentLoadError if agent name is not a valid slug', async () => { const filePath = await writeAgentMarkdown(`--- name: Invalid Name With Spaces @@ -242,6 +314,99 @@ Body`); /Name must be a valid slug/, ); }); + + describe('error formatting and kind inference', () => { + it('should only show local agent errors when kind is inferred as local (via kind field)', async () => { + const filePath = await writeAgentMarkdown(`--- +kind: local +name: invalid-local +# missing description +--- +Body`); + const error = await parseAgentMarkdown(filePath).catch((e) => e); + expect(error).toBeInstanceOf(AgentLoadError); + expect(error.message).toContain('Validation failed'); + expect(error.message).toContain('description: Required'); + expect(error.message).not.toContain('Remote Agent'); + }); + + it('should only show local agent errors when kind is inferred as local (via local-specific keys)', async () => { + const filePath = await writeAgentMarkdown(`--- +name: invalid-local +# missing description +tools: + - run_shell_command +--- +Body`); + const error = await parseAgentMarkdown(filePath).catch((e) => e); + 
expect(error).toBeInstanceOf(AgentLoadError); + expect(error.message).toContain('Validation failed'); + expect(error.message).toContain('description: Required'); + expect(error.message).not.toContain('Remote Agent'); + }); + + it('should only show remote agent errors when kind is inferred as remote (via kind field)', async () => { + const filePath = await writeAgentMarkdown(`--- +kind: remote +name: invalid-remote +# missing agent_card_url +--- +Body`); + const error = await parseAgentMarkdown(filePath).catch((e) => e); + expect(error).toBeInstanceOf(AgentLoadError); + expect(error.message).toContain('Validation failed'); + expect(error.message).toContain('agent_card_url: Required'); + expect(error.message).not.toContain('Local Agent'); + }); + + it('should only show remote agent errors when kind is inferred as remote (via remote-specific keys)', async () => { + const filePath = await writeAgentMarkdown(`--- +name: invalid-remote +auth: + type: apiKey + key: my_key +# missing agent_card_url +--- +Body`); + const error = await parseAgentMarkdown(filePath).catch((e) => e); + expect(error).toBeInstanceOf(AgentLoadError); + expect(error.message).toContain('Validation failed'); + expect(error.message).toContain('agent_card_url: Required'); + expect(error.message).not.toContain('Local Agent'); + }); + + it('should show errors for both types when kind cannot be inferred', async () => { + const filePath = await writeAgentMarkdown(`--- +name: invalid-unknown +# missing description and missing agent_card_url, no specific keys +--- +Body`); + const error = await parseAgentMarkdown(filePath).catch((e) => e); + expect(error).toBeInstanceOf(AgentLoadError); + expect(error.message).toContain('Validation failed'); + expect(error.message).toContain('(Local Agent)'); + expect(error.message).toContain('(Remote Agent)'); + expect(error.message).toContain('description: Required'); + expect(error.message).toContain('agent_card_url: Required'); + }); + + it('should format errors without 
a stray colon when the path is empty (e.g. strict object with unknown keys)', async () => { + const filePath = await writeAgentMarkdown(`--- +kind: local +name: my-agent +description: test +unknown_field: true +--- +Body`); + const error = await parseAgentMarkdown(filePath).catch((e) => e); + expect(error).toBeInstanceOf(AgentLoadError); + expect(error.message).toContain( + "Unrecognized key(s) in object: 'unknown_field'", + ); + expect(error.message).not.toContain(': Unrecognized key(s)'); + expect(error.message).not.toContain('Required'); + }); + }); }); describe('markdownToAgentDefinition', () => { @@ -372,6 +537,40 @@ Body`); }, }); }); + + it('should convert remote agent definition with agent_card_json', () => { + const cardJson = JSON.stringify({ + name: 'json-agent', + url: 'https://example.com/agent', + }); + const markdown = { + kind: 'remote' as const, + name: 'json-remote', + description: 'A JSON remote agent', + agent_card_json: cardJson, + }; + + const result = markdownToAgentDefinition( + markdown, + ) as RemoteAgentDefinition; + expect(result.kind).toBe('remote'); + expect(result.name).toBe('json-remote'); + expect(result.agentCardJson).toBe(cardJson); + expect(result.agentCardUrl).toBeUndefined(); + }); + + it('should throw for remote agent with neither agent_card_url nor agent_card_json', () => { + // Cast to bypass compile-time check โ€” this tests the runtime guard + const markdown = { + kind: 'remote' as const, + name: 'no-card-agent', + description: 'Missing card info', + } as Parameters[0]; + + expect(() => markdownToAgentDefinition(markdown)).toThrow( + /neither agent_card_json nor agent_card_url/, + ); + }); }); describe('loadAgentsFromDirectory', () => { @@ -744,5 +943,103 @@ auth: }, }); }); + + it('should throw an error for an unknown auth type in markdownToAgentDefinition', () => { + const markdown = { + kind: 'remote' as const, + name: 'unknown-auth-agent', + agent_card_url: 'https://example.com/card', + auth: { + type: 'apiKey' as 
const, + key: 'some-key', + }, + }; + + // Mutate the object at runtime to bypass TypeScript compile-time checks cleanly + Object.assign(markdown.auth, { type: 'some-unknown-type' }); + + expect(() => markdownToAgentDefinition(markdown)).toThrow( + /Unknown auth type: some-unknown-type/, + ); + }); + }); + + describe('getAgentCardLoadOptions', () => { + it('should return json options when agentCardJson is present', () => { + const def = { + name: 'test', + agentCardJson: '{"url":"http://x"}', + } as RemoteAgentDefinition; + const opts = getAgentCardLoadOptions(def); + expect(opts).toEqual({ type: 'json', json: '{"url":"http://x"}' }); + }); + + it('should return url options when agentCardUrl is present', () => { + const def = { + name: 'test', + agentCardUrl: 'http://x/card', + } as RemoteAgentDefinition; + const opts = getAgentCardLoadOptions(def); + expect(opts).toEqual({ type: 'url', url: 'http://x/card' }); + }); + + it('should prefer agentCardJson over agentCardUrl when both present', () => { + const def = { + name: 'test', + agentCardJson: '{"url":"http://x"}', + agentCardUrl: 'http://x/card', + } as RemoteAgentDefinition; + const opts = getAgentCardLoadOptions(def); + expect(opts.type).toBe('json'); + }); + + it('should throw when neither is present', () => { + const def = { name: 'orphan' } as RemoteAgentDefinition; + expect(() => getAgentCardLoadOptions(def)).toThrow( + /Remote agent 'orphan' has neither agentCardUrl nor agentCardJson/, + ); + }); + }); + + describe('getRemoteAgentTargetUrl', () => { + it('should return agentCardUrl when present', () => { + const def = { + name: 'test', + agentCardUrl: 'http://x/card', + } as RemoteAgentDefinition; + expect(getRemoteAgentTargetUrl(def)).toBe('http://x/card'); + }); + + it('should extract url from agentCardJson when agentCardUrl is absent', () => { + const def = { + name: 'test', + agentCardJson: JSON.stringify({ + name: 'agent', + url: 'https://example.com/agent', + }), + } as RemoteAgentDefinition; + 
expect(getRemoteAgentTargetUrl(def)).toBe('https://example.com/agent'); + }); + + it('should return undefined when JSON has no url field', () => { + const def = { + name: 'test', + agentCardJson: JSON.stringify({ name: 'agent' }), + } as RemoteAgentDefinition; + expect(getRemoteAgentTargetUrl(def)).toBeUndefined(); + }); + + it('should return undefined when agentCardJson is invalid JSON', () => { + const def = { + name: 'test', + agentCardJson: 'not json', + } as RemoteAgentDefinition; + expect(getRemoteAgentTargetUrl(def)).toBeUndefined(); + }); + + it('should return undefined when neither field is present', () => { + const def = { name: 'test' } as RemoteAgentDefinition; + expect(getRemoteAgentTargetUrl(def)).toBeUndefined(); + }); }); }); diff --git a/packages/core/src/agents/agentLoader.ts b/packages/core/src/agents/agentLoader.ts index 1b9eb1ea4e..d34d0e974e 100644 --- a/packages/core/src/agents/agentLoader.ts +++ b/packages/core/src/agents/agentLoader.ts @@ -12,6 +12,7 @@ import * as crypto from 'node:crypto'; import { z } from 'zod'; import { type AgentDefinition, + type RemoteAgentDefinition, DEFAULT_MAX_TURNS, DEFAULT_MAX_TIME_MINUTES, } from './types.js'; @@ -21,79 +22,6 @@ import { isValidToolName } from '../tools/tool-names.js'; import { FRONTMATTER_REGEX } from '../skills/skillLoader.js'; import { getErrorMessage } from '../utils/errors.js'; -/** - * DTO for Markdown parsing - represents the structure from frontmatter. 
- */ -interface FrontmatterBaseAgentDefinition { - name: string; - display_name?: string; -} - -interface FrontmatterMCPServerConfig { - command?: string; - args?: string[]; - env?: Record; - cwd?: string; - url?: string; - http_url?: string; - headers?: Record; - tcp?: string; - type?: 'sse' | 'http'; - timeout?: number; - trust?: boolean; - description?: string; - include_tools?: string[]; - exclude_tools?: string[]; -} - -interface FrontmatterLocalAgentDefinition - extends FrontmatterBaseAgentDefinition { - kind: 'local'; - description: string; - tools?: string[]; - mcp_servers?: Record; - system_prompt: string; - model?: string; - temperature?: number; - max_turns?: number; - timeout_mins?: number; -} - -/** - * Authentication configuration for remote agents in frontmatter format. - */ -interface FrontmatterAuthConfig { - type: 'apiKey' | 'http' | 'google-credentials' | 'oauth'; - // API Key - key?: string; - name?: string; - // HTTP - scheme?: string; - token?: string; - username?: string; - password?: string; - value?: string; - // Google Credentials - scopes?: string[]; - // OAuth2 - client_id?: string; - client_secret?: string; - authorization_url?: string; - token_url?: string; -} - -interface FrontmatterRemoteAgentDefinition - extends FrontmatterBaseAgentDefinition { - kind: 'remote'; - description?: string; - agent_card_url: string; - auth?: FrontmatterAuthConfig; -} - -type FrontmatterAgentDefinition = - | FrontmatterLocalAgentDefinition - | FrontmatterRemoteAgentDefinition; - /** * Error thrown when an agent definition is invalid or cannot be loaded. */ @@ -159,15 +87,13 @@ const localAgentSchema = z }) .strict(); -/** - * Base fields shared by all auth configs. - */ +type FrontmatterLocalAgentDefinition = z.infer & { + system_prompt: string; +}; + +// Base fields shared by all auth configs. const baseAuthFields = {}; -/** - * API Key auth schema. - * Supports sending key in header, query parameter, or cookie. 
- */ const apiKeyAuthSchema = z.object({ ...baseAuthFields, type: z.literal('apiKey'), @@ -175,11 +101,6 @@ const apiKeyAuthSchema = z.object({ name: z.string().optional(), }); -/** - * HTTP auth schema (Bearer or Basic). - * Note: Validation for scheme-specific fields is applied in authConfigSchema - * since discriminatedUnion doesn't support refined schemas directly. - */ const httpAuthSchema = z.object({ ...baseAuthFields, type: z.literal('http'), @@ -190,19 +111,12 @@ const httpAuthSchema = z.object({ value: z.string().min(1).optional(), }); -/** - * Google Credentials auth schema. - */ const googleCredentialsAuthSchema = z.object({ ...baseAuthFields, type: z.literal('google-credentials'), scopes: z.array(z.string()).optional(), }); -/** - * OAuth2 auth schema. - * authorization_url and token_url can be discovered from the agent card if omitted. - */ const oauth2AuthSchema = z.object({ ...baseAuthFields, type: z.literal('oauth'), @@ -222,18 +136,16 @@ const authConfigSchema = z ]) .superRefine((data, ctx) => { if (data.type === 'http') { - if (data.value) { - // Raw mode - only scheme and value are needed - return; - } - if (data.scheme === 'Bearer' && !data.token) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: 'Bearer scheme requires "token"', - path: ['token'], - }); - } - if (data.scheme === 'Basic') { + if (data.value) return; + if (data.scheme === 'Bearer') { + if (!data.token) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'Bearer scheme requires "token"', + path: ['token'], + }); + } + } else if (data.scheme === 'Basic') { if (!data.username) { ctx.addIssue({ code: z.ZodIssueCode.custom, @@ -248,55 +160,129 @@ const authConfigSchema = z path: ['password'], }); } + } else { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `HTTP scheme "${data.scheme}" requires "value"`, + path: ['value'], + }); } } }); -const remoteAgentSchema = z - .object({ - kind: z.literal('remote').optional().default('remote'), - name: 
nameSchema, - description: z.string().optional(), - display_name: z.string().optional(), +type FrontmatterAuthConfig = z.infer; + +const baseRemoteAgentSchema = z.object({ + kind: z.literal('remote').optional().default('remote'), + name: nameSchema, + description: z.string().optional(), + display_name: z.string().optional(), + auth: authConfigSchema.optional(), +}); + +const remoteAgentUrlSchema = baseRemoteAgentSchema + .extend({ agent_card_url: z.string().url(), - auth: authConfigSchema.optional(), + agent_card_json: z.undefined().optional(), }) .strict(); -// Use a Zod union to automatically discriminate between local and remote -// agent types. +const remoteAgentJsonSchema = baseRemoteAgentSchema + .extend({ + agent_card_url: z.undefined().optional(), + agent_card_json: z.string().refine( + (val) => { + try { + JSON.parse(val); + return true; + } catch { + return false; + } + }, + { message: 'agent_card_json must be valid JSON' }, + ), + }) + .strict(); + +const remoteAgentSchema = z.union([ + remoteAgentUrlSchema, + remoteAgentJsonSchema, +]); + +type FrontmatterRemoteAgentDefinition = z.infer; + +type FrontmatterAgentDefinition = + | FrontmatterLocalAgentDefinition + | FrontmatterRemoteAgentDefinition; + const agentUnionOptions = [ - { schema: localAgentSchema, label: 'Local Agent' }, - { schema: remoteAgentSchema, label: 'Remote Agent' }, -] as const; + { label: 'Local Agent' }, + { label: 'Remote Agent' }, + { label: 'Remote Agent' }, +]; const remoteAgentsListSchema = z.array(remoteAgentSchema); const markdownFrontmatterSchema = z.union([ - agentUnionOptions[0].schema, - agentUnionOptions[1].schema, + localAgentSchema, + remoteAgentUrlSchema, + remoteAgentJsonSchema, ]); -function formatZodError(error: z.ZodError, context: string): string { - const issues = error.issues - .map((i) => { +function guessIntendedKind(rawInput: unknown): 'local' | 'remote' | undefined { + if (typeof rawInput !== 'object' || rawInput === null) return undefined; + const input = 
rawInput as Partial & + Partial; + + if (input.kind === 'local') return 'local'; + if (input.kind === 'remote') return 'remote'; + + const hasLocalKeys = + 'tools' in input || + 'mcp_servers' in input || + 'model' in input || + 'temperature' in input || + 'max_turns' in input || + 'timeout_mins' in input; + const hasRemoteKeys = + 'agent_card_url' in input || 'auth' in input || 'agent_card_json' in input; + + if (hasLocalKeys && !hasRemoteKeys) return 'local'; + if (hasRemoteKeys && !hasLocalKeys) return 'remote'; + + return undefined; +} + +function formatZodError( + error: z.ZodError, + context: string, + rawInput?: unknown, +): string { + const intendedKind = rawInput ? guessIntendedKind(rawInput) : undefined; + + const formatIssues = (issues: z.ZodIssue[], unionPrefix?: string): string[] => + issues.flatMap((i) => { // Handle union errors specifically to give better context if (i.code === z.ZodIssueCode.invalid_union) { - return i.unionErrors - .map((unionError, index) => { - const label = - agentUnionOptions[index]?.label ?? `Agent type #${index + 1}`; - const unionIssues = unionError.issues - .map((u) => `${u.path.join('.')}: ${u.message}`) - .join(', '); - return `(${label}) ${unionIssues}`; - }) - .join('\n'); + return i.unionErrors.flatMap((unionError, index) => { + const label = unionPrefix + ? unionPrefix + : ((agentUnionOptions[index] as { label?: string })?.label ?? + `Branch #${index + 1}`); + + if (intendedKind === 'local' && label === 'Remote Agent') return []; + if (intendedKind === 'remote' && label === 'Local Agent') return []; + + return formatIssues(unionError.issues, label); + }); } - return `${i.path.join('.')}: ${i.message}`; - }) - .join('\n'); - return `${context}:\n${issues}`; + const prefix = unionPrefix ? `(${unionPrefix}) ` : ''; + const path = i.path.length > 0 ? 
`${i.path.join('.')}: ` : ''; + return `${prefix}${path}${i.message}`; + }); + + const formatted = Array.from(new Set(formatIssues(error.issues))).join('\n'); + return `${context}:\n${formatted}`; } /** @@ -343,8 +329,7 @@ export async function parseAgentMarkdown( } catch (error) { throw new AgentLoadError( filePath, - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - `YAML frontmatter parsing failed: ${(error as Error).message}`, + `YAML frontmatter parsing failed: ${getErrorMessage(error)}`, ); } @@ -368,7 +353,7 @@ export async function parseAgentMarkdown( if (!result.success) { throw new AgentLoadError( filePath, - `Validation failed: ${formatZodError(result.error, 'Agent Definition')}`, + `Validation failed: ${formatZodError(result.error, 'Agent Definition', rawFrontmatter)}`, ); } @@ -383,17 +368,14 @@ export async function parseAgentMarkdown( ]; } - // Local agent validation - // Validate tools - // Construct the local agent definition - const agentDef: FrontmatterLocalAgentDefinition = { - ...frontmatter, - kind: 'local', - system_prompt: body.trim(), - }; - - return [agentDef]; + return [ + { + ...frontmatter, + kind: 'local', + system_prompt: body.trim(), + }, + ]; } /** @@ -403,15 +385,9 @@ export async function parseAgentMarkdown( function convertFrontmatterAuthToConfig( frontmatter: FrontmatterAuthConfig, ): A2AAuthConfig { - const base = {}; - switch (frontmatter.type) { case 'apiKey': - if (!frontmatter.key) { - throw new Error('Internal error: API key missing after validation.'); - } return { - ...base, type: 'apiKey', key: frontmatter.key, name: frontmatter.name, @@ -419,20 +395,13 @@ function convertFrontmatterAuthToConfig( case 'google-credentials': return { - ...base, type: 'google-credentials', scopes: frontmatter.scopes, }; - case 'http': { - if (!frontmatter.scheme) { - throw new Error( - 'Internal error: HTTP scheme missing after validation.', - ); - } + case 'http': if (frontmatter.value) { return { - ...base, type: 
'http', scheme: frontmatter.scheme, value: frontmatter.value, @@ -440,40 +409,27 @@ function convertFrontmatterAuthToConfig( } switch (frontmatter.scheme) { case 'Bearer': - if (!frontmatter.token) { - throw new Error( - 'Internal error: Bearer token missing after validation.', - ); - } + // Token is required by schema validation return { - ...base, type: 'http', scheme: 'Bearer', - token: frontmatter.token, + + token: frontmatter.token!, }; case 'Basic': - if (!frontmatter.username || !frontmatter.password) { - throw new Error( - 'Internal error: Basic auth credentials missing after validation.', - ); - } + // Username/password are required by schema validation return { - ...base, type: 'http', scheme: 'Basic', - username: frontmatter.username, - password: frontmatter.password, + username: frontmatter.username!, + password: frontmatter.password!, }; - default: { - // Other IANA schemes without a value should not reach here after validation + default: throw new Error(`Unknown HTTP scheme: ${frontmatter.scheme}`); - } } - } case 'oauth': return { - ...base, type: 'oauth2', client_id: frontmatter.client_id, client_secret: frontmatter.client_secret, @@ -483,8 +439,12 @@ function convertFrontmatterAuthToConfig( }; default: { - const exhaustive: never = frontmatter.type; - throw new Error(`Unknown auth type: ${exhaustive}`); + const exhaustive: never = frontmatter; + const raw: unknown = exhaustive; + if (typeof raw === 'object' && raw !== null && 'type' in raw) { + throw new Error(`Unknown auth type: ${String(raw['type'])}`); + } + throw new Error('Unknown auth type'); } } } @@ -515,25 +475,41 @@ export function markdownToAgentDefinition( }; if (markdown.kind === 'remote') { - return { + const base: RemoteAgentDefinition = { kind: 'remote', name: markdown.name, description: markdown.description || '', displayName: markdown.display_name, - agentCardUrl: markdown.agent_card_url, auth: markdown.auth ? 
convertFrontmatterAuthToConfig(markdown.auth) : undefined, inputConfig, metadata, }; + + if ( + 'agent_card_json' in markdown && + markdown.agent_card_json !== undefined + ) { + base.agentCardJson = markdown.agent_card_json; + return base; + } + if ('agent_card_url' in markdown && markdown.agent_card_url !== undefined) { + base.agentCardUrl = markdown.agent_card_url; + return base; + } + + throw new AgentLoadError( + metadata?.filePath || 'unknown', + 'Unexpected state: neither agent_card_json nor agent_card_url present on remote agent', + ); } // If a model is specified, use it. Otherwise, inherit const modelName = markdown.model || 'inherit'; const mcpServers: Record = {}; - if (markdown.kind === 'local' && markdown.mcp_servers) { + if (markdown.mcp_servers) { for (const [name, config] of Object.entries(markdown.mcp_servers)) { mcpServers[name] = new MCPServerConfig( config.command, @@ -606,15 +582,13 @@ export async function loadAgentsFromDirectory( dirEntries = await fs.readdir(dir, { withFileTypes: true }); } catch (error) { // If directory doesn't exist, just return empty - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + if (error instanceof Error && 'code' in error && error.code === 'ENOENT') { return result; } result.errors.push( new AgentLoadError( dir, - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - `Could not list directory: ${(error as Error).message}`, + `Could not list directory: ${getErrorMessage(error)}`, ), ); return result; @@ -644,8 +618,7 @@ export async function loadAgentsFromDirectory( result.errors.push( new AgentLoadError( filePath, - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - `Unexpected error: ${(error as Error).message}`, + `Unexpected error: ${getErrorMessage(error)}`, ), ); } diff --git a/packages/core/src/agents/browser/browserAgentDefinition.ts 
b/packages/core/src/agents/browser/browserAgentDefinition.ts index 064d66dfbc..b04b2a3ede 100644 --- a/packages/core/src/agents/browser/browserAgentDefinition.ts +++ b/packages/core/src/agents/browser/browserAgentDefinition.ts @@ -112,6 +112,7 @@ Some errors are unrecoverable and retrying will never help. When you see ANY of - "Could not connect to Chrome" or "Failed to connect to Chrome" or "Timed out connecting to Chrome" โ€” Include the full error message with its remediation steps in your summary verbatim. Do NOT paraphrase or omit instructions. - "Browser closed" or "Target closed" or "Session closed" โ€” The browser process has terminated. Include the error and tell the user to try again. - "net::ERR_" network errors on the SAME URL after 2 retries โ€” the site is unreachable. Report the URL and error. +- "reached maximum action limit" โ€” You have performed too many actions in this task. Stop immediately and report this limit to the user. - Any error that appears IDENTICALLY 3+ times in a row โ€” it will not resolve by retrying. Do NOT keep retrying terminal errors. Report them with actionable remediation steps and exit immediately. 
diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts index 36652bbb64..303c07288d 100644 --- a/packages/core/src/agents/browser/browserManager.test.ts +++ b/packages/core/src/agents/browser/browserManager.test.ts @@ -697,4 +697,28 @@ describe('BrowserManager', () => { expect(injectAutomationOverlay).not.toHaveBeenCalled(); }); }); + + describe('Rate limiting', () => { + it('should terminate task when maxActionsPerTask is reached', async () => { + const limitedConfig = makeFakeConfig({ + agents: { + browser: { + maxActionsPerTask: 3, + }, + }, + }); + const manager = new BrowserManager(limitedConfig); + + // First 3 calls should succeed + await manager.callTool('take_snapshot', {}); + await manager.callTool('take_snapshot', { some: 'args' }); + await manager.callTool('take_snapshot', { other: 'args' }); + await manager.callTool('take_snapshot', { other: 'new args' }); + + // 4th call should throw + await expect(manager.callTool('take_snapshot', {})).rejects.toThrow( + /maximum action limit \(3\)/, + ); + }); + }); }); diff --git a/packages/core/src/agents/browser/browserManager.ts b/packages/core/src/agents/browser/browserManager.ts index c5fc6c5053..cc059feea3 100644 --- a/packages/core/src/agents/browser/browserManager.ts +++ b/packages/core/src/agents/browser/browserManager.ts @@ -97,6 +97,10 @@ export class BrowserManager { private mcpTransport: StdioClientTransport | undefined; private discoveredTools: McpTool[] = []; + /** State for action rate limiting */ + private actionCounter = 0; + private readonly maxActionsPerTask: number; + /** * Whether to inject the automation overlay. * Always false in headless mode (no visible window to decorate). 
@@ -108,6 +112,8 @@ export class BrowserManager { const browserConfig = config.getBrowserAgentConfig(); this.shouldInjectOverlay = !browserConfig?.customConfig?.headless; this.shouldDisableInput = config.shouldDisableBrowserUserInput(); + this.maxActionsPerTask = + browserConfig?.customConfig.maxActionsPerTask ?? 100; } /** @@ -151,6 +157,16 @@ export class BrowserManager { throw signal.reason ?? new Error('Operation cancelled'); } + // Hard enforcement of per-action rate limit + if (this.actionCounter > this.maxActionsPerTask) { + const error = new Error( + `Browser agent reached maximum action limit (${this.maxActionsPerTask}). ` + + `Task terminated to prevent runaway execution. To config the limit, use maxActionsPerTask in the settings.`, + ); + throw error; + } + this.actionCounter++; + const errorMessage = this.checkNavigationRestrictions(toolName, args); if (errorMessage) { return { diff --git a/packages/core/src/agents/registry.test.ts b/packages/core/src/agents/registry.test.ts index de0d95e659..97d2c9ea09 100644 --- a/packages/core/src/agents/registry.test.ts +++ b/packages/core/src/agents/registry.test.ts @@ -596,7 +596,7 @@ describe('AgentRegistry', () => { }); expect(loadAgentSpy).toHaveBeenCalledWith( 'RemoteAgentWithAuth', - 'https://example.com/card', + { type: 'url', url: 'https://example.com/card' }, mockHandler, ); expect(registry.getDefinition('RemoteAgentWithAuth')).toEqual( diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts index 619f1dd71c..625302a6c7 100644 --- a/packages/core/src/agents/registry.ts +++ b/packages/core/src/agents/registry.ts @@ -4,10 +4,12 @@ * SPDX-License-Identifier: Apache-2.0 */ +import * as crypto from 'node:crypto'; import { Storage } from '../config/storage.js'; import { CoreEvent, coreEvents } from '../utils/events.js'; import type { AgentOverride, Config } from '../config/config.js'; import type { AgentDefinition, LocalAgentDefinition } from './types.js'; +import { 
getAgentCardLoadOptions, getRemoteAgentTargetUrl } from './types.js'; import { loadAgentsFromDirectory } from './agentLoader.js'; import { CodebaseInvestigatorAgent } from './codebase-investigator.js'; import { CliHelpAgent } from './cli-help-agent.js'; @@ -162,7 +164,14 @@ export class AgentRegistry { if (!agent.metadata) { agent.metadata = {}; } - agent.metadata.hash = agent.agentCardUrl; + agent.metadata.hash = + agent.agentCardUrl ?? + (agent.agentCardJson + ? crypto + .createHash('sha256') + .update(agent.agentCardJson) + .digest('hex') + : undefined); } if (!agent.metadata?.hash) { @@ -443,12 +452,13 @@ export class AgentRegistry { ); return; } + const targetUrl = getRemoteAgentTargetUrl(remoteDef); let authHandler: AuthenticationHandler | undefined; if (definition.auth) { const provider = await A2AAuthProviderFactory.create({ authConfig: definition.auth, agentName: definition.name, - targetUrl: definition.agentCardUrl, + targetUrl, agentCardUrl: remoteDef.agentCardUrl, }); if (!provider) { @@ -461,7 +471,7 @@ export class AgentRegistry { const agentCard = await clientManager.loadAgent( remoteDef.name, - remoteDef.agentCardUrl, + getAgentCardLoadOptions(remoteDef), authHandler, ); @@ -515,7 +525,7 @@ export class AgentRegistry { if (this.config.getDebugMode()) { debugLogger.log( - `[AgentRegistry] Registered remote agent '${definition.name}' with card: ${definition.agentCardUrl}`, + `[AgentRegistry] Registered remote agent '${definition.name}' with card: ${definition.agentCardUrl ?? 
'inline JSON'}`, ); } this.agents.set(definition.name, definition); diff --git a/packages/core/src/agents/remote-invocation.test.ts b/packages/core/src/agents/remote-invocation.test.ts index b5fdd4a4fa..3ff7ebe794 100644 --- a/packages/core/src/agents/remote-invocation.test.ts +++ b/packages/core/src/agents/remote-invocation.test.ts @@ -189,7 +189,7 @@ describe('RemoteAgentInvocation', () => { expect(mockClientManager.loadAgent).toHaveBeenCalledWith( 'test-agent', - 'http://test-agent/card', + { type: 'url', url: 'http://test-agent/card' }, undefined, ); }); @@ -240,7 +240,7 @@ describe('RemoteAgentInvocation', () => { }); expect(mockClientManager.loadAgent).toHaveBeenCalledWith( 'test-agent', - 'http://test-agent/card', + { type: 'url', url: 'http://test-agent/card' }, mockHandler, ); }); @@ -266,11 +266,10 @@ describe('RemoteAgentInvocation', () => { ); const result = await invocation.execute(new AbortController().signal); - expect(result.returnDisplay).toMatchObject({ - result: expect.stringContaining( - "Failed to create auth provider for agent 'test-agent'", - ), - }); + expect(result.returnDisplay).toMatchObject({ state: 'error' }); + expect((result.returnDisplay as SubagentProgress).result).toContain( + "Failed to create auth provider for agent 'test-agent'", + ); }); it('should not load the agent if already present', async () => { diff --git a/packages/core/src/agents/remote-invocation.ts b/packages/core/src/agents/remote-invocation.ts index 130f0f1a38..7dda4b0ee0 100644 --- a/packages/core/src/agents/remote-invocation.ts +++ b/packages/core/src/agents/remote-invocation.ts @@ -16,6 +16,8 @@ import { type RemoteAgentDefinition, type AgentInputs, type SubagentProgress, + getAgentCardLoadOptions, + getRemoteAgentTargetUrl, } from './types.js'; import { type AgentLoopContext } from '../config/agent-loop-context.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; @@ -92,10 +94,11 @@ export class RemoteAgentInvocation extends 
BaseToolInvocation< } if (this.definition.auth) { + const targetUrl = getRemoteAgentTargetUrl(this.definition); const provider = await A2AAuthProviderFactory.create({ authConfig: this.definition.auth, agentName: this.definition.name, - targetUrl: this.definition.agentCardUrl, + targetUrl, agentCardUrl: this.definition.agentCardUrl, }); if (!provider) { @@ -162,7 +165,7 @@ export class RemoteAgentInvocation extends BaseToolInvocation< if (!this.clientManager.getClient(this.definition.name)) { await this.clientManager.loadAgent( this.definition.name, - this.definition.agentCardUrl, + getAgentCardLoadOptions(this.definition), authHandler, ); } diff --git a/packages/core/src/agents/types.ts b/packages/core/src/agents/types.ts index e36d8f0ccb..456f4cfdb3 100644 --- a/packages/core/src/agents/types.ts +++ b/packages/core/src/agents/types.ts @@ -13,6 +13,7 @@ import type { AnyDeclarativeTool } from '../tools/tools.js'; import { type z } from 'zod'; import type { ModelConfig } from '../services/modelConfigService.js'; import type { AnySchema } from 'ajv'; +import type { AgentCard } from '@a2a-js/sdk'; import type { A2AAuthConfig } from './auth-provider/types.js'; import type { MCPServerConfig } from '../config/config.js'; @@ -128,6 +129,62 @@ export function isToolActivityError(data: unknown): boolean { * The base definition for an agent. * @template TOutput The specific Zod schema for the agent's final output object. */ +export type AgentCardLoadOptions = + | { type: 'url'; url: string } + | { type: 'json'; json: string }; + +/** Minimal shape needed by helper functions, avoids generic TOutput constraints. */ +interface RemoteAgentRef { + name: string; + agentCardUrl?: string; + agentCardJson?: string; +} + +/** + * Derives the AgentCardLoadOptions from a RemoteAgentDefinition. + * Throws if neither agentCardUrl nor agentCardJson is present. 
+ */ +export function getAgentCardLoadOptions( + def: RemoteAgentRef, +): AgentCardLoadOptions { + if (def.agentCardJson) { + return { type: 'json', json: def.agentCardJson }; + } + if (def.agentCardUrl) { + return { type: 'url', url: def.agentCardUrl }; + } + throw new Error( + `Remote agent '${def.name}' has neither agentCardUrl nor agentCardJson`, + ); +} + +/** + * Extracts a target URL for auth providers from a RemoteAgentDefinition. + * For URL-based agents, returns the agentCardUrl. + * For JSON-based agents, attempts to parse the URL from the inline card JSON. + * Returns undefined if no URL can be determined. + */ +export function getRemoteAgentTargetUrl( + def: RemoteAgentRef, +): string | undefined { + if (def.agentCardUrl) { + return def.agentCardUrl; + } + if (def.agentCardJson) { + try { + const parsed: unknown = JSON.parse(def.agentCardJson); + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const card = parsed as AgentCard; + if (card.url) { + return card.url; + } + } catch { + // JSON parse will fail properly later in loadAgent + } + } + return undefined; +} + export interface BaseAgentDefinition< TOutput extends z.ZodTypeAny = z.ZodUnknown, > { @@ -172,11 +229,10 @@ export interface LocalAgentDefinition< processOutput?: (output: z.infer) => string; } -export interface RemoteAgentDefinition< +export interface BaseRemoteAgentDefinition< TOutput extends z.ZodTypeAny = z.ZodUnknown, > extends BaseAgentDefinition { kind: 'remote'; - agentCardUrl: string; /** The user-provided description, before any remote card merging. 
*/ originalDescription?: string; /** @@ -187,6 +243,13 @@ export interface RemoteAgentDefinition< auth?: A2AAuthConfig; } +export interface RemoteAgentDefinition< + TOutput extends z.ZodTypeAny = z.ZodUnknown, +> extends BaseRemoteAgentDefinition { + agentCardUrl?: string; + agentCardJson?: string; +} + export type AgentDefinition = | LocalAgentDefinition | RemoteAgentDefinition; diff --git a/packages/core/src/code_assist/setup.test.ts b/packages/core/src/code_assist/setup.test.ts index 475ac7aa6e..cf2251ed13 100644 --- a/packages/core/src/code_assist/setup.test.ts +++ b/packages/core/src/code_assist/setup.test.ts @@ -15,8 +15,20 @@ import { CodeAssistServer } from '../code_assist/server.js'; import type { OAuth2Client } from 'google-auth-library'; import { UserTierId, type GeminiUserTier } from './types.js'; import type { Config } from '../config/config.js'; +import { + logOnboardingSuccess, + OnboardingSuccessEvent, +} from '../telemetry/index.js'; vi.mock('../code_assist/server.js'); +vi.mock('../telemetry/index.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + logOnboardingStart: vi.fn(), + logOnboardingSuccess: vi.fn(), + }; +}); const mockPaidTier: GeminiUserTier = { id: UserTierId.STANDARD, @@ -214,7 +226,20 @@ describe('setupUser', () => { mockLoad.mockResolvedValue({ allowedTiers: [mockPaidTier], }); - const userData = await setupUser({} as OAuth2Client, mockConfig); + mockOnboardUser.mockImplementation(async () => { + await new Promise((resolve) => setTimeout(resolve, 1500)); + return { + done: true, + response: { + cloudaicompanionProject: { + id: 'server-project', + }, + }, + }; + }); + const userDataPromise = setupUser({} as OAuth2Client, mockConfig); + await vi.advanceTimersByTimeAsync(1500); + const userData = await userDataPromise; expect(mockOnboardUser).toHaveBeenCalledWith( expect.objectContaining({ tierId: UserTierId.STANDARD, @@ -227,6 +252,13 @@ describe('setupUser', () => { userTierName: 
'paid', hasOnboardedPreviously: false, }); + expect(logOnboardingSuccess).toHaveBeenCalledWith( + mockConfig, + expect.any(OnboardingSuccessEvent), + ); + const event = vi.mocked(logOnboardingSuccess).mock.calls[0][1]; + expect(event.userTier).toBe('paid'); + expect(event.duration_ms).toBeGreaterThanOrEqual(1500); }); it('should onboard a new free user when project ID is not set', async () => { diff --git a/packages/core/src/code_assist/setup.ts b/packages/core/src/code_assist/setup.ts index 59e8749912..5e94aee8c7 100644 --- a/packages/core/src/code_assist/setup.ts +++ b/packages/core/src/code_assist/setup.ts @@ -251,6 +251,7 @@ async function _doSetupUser( } logOnboardingStart(config, new OnboardingStartEvent()); + const onboardingStartTime = Date.now(); let lroRes = await caServer.onboardUser(onboardReq); if (!lroRes.done && lroRes.name) { @@ -261,8 +262,10 @@ async function _doSetupUser( } } - const userTier = tier.id ?? UserTierId.STANDARD; - logOnboardingSuccess(config, new OnboardingSuccessEvent(userTier)); + logOnboardingSuccess( + config, + new OnboardingSuccessEvent(tier.name, Date.now() - onboardingStartTime), + ); if (!lroRes.response?.cloudaicompanionProject?.id) { if (projectId) { diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index f8247f8377..99688eead5 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -1474,6 +1474,22 @@ describe('Server Config (config.ts)', () => { expect(browserConfig.customConfig.visualModel).toBe( 'custom-visual-model', ); + expect(browserConfig.customConfig.maxActionsPerTask).toBe(100); // default + }); + + it('should return custom maxActionsPerTask', () => { + const params: ConfigParameters = { + ...baseParams, + agents: { + browser: { + maxActionsPerTask: 50, + }, + }, + }; + const config = new Config(params); + const browserConfig = config.getBrowserAgentConfig(); + + 
expect(browserConfig.customConfig.maxActionsPerTask).toBe(50); }); it('should apply defaults for partial custom config', () => { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index f4f186ff8f..795df747cb 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -331,6 +331,8 @@ export interface BrowserAgentCustomConfig { allowedDomains?: string[]; /** Disable user input on the browser window during automation. Default: true in non-headless mode */ disableUserInput?: boolean; + /** Maximum number of actions (tool calls) allowed per task. Default: 100 */ + maxActionsPerTask?: number; /** Whether to confirm sensitive actions (e.g., fill_form, evaluate_script). */ confirmSensitiveActions?: boolean; /** Whether to block file uploads. */ @@ -3194,6 +3196,7 @@ export class Config implements McpContext, AgentLoopContext { visualModel: customConfig.visualModel, allowedDomains: customConfig.allowedDomains, disableUserInput: customConfig.disableUserInput, + maxActionsPerTask: customConfig.maxActionsPerTask ?? 
100, confirmSensitiveActions: customConfig.confirmSensitiveActions, blockFileUploads: customConfig.blockFileUploads, }, diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts index df230b4d5b..5bde6a44da 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.test.ts @@ -4,8 +4,9 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { LinuxSandboxManager } from './LinuxSandboxManager.js'; +import * as sandboxManager from '../../services/sandboxManager.js'; import type { SandboxRequest } from '../../services/sandboxManager.js'; import fs from 'node:fs'; @@ -43,6 +44,10 @@ describe('LinuxSandboxManager', () => { manager = new LinuxSandboxManager({ workspace }); }); + afterEach(() => { + vi.restoreAllMocks(); + }); + const getBwrapArgs = async (req: SandboxRequest) => { const result = await manager.prepareCommand(req); expect(result.program).toBe('sh'); @@ -55,63 +60,19 @@ describe('LinuxSandboxManager', () => { return result.args.slice(4); }; - it('correctly outputs bwrap as the program with appropriate isolation flags', async () => { - const bwrapArgs = await getBwrapArgs({ - command: 'ls', - args: ['-la'], - cwd: workspace, - env: {}, - }); - - expect(bwrapArgs).toEqual([ - '--unshare-all', - '--new-session', - '--die-with-parent', - '--ro-bind', - '/', - '/', - '--dev', - '/dev', - '--proc', - '/proc', - '--tmpfs', - '/tmp', - '--bind', - workspace, - workspace, - '--ro-bind', - `${workspace}/.gitignore`, - `${workspace}/.gitignore`, - '--ro-bind', - `${workspace}/.geminiignore`, - `${workspace}/.geminiignore`, - '--ro-bind', - `${workspace}/.git`, - `${workspace}/.git`, - '--seccomp', - '9', - '--', - 'ls', - '-la', - ]); - }); - - it('maps allowedPaths to bwrap binds', async 
() => { - const bwrapArgs = await getBwrapArgs({ - command: 'node', - args: ['script.js'], - cwd: workspace, - env: {}, - policy: { - allowedPaths: ['/tmp/cache', '/opt/tools', workspace], - }, - }); - - // Verify the specific bindings were added correctly + /** + * Helper to verify only the dynamic, policy-based binds (e.g. allowedPaths, forbiddenPaths). + * It asserts that the base workspace and governance files are present exactly once, + * then strips them away, leaving only the dynamic binds for a focused, non-brittle assertion. + */ + const expectDynamicBinds = ( + bwrapArgs: string[], + expectedDynamicBinds: string[], + ) => { const bindsIndex = bwrapArgs.indexOf('--seccomp'); - const binds = bwrapArgs.slice(bwrapArgs.indexOf('--bind'), bindsIndex); + const allBinds = bwrapArgs.slice(bwrapArgs.indexOf('--bind'), bindsIndex); - expect(binds).toEqual([ + const baseBinds = [ '--bind', workspace, workspace, @@ -124,85 +85,353 @@ describe('LinuxSandboxManager', () => { '--ro-bind', `${workspace}/.git`, `${workspace}/.git`, - '--bind-try', - '/tmp/cache', - '/tmp/cache', - '--bind-try', - '/opt/tools', - '/opt/tools', - ]); - }); + ]; - it('protects real paths of governance files if they are symlinks', async () => { - vi.mocked(fs.realpathSync).mockImplementation((p) => { - if (p.toString() === `${workspace}/.gitignore`) - return '/shared/global.gitignore'; - return p.toString(); + // Verify the base binds are present exactly at the beginning + expect(allBinds.slice(0, baseBinds.length)).toEqual(baseBinds); + + // Extract the remaining dynamic binds + const dynamicBinds = allBinds.slice(baseBinds.length); + expect(dynamicBinds).toEqual(expectedDynamicBinds); + }; + + describe('prepareCommand', () => { + it('should correctly format the base command and args', async () => { + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: ['-la'], + cwd: workspace, + env: {}, + }); + + expect(bwrapArgs).toEqual([ + '--unshare-all', + '--new-session', + 
'--die-with-parent', + '--ro-bind', + '/', + '/', + '--dev', + '/dev', + '--proc', + '/proc', + '--tmpfs', + '/tmp', + '--bind', + workspace, + workspace, + '--ro-bind', + `${workspace}/.gitignore`, + `${workspace}/.gitignore`, + '--ro-bind', + `${workspace}/.geminiignore`, + `${workspace}/.geminiignore`, + '--ro-bind', + `${workspace}/.git`, + `${workspace}/.git`, + '--seccomp', + '9', + '--', + 'ls', + '-la', + ]); }); - const bwrapArgs = await getBwrapArgs({ - command: 'ls', - args: [], - cwd: workspace, - env: {}, + it('should correctly pass through the cwd to the resulting command', async () => { + const req: SandboxRequest = { + command: 'ls', + args: [], + cwd: '/different/cwd', + env: {}, + }; + + const result = await manager.prepareCommand(req); + + expect(result.cwd).toBe('/different/cwd'); }); - expect(bwrapArgs).toContain('--ro-bind'); - expect(bwrapArgs).toContain(`${workspace}/.gitignore`); - expect(bwrapArgs).toContain('/shared/global.gitignore'); + it('should apply environment sanitization via the default mechanisms', async () => { + const req: SandboxRequest = { + command: 'test', + args: [], + cwd: workspace, + env: { + API_KEY: 'secret', + PATH: '/usr/bin', + }, + policy: { + sanitizationConfig: { + allowedEnvironmentVariables: ['PATH'], + blockedEnvironmentVariables: ['API_KEY'], + enableEnvironmentVariableRedaction: true, + }, + }, + }; - // Check that both are bound - const gitignoreIndex = bwrapArgs.indexOf(`${workspace}/.gitignore`); - expect(bwrapArgs[gitignoreIndex - 1]).toBe('--ro-bind'); - expect(bwrapArgs[gitignoreIndex + 1]).toBe(`${workspace}/.gitignore`); - - const realGitignoreIndex = bwrapArgs.indexOf('/shared/global.gitignore'); - expect(bwrapArgs[realGitignoreIndex - 1]).toBe('--ro-bind'); - expect(bwrapArgs[realGitignoreIndex + 1]).toBe('/shared/global.gitignore'); - }); - - it('touches governance files if they do not exist', async () => { - vi.mocked(fs.existsSync).mockReturnValue(false); - - await getBwrapArgs({ - command: 
'ls', - args: [], - cwd: workspace, - env: {}, + const result = await manager.prepareCommand(req); + expect(result.env['PATH']).toBe('/usr/bin'); + expect(result.env['API_KEY']).toBeUndefined(); }); - expect(fs.mkdirSync).toHaveBeenCalled(); - expect(fs.openSync).toHaveBeenCalled(); - }); + it('should allow network when networkAccess is true', async () => { + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: ['-la'], + cwd: workspace, + env: {}, + policy: { + networkAccess: true, + }, + }); - it('should not bind the workspace twice even if it has a trailing slash in allowedPaths', async () => { - const bwrapArgs = await getBwrapArgs({ - command: 'ls', - args: ['-la'], - cwd: workspace, - env: {}, - policy: { - allowedPaths: [workspace + '/'], - }, + expect(bwrapArgs).toContain('--unshare-user'); + expect(bwrapArgs).toContain('--unshare-ipc'); + expect(bwrapArgs).toContain('--unshare-pid'); + expect(bwrapArgs).toContain('--unshare-uts'); + expect(bwrapArgs).toContain('--unshare-cgroup'); + expect(bwrapArgs).not.toContain('--unshare-all'); }); - const bindsIndex = bwrapArgs.indexOf('--seccomp'); - const binds = bwrapArgs.slice(bwrapArgs.indexOf('--bind'), bindsIndex); + describe('governance files', () => { + it('should ensure governance files exist', async () => { + vi.mocked(fs.existsSync).mockReturnValue(false); - // Should only contain the primary workspace bind and governance files, not the second workspace bind with a trailing slash - expect(binds).toEqual([ - '--bind', - workspace, - workspace, - '--ro-bind', - `${workspace}/.gitignore`, - `${workspace}/.gitignore`, - '--ro-bind', - `${workspace}/.geminiignore`, - `${workspace}/.geminiignore`, - '--ro-bind', - `${workspace}/.git`, - `${workspace}/.git`, - ]); + await getBwrapArgs({ + command: 'ls', + args: [], + cwd: workspace, + env: {}, + }); + + expect(fs.mkdirSync).toHaveBeenCalled(); + expect(fs.openSync).toHaveBeenCalled(); + }); + + it('should protect both the symlink and the real path if 
they differ', async () => { + vi.mocked(fs.realpathSync).mockImplementation((p) => { + if (p.toString() === `${workspace}/.gitignore`) + return '/shared/global.gitignore'; + return p.toString(); + }); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: [], + cwd: workspace, + env: {}, + }); + + expect(bwrapArgs).toContain('--ro-bind'); + expect(bwrapArgs).toContain(`${workspace}/.gitignore`); + expect(bwrapArgs).toContain('/shared/global.gitignore'); + + // Check that both are bound + const gitignoreIndex = bwrapArgs.indexOf(`${workspace}/.gitignore`); + expect(bwrapArgs[gitignoreIndex - 1]).toBe('--ro-bind'); + expect(bwrapArgs[gitignoreIndex + 1]).toBe(`${workspace}/.gitignore`); + + const realGitignoreIndex = bwrapArgs.indexOf( + '/shared/global.gitignore', + ); + expect(bwrapArgs[realGitignoreIndex - 1]).toBe('--ro-bind'); + expect(bwrapArgs[realGitignoreIndex + 1]).toBe( + '/shared/global.gitignore', + ); + }); + }); + + describe('allowedPaths', () => { + it('should parameterize allowed paths and normalize them', async () => { + const bwrapArgs = await getBwrapArgs({ + command: 'node', + args: ['script.js'], + cwd: workspace, + env: {}, + policy: { + allowedPaths: ['/tmp/cache', '/opt/tools', workspace], + }, + }); + + // Verify the specific bindings were added correctly + expectDynamicBinds(bwrapArgs, [ + '--bind-try', + '/tmp/cache', + '/tmp/cache', + '--bind-try', + '/opt/tools', + '/opt/tools', + ]); + }); + + it('should not bind the workspace twice even if it has a trailing slash in allowedPaths', async () => { + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: ['-la'], + cwd: workspace, + env: {}, + policy: { + allowedPaths: [workspace + '/'], + }, + }); + + // Should only contain the primary workspace bind and governance files, not the second workspace bind with a trailing slash + expectDynamicBinds(bwrapArgs, []); + }); + }); + + describe('forbiddenPaths', () => { + it('should parameterize forbidden paths and explicitly 
deny them', async () => { + vi.spyOn(fs.promises, 'stat').mockImplementation(async (p) => { + // Mock /tmp/cache as a directory, and /opt/secret.txt as a file + if (p.toString().includes('cache')) { + return { isDirectory: () => true } as fs.Stats; + } + return { isDirectory: () => false } as fs.Stats; + }); + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation(async (p) => + p.toString(), + ); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: ['-la'], + cwd: workspace, + env: {}, + policy: { + forbiddenPaths: ['/tmp/cache', '/opt/secret.txt'], + }, + }); + + expectDynamicBinds(bwrapArgs, [ + '--tmpfs', + '/tmp/cache', + '--remount-ro', + '/tmp/cache', + '--ro-bind-try', + '/dev/null', + '/opt/secret.txt', + ]); + }); + + it('resolves forbidden symlink paths to their real paths', async () => { + vi.spyOn(fs.promises, 'stat').mockImplementation( + async () => ({ isDirectory: () => false }) as fs.Stats, + ); + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => { + if (p === '/tmp/forbidden-symlink') return '/opt/real-target.txt'; + return p.toString(); + }, + ); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: ['-la'], + cwd: workspace, + env: {}, + policy: { + forbiddenPaths: ['/tmp/forbidden-symlink'], + }, + }); + + // Should explicitly mask both the resolved path and the original symlink path + expectDynamicBinds(bwrapArgs, [ + '--ro-bind-try', + '/dev/null', + '/opt/real-target.txt', + '--ro-bind-try', + '/dev/null', + '/tmp/forbidden-symlink', + ]); + }); + + it('explicitly denies non-existent forbidden paths to prevent creation', async () => { + const error = new Error('File not found') as NodeJS.ErrnoException; + error.code = 'ENOENT'; + vi.spyOn(fs.promises, 'stat').mockRejectedValue(error); + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation(async (p) => + p.toString(), + ); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: [], + cwd: workspace, + env: {}, + 
policy: { + forbiddenPaths: ['/tmp/not-here.txt'], + }, + }); + + expectDynamicBinds(bwrapArgs, [ + '--symlink', + '/.forbidden', + '/tmp/not-here.txt', + ]); + }); + + it('masks directory symlinks with tmpfs for both paths', async () => { + vi.spyOn(fs.promises, 'stat').mockImplementation( + async () => ({ isDirectory: () => true }) as fs.Stats, + ); + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => { + if (p === '/tmp/dir-link') return '/opt/real-dir'; + return p.toString(); + }, + ); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: [], + cwd: workspace, + env: {}, + policy: { + forbiddenPaths: ['/tmp/dir-link'], + }, + }); + + expectDynamicBinds(bwrapArgs, [ + '--tmpfs', + '/opt/real-dir', + '--remount-ro', + '/opt/real-dir', + '--tmpfs', + '/tmp/dir-link', + '--remount-ro', + '/tmp/dir-link', + ]); + }); + + it('should override allowed paths if a path is also in forbidden paths', async () => { + vi.spyOn(fs.promises, 'stat').mockImplementation( + async () => ({ isDirectory: () => true }) as fs.Stats, + ); + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation(async (p) => + p.toString(), + ); + + const bwrapArgs = await getBwrapArgs({ + command: 'ls', + args: ['-la'], + cwd: workspace, + env: {}, + policy: { + allowedPaths: ['/tmp/conflict'], + forbiddenPaths: ['/tmp/conflict'], + }, + }); + + expectDynamicBinds(bwrapArgs, [ + '--bind-try', + '/tmp/conflict', + '/tmp/conflict', + '--tmpfs', + '/tmp/conflict', + '--remount-ro', + '/tmp/conflict', + ]); + }); + }); }); }); diff --git a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts index f50a97c17f..8dd1154846 100644 --- a/packages/core/src/sandbox/linux/LinuxSandboxManager.ts +++ b/packages/core/src/sandbox/linux/LinuxSandboxManager.ts @@ -14,11 +14,13 @@ import { type SandboxedCommand, GOVERNANCE_FILES, sanitizePaths, + tryRealpath, } from '../../services/sandboxManager.js'; import { 
sanitizeEnvironment, getSecureSanitizationConfig, } from '../../services/environmentSanitization.js'; +import { isNodeError } from '../../utils/errors.js'; let cachedBpfPath: string | undefined; @@ -111,53 +113,13 @@ export class LinuxSandboxManager implements SandboxManager { const sanitizedEnv = sanitizeEnvironment(req.env, sanitizationConfig); const bwrapArgs: string[] = [ - '--unshare-all', - '--new-session', // Isolate session - '--die-with-parent', // Prevent orphaned runaway processes - '--ro-bind', - '/', - '/', - '--dev', // Creates a safe, minimal /dev (replaces --dev-bind) - '/dev', - '--proc', // Creates a fresh procfs for the unshared PID namespace - '/proc', - '--tmpfs', // Provides an isolated, writable /tmp directory - '/tmp', - // Note: --dev /dev sets up /dev/pts automatically - '--bind', - this.options.workspace, - this.options.workspace, + ...this.getNetworkArgs(req), + ...this.getBaseArgs(), + ...this.getGovernanceArgs(), + ...this.getAllowedPathsArgs(req.policy?.allowedPaths), + ...(await this.getForbiddenPathsArgs(req.policy?.forbiddenPaths)), ]; - // Protected governance files are bind-mounted as read-only, even if the workspace is RW. - // We ensure they exist on the host and resolve real paths to prevent symlink bypasses. - // In bwrap, later binds override earlier ones for the same path. 
- for (const file of GOVERNANCE_FILES) { - const filePath = join(this.options.workspace, file.path); - touch(filePath, file.isDirectory); - - const realPath = fs.realpathSync(filePath); - - bwrapArgs.push('--ro-bind', filePath, filePath); - if (realPath !== filePath) { - bwrapArgs.push('--ro-bind', realPath, realPath); - } - } - - const allowedPaths = sanitizePaths(req.policy?.allowedPaths) || []; - const normalizedWorkspace = normalize(this.options.workspace).replace( - /\/$/, - '', - ); - for (const allowedPath of allowedPaths) { - const normalizedAllowedPath = normalize(allowedPath).replace(/\/$/, ''); - if (normalizedAllowedPath !== normalizedWorkspace) { - bwrapArgs.push('--bind-try', allowedPath, allowedPath); - } - } - - // TODO: handle forbidden paths - const bpfPath = getSeccompBpfPath(); bwrapArgs.push('--seccomp', '9'); @@ -175,6 +137,145 @@ export class LinuxSandboxManager implements SandboxManager { program: 'sh', args: shArgs, env: sanitizedEnv, + cwd: req.cwd, }; } + + /** + * Generates arguments for network isolation. + */ + private getNetworkArgs(req: SandboxRequest): string[] { + return req.policy?.networkAccess + ? [ + '--unshare-user', + '--unshare-ipc', + '--unshare-pid', + '--unshare-uts', + '--unshare-cgroup', + ] + : ['--unshare-all']; + } + + /** + * Generates the base bubblewrap arguments for isolation. + */ + private getBaseArgs(): string[] { + return [ + '--new-session', // Isolate session + '--die-with-parent', // Prevent orphaned runaway processes + '--ro-bind', + '/', + '/', + '--dev', // Creates a safe, minimal /dev (replaces --dev-bind) + '/dev', + '--proc', // Creates a fresh procfs for the unshared PID namespace + '/proc', + '--tmpfs', // Provides an isolated, writable /tmp directory + '/tmp', + // Note: --dev /dev sets up /dev/pts automatically + '--bind', + this.options.workspace, + this.options.workspace, + ]; + } + + /** + * Generates arguments for protected governance files. 
+ */ + private getGovernanceArgs(): string[] { + const args: string[] = []; + // Protected governance files are bind-mounted as read-only, even if the workspace is RW. + // We ensure they exist on the host and resolve real paths to prevent symlink bypasses. + // In bwrap, later binds override earlier ones for the same path. + for (const file of GOVERNANCE_FILES) { + const filePath = join(this.options.workspace, file.path); + touch(filePath, file.isDirectory); + + const realPath = fs.realpathSync(filePath); + + args.push('--ro-bind', filePath, filePath); + if (realPath !== filePath) { + args.push('--ro-bind', realPath, realPath); + } + } + return args; + } + + /** + * Generates arguments for allowed paths. + */ + private getAllowedPathsArgs(allowedPaths?: string[]): string[] { + const args: string[] = []; + const paths = sanitizePaths(allowedPaths) || []; + const normalizedWorkspace = this.normalizePath(this.options.workspace); + + for (const p of paths) { + if (this.normalizePath(p) !== normalizedWorkspace) { + args.push('--bind-try', p, p); + } + } + return args; + } + + /** + * Generates arguments for forbidden paths. + */ + private async getForbiddenPathsArgs( + forbiddenPaths?: string[], + ): Promise { + const args: string[] = []; + const paths = sanitizePaths(forbiddenPaths) || []; + + for (const p of paths) { + try { + const originalPath = this.normalizePath(p); + const resolvedPath = await tryRealpath(originalPath); + + // Mask the resolved path to prevent access to the underlying file. + const resolvedMask = await this.getMaskArgs(resolvedPath); + args.push(...resolvedMask); + + // If the original path was a symlink, mask it as well to prevent access + // through the link itself. + if (resolvedPath !== originalPath) { + const originalMask = await this.getMaskArgs(originalPath); + args.push(...originalMask); + } + } catch (e) { + throw new Error( + `Failed to deny access to forbidden path: ${p}. ${ + e instanceof Error ? 
e.message : String(e) + }`, + ); + } + } + return args; + } + + /** + * Generates bubblewrap arguments to mask a forbidden path. + */ + private async getMaskArgs(path: string): Promise { + try { + const stats = await fs.promises.stat(path); + + if (stats.isDirectory()) { + // Directories are masked by mounting an empty, read-only tmpfs. + return ['--tmpfs', path, '--remount-ro', path]; + } + // Existing files are masked by binding them to /dev/null. + return ['--ro-bind-try', '/dev/null', path]; + } catch (e) { + if (isNodeError(e) && e.code === 'ENOENT') { + // Non-existent paths are masked by a broken symlink. This prevents + // creation within the sandbox while avoiding host remnants. + return ['--symlink', '/.forbidden', path]; + } + throw e; + } + } + + private normalizePath(p: string): string { + return normalize(p).replace(/\/$/, ''); + } } diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.integration.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.integration.test.ts deleted file mode 100644 index f9a3551124..0000000000 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.integration.test.ts +++ /dev/null @@ -1,206 +0,0 @@ -/** - * @license - * Copyright 2026 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ -import { describe, it, expect, beforeAll, afterAll } from 'vitest'; -import { MacOsSandboxManager } from './MacOsSandboxManager.js'; -import { ShellExecutionService } from '../../services/shellExecutionService.js'; -import { getSecureSanitizationConfig } from '../../services/environmentSanitization.js'; -import { type SandboxedCommand } from '../../services/sandboxManager.js'; -import { execFile } from 'node:child_process'; -import { promisify } from 'node:util'; -import os from 'node:os'; -import fs from 'node:fs'; -import path from 'node:path'; -import http from 'node:http'; - -/** - * A simple asynchronous wrapper for execFile that returns the exit status, - * stdout, and stderr. 
Unlike spawnSync, this does not block the Node.js - * event loop, allowing the local HTTP test server to function. - */ -async function runCommand(command: SandboxedCommand) { - try { - const { stdout, stderr } = await promisify(execFile)( - command.program, - command.args, - { - cwd: command.cwd, - env: command.env, - encoding: 'utf-8', - }, - ); - return { status: 0, stdout, stderr }; - } catch (error: unknown) { - const err = error as { - code?: number; - stdout?: string; - stderr?: string; - }; - return { - status: err.code ?? 1, - stdout: err.stdout ?? '', - stderr: err.stderr ?? '', - }; - } -} - -describe.skipIf(os.platform() !== 'darwin')( - 'MacOsSandboxManager Integration', - () => { - describe('Basic Execution', () => { - it('should execute commands within the workspace', async () => { - const manager = new MacOsSandboxManager({ workspace: process.cwd() }); - const command = await manager.prepareCommand({ - command: 'echo', - args: ['sandbox test'], - cwd: process.cwd(), - env: process.env, - }); - - const execResult = await runCommand(command); - - expect(execResult.status).toBe(0); - expect(execResult.stdout.trim()).toBe('sandbox test'); - }); - - it('should support interactive pseudo-terminals (node-pty)', async () => { - const manager = new MacOsSandboxManager({ workspace: process.cwd() }); - const abortController = new AbortController(); - - // Verify that node-pty file descriptors are successfully allocated inside the sandbox - // by using the bash [ -t 1 ] idiom to check if stdout is a TTY. 
- const handle = await ShellExecutionService.execute( - 'bash -c "if [ -t 1 ]; then echo True; else echo False; fi"', - process.cwd(), - () => {}, - abortController.signal, - true, - { - sanitizationConfig: getSecureSanitizationConfig(), - sandboxManager: manager, - }, - ); - - const result = await handle.result; - expect(result.error).toBeNull(); - expect(result.exitCode).toBe(0); - expect(result.output).toContain('True'); - }); - }); - - describe('File System Access', () => { - it('should block file system access outside the workspace', async () => { - const manager = new MacOsSandboxManager({ workspace: process.cwd() }); - const blockedPath = '/Users/Shared/.gemini_test_sandbox_blocked'; - - const command = await manager.prepareCommand({ - command: 'touch', - args: [blockedPath], - cwd: process.cwd(), - env: process.env, - }); - const execResult = await runCommand(command); - - expect(execResult.status).not.toBe(0); - expect(execResult.stderr).toContain('Operation not permitted'); - }); - - it('should grant file system access to explicitly allowed paths', async () => { - // Create a unique temporary directory to prevent artifacts and test flakiness - const allowedDir = fs.mkdtempSync( - path.join(os.tmpdir(), 'gemini-sandbox-test-'), - ); - - try { - const manager = new MacOsSandboxManager({ - workspace: process.cwd(), - }); - const testFile = path.join(allowedDir, 'test.txt'); - - const command = await manager.prepareCommand({ - command: 'touch', - args: [testFile], - cwd: process.cwd(), - env: process.env, - policy: { - allowedPaths: [allowedDir], - }, - }); - - const execResult = await runCommand(command); - - expect(execResult.status).toBe(0); - } finally { - fs.rmSync(allowedDir, { recursive: true, force: true }); - } - }); - }); - - describe('Network Access', () => { - let testServer: http.Server; - let testServerUrl: string; - - beforeAll(async () => { - testServer = http.createServer((_, res) => { - // Ensure connections are closed immediately to prevent 
hanging - res.setHeader('Connection', 'close'); - res.writeHead(200); - res.end('ok'); - }); - - await new Promise((resolve, reject) => { - testServer.on('error', reject); - testServer.listen(0, '127.0.0.1', () => { - const address = testServer.address() as import('net').AddressInfo; - testServerUrl = `http://127.0.0.1:${address.port}`; - resolve(); - }); - }); - }); - - afterAll(async () => { - if (testServer) { - await new Promise((resolve) => { - testServer.close(() => resolve()); - }); - } - }); - - it('should block network access by default', async () => { - const manager = new MacOsSandboxManager({ workspace: process.cwd() }); - const command = await manager.prepareCommand({ - command: 'curl', - args: ['-s', '--connect-timeout', '1', testServerUrl], - cwd: process.cwd(), - env: process.env, - }); - - const execResult = await runCommand(command); - - expect(execResult.status).not.toBe(0); - }); - - it('should grant network access when explicitly allowed', async () => { - const manager = new MacOsSandboxManager({ - workspace: process.cwd(), - }); - const command = await manager.prepareCommand({ - command: 'curl', - args: ['-s', '--connect-timeout', '1', testServerUrl], - cwd: process.cwd(), - env: process.env, - policy: { - networkAccess: true, - }, - }); - - const execResult = await runCommand(command); - - expect(execResult.status).toBe(0); - expect(execResult.stdout.trim()).toBe('ok'); - }); - }); - }, -); diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts index 97d475e303..7d9bd57cae 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts @@ -6,6 +6,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { MacOsSandboxManager } from './MacOsSandboxManager.js'; import type { ExecutionPolicy } from '../../services/sandboxManager.js'; +import * as seatbeltArgsBuilder 
from './seatbeltArgsBuilder.js'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; @@ -35,8 +36,14 @@ describe('MacOsSandboxManager', () => { }; manager = new MacOsSandboxManager({ workspace: mockWorkspace }); - // Mock realpathSync to just return the path for testing - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p as string); + + // Mock the seatbelt args builder to isolate manager tests + vi.spyOn(seatbeltArgsBuilder, 'buildSeatbeltArgs').mockResolvedValue([ + '-p', + '(mock profile)', + '-D', + 'MOCK_VAR=value', + ]); }); afterEach(() => { @@ -48,78 +55,7 @@ describe('MacOsSandboxManager', () => { }); describe('prepareCommand', () => { - it('should build a strict allowlist profile allowing the workspace via param', async () => { - const result = await manager.prepareCommand({ - command: 'echo', - args: ['hello'], - cwd: mockWorkspace, - env: {}, - policy: { networkAccess: false }, - }); - - expect(result.program).toBe('/usr/bin/sandbox-exec'); - const profile = result.args[1]; - expect(profile).toContain('(version 1)'); - expect(profile).toContain('(deny default)'); - expect(profile).toContain('(allow process-exec)'); - expect(profile).toContain('(subpath (param "WORKSPACE"))'); - expect(profile).not.toContain('(allow network-outbound)'); - - expect(result.args).toContain('-D'); - expect(result.args).toContain(`WORKSPACE=${mockWorkspace}`); - expect(result.args).toContain(`TMPDIR=${os.tmpdir()}`); - - // Governance files should be protected - expect(profile).toContain( - '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', - ); // .gitignore - expect(profile).toContain( - '(deny file-write* (literal (param "GOVERNANCE_FILE_1")))', - ); // .geminiignore - expect(profile).toContain( - '(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))', - ); // .git - }); - - it('should allow network when networkAccess is true in policy', async () => { - const result = await manager.prepareCommand({ - command: 'curl', - 
args: ['example.com'], - cwd: mockWorkspace, - env: {}, - policy: { networkAccess: true }, - }); - - const profile = result.args[1]; - expect(profile).toContain('(allow network-outbound)'); - }); - - it('should parameterize allowed paths and normalize them', async () => { - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { - if (p === '/test/symlink') return '/test/real_path'; - return p as string; - }); - - const result = await manager.prepareCommand({ - command: 'ls', - args: ['/custom/path1'], - cwd: mockWorkspace, - env: {}, - policy: { - allowedPaths: ['/custom/path1', '/test/symlink'], - }, - }); - - const profile = result.args[1]; - expect(profile).toContain('(subpath (param "ALLOWED_PATH_0"))'); - expect(profile).toContain('(subpath (param "ALLOWED_PATH_1"))'); - - expect(result.args).toContain('-D'); - expect(result.args).toContain('ALLOWED_PATH_0=/custom/path1'); - expect(result.args).toContain('ALLOWED_PATH_1=/test/real_path'); - }); - - it('should format the executable and arguments correctly for sandbox-exec', async () => { + it('should correctly format the base command and args', async () => { const result = await manager.prepareCommand({ command: 'echo', args: ['hello'], @@ -128,8 +64,31 @@ describe('MacOsSandboxManager', () => { policy: mockPolicy, }); + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith({ + workspace: mockWorkspace, + allowedPaths: mockAllowedPaths, + networkAccess: mockNetworkAccess, + forbiddenPaths: undefined, + workspaceWrite: false, + additionalPermissions: { + fileSystem: { + read: [], + write: [], + }, + network: true, + }, + }); + expect(result.program).toBe('/usr/bin/sandbox-exec'); - expect(result.args.slice(-3)).toEqual(['--', 'echo', 'hello']); + expect(result.args).toEqual([ + '-p', + '(mock profile)', + '-D', + 'MOCK_VAR=value', + '--', + 'echo', + 'hello', + ]); }); it('should correctly pass through the cwd to the resulting command', async () => { @@ -160,62 +119,118 @@ 
describe('MacOsSandboxManager', () => { expect(result.env['GITHUB_TOKEN']).toBeUndefined(); }); - it('should resolve parent directories if a file does not exist', async () => { - const baseTmpDir = fs.mkdtempSync( - path.join(os.tmpdir(), 'gemini-cli-macos-realpath-test-'), - ); - const realPath = path.join(baseTmpDir, 'real_path'); - const nonexistentFile = path.join(realPath, 'nonexistent.txt'); - - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { - if (p === nonexistentFile) { - const error = new Error('ENOENT'); - Object.assign(error, { code: 'ENOENT' }); - throw error; - } - if (p === realPath) { - return path.join(baseTmpDir, 'resolved_path'); - } - return p as string; + it('should allow network when networkAccess is true', async () => { + await manager.prepareCommand({ + command: 'echo', + args: ['hello'], + cwd: mockWorkspace, + env: {}, + policy: { ...mockPolicy, networkAccess: true }, }); - try { - const dynamicManager = new MacOsSandboxManager({ - workspace: nonexistentFile, - }); - const dynamicResult = await dynamicManager.prepareCommand({ - command: 'echo', - args: ['hello'], - cwd: nonexistentFile, - env: {}, - }); - - expect(dynamicResult.args).toContain( - `WORKSPACE=${path.join(baseTmpDir, 'resolved_path', 'nonexistent.txt')}`, - ); - } finally { - fs.rmSync(baseTmpDir, { recursive: true, force: true }); - } + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith( + expect.objectContaining({ networkAccess: true }), + ); }); - it('should throw if realpathSync throws a non-ENOENT error', async () => { - vi.spyOn(fs, 'realpathSync').mockImplementation(() => { - const error = new Error('Permission denied'); - Object.assign(error, { code: 'EACCES' }); - throw error; - }); - - const errorManager = new MacOsSandboxManager({ - workspace: mockWorkspace, - }); - await expect( - errorManager.prepareCommand({ + describe('governance files', () => { + it('should ensure governance files exist', async () => { + await manager.prepareCommand({ 
command: 'echo', - args: ['hello'], + args: [], cwd: mockWorkspace, env: {}, - }), - ).rejects.toThrow('Permission denied'); + policy: mockPolicy, + }); + + // The seatbelt builder internally handles governance files, so we simply verify + // it is invoked correctly with the right workspace. + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith( + expect.objectContaining({ workspace: mockWorkspace }), + ); + }); + }); + + describe('allowedPaths', () => { + it('should parameterize allowed paths and normalize them', async () => { + await manager.prepareCommand({ + command: 'echo', + args: [], + cwd: mockWorkspace, + env: {}, + policy: { + ...mockPolicy, + allowedPaths: ['/tmp/allowed1', '/tmp/allowed2'], + }, + }); + + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith( + expect.objectContaining({ + allowedPaths: ['/tmp/allowed1', '/tmp/allowed2'], + }), + ); + }); + }); + + describe('forbiddenPaths', () => { + it('should parameterize forbidden paths and explicitly deny them', async () => { + await manager.prepareCommand({ + command: 'echo', + args: [], + cwd: mockWorkspace, + env: {}, + policy: { + ...mockPolicy, + forbiddenPaths: ['/tmp/forbidden1'], + }, + }); + + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith( + expect.objectContaining({ + forbiddenPaths: ['/tmp/forbidden1'], + }), + ); + }); + + it('explicitly denies non-existent forbidden paths to prevent creation', async () => { + await manager.prepareCommand({ + command: 'echo', + args: [], + cwd: mockWorkspace, + env: {}, + policy: { + ...mockPolicy, + forbiddenPaths: ['/tmp/does-not-exist'], + }, + }); + + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith( + expect.objectContaining({ + forbiddenPaths: ['/tmp/does-not-exist'], + }), + ); + }); + + it('should override allowed paths if a path is also in forbidden paths', async () => { + await manager.prepareCommand({ + command: 'echo', + args: [], + cwd: mockWorkspace, + env: {}, + policy: { + 
...mockPolicy, + allowedPaths: ['/tmp/conflict'], + forbiddenPaths: ['/tmp/conflict'], + }, + }); + + expect(seatbeltArgsBuilder.buildSeatbeltArgs).toHaveBeenCalledWith( + expect.objectContaining({ + allowedPaths: ['/tmp/conflict'], + forbiddenPaths: ['/tmp/conflict'], + }), + ); + }); }); }); }); diff --git a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts index 04271c991d..10828083a5 100644 --- a/packages/core/src/sandbox/macos/MacOsSandboxManager.ts +++ b/packages/core/src/sandbox/macos/MacOsSandboxManager.ts @@ -154,7 +154,7 @@ export class MacOsSandboxManager implements SandboxManager { false, }; - const sandboxArgs = buildSeatbeltArgs({ + const sandboxArgs = await buildSeatbeltArgs({ workspace: this.options.workspace, allowedPaths: [...(req.policy?.allowedPaths || [])], forbiddenPaths: req.policy?.forbiddenPaths, diff --git a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts index 8bc3ac87b4..dd2c95235e 100644 --- a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts +++ b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.test.ts @@ -3,158 +3,235 @@ * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi } from 'vitest'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; import { buildSeatbeltArgs } from './seatbeltArgsBuilder.js'; +import * as sandboxManager from '../../services/sandboxManager.js'; import fs from 'node:fs'; import os from 'node:os'; describe('seatbeltArgsBuilder', () => { - it('should build a strict allowlist profile allowing the workspace via param', () => { - // Mock realpathSync to just return the path for testing - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p as string); - - const args = buildSeatbeltArgs({ workspace: '/Users/test/workspace' }); - - expect(args[0]).toBe('-p'); - const profile = args[1]; - 
expect(profile).toContain('(version 1)'); - expect(profile).toContain('(deny default)'); - expect(profile).toContain('(allow process-exec)'); - expect(profile).toContain('(subpath (param "WORKSPACE"))'); - expect(profile).not.toContain('(allow network*)'); - - expect(args).toContain('-D'); - expect(args).toContain('WORKSPACE=/Users/test/workspace'); - expect(args).toContain(`TMPDIR=${os.tmpdir()}`); - + beforeEach(() => { vi.restoreAllMocks(); }); - it('should allow network when networkAccess is true', () => { - const args = buildSeatbeltArgs({ workspace: '/test', networkAccess: true }); - const profile = args[1]; - expect(profile).toContain('(allow network-outbound)'); - }); - - it('should parameterize allowed paths and normalize them', () => { - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { - if (p === '/test/symlink') return '/test/real_path'; - return p as string; - }); - - const args = buildSeatbeltArgs({ - workspace: '/test', - allowedPaths: ['/custom/path1', '/test/symlink'], - }); - - const profile = args[1]; - expect(profile).toContain('(subpath (param "ALLOWED_PATH_0"))'); - expect(profile).toContain('(subpath (param "ALLOWED_PATH_1"))'); - - expect(args).toContain('-D'); - expect(args).toContain('ALLOWED_PATH_0=/custom/path1'); - expect(args).toContain('ALLOWED_PATH_1=/test/real_path'); - - vi.restoreAllMocks(); - }); - - it('should resolve parent directories if a file does not exist', () => { - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { - if (p === '/test/symlink/nonexistent.txt') { - const error = new Error('ENOENT'); - Object.assign(error, { code: 'ENOENT' }); - throw error; - } - if (p === '/test/symlink') { - return '/test/real_path'; - } - return p as string; - }); - - const args = buildSeatbeltArgs({ - workspace: '/test/symlink/nonexistent.txt', - }); - - expect(args).toContain('WORKSPACE=/test/real_path/nonexistent.txt'); - vi.restoreAllMocks(); - }); - - it('should throw if realpathSync throws a non-ENOENT error', () => 
{ - vi.spyOn(fs, 'realpathSync').mockImplementation(() => { - const error = new Error('Permission denied'); - Object.assign(error, { code: 'EACCES' }); - throw error; - }); - - expect(() => - buildSeatbeltArgs({ - workspace: '/test/workspace', - }), - ).toThrow('Permission denied'); - - vi.restoreAllMocks(); - }); - - describe('governance files', () => { - it('should inject explicit deny rules for governance files', () => { - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => p.toString()); - vi.spyOn(fs, 'existsSync').mockReturnValue(true); - vi.spyOn(fs, 'lstatSync').mockImplementation( - (p) => - ({ - isDirectory: () => p.toString().endsWith('.git'), - isFile: () => !p.toString().endsWith('.git'), - }) as unknown as fs.Stats, + describe('buildSeatbeltArgs', () => { + it('should build a strict allowlist profile allowing the workspace via param', async () => { + // Mock tryRealpath to just return the path for testing + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => p, ); - const args = buildSeatbeltArgs({ workspace: '/Users/test/workspace' }); - const profile = args[1]; - - // .gitignore should be a literal deny - expect(args).toContain('-D'); - expect(args).toContain( - 'GOVERNANCE_FILE_0=/Users/test/workspace/.gitignore', - ); - expect(profile).toContain( - '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', - ); - - // .git should be a subpath deny - expect(args).toContain('GOVERNANCE_FILE_2=/Users/test/workspace/.git'); - expect(profile).toContain( - '(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))', - ); - - vi.restoreAllMocks(); - }); - - it('should protect both the symlink and the real path if they differ', () => { - vi.spyOn(fs, 'realpathSync').mockImplementation((p) => { - if (p === '/test/workspace/.gitignore') return '/test/real/.gitignore'; - return p.toString(); + const args = await buildSeatbeltArgs({ + workspace: '/Users/test/workspace', }); - vi.spyOn(fs, 'existsSync').mockReturnValue(true); - 
vi.spyOn(fs, 'lstatSync').mockImplementation( - () => - ({ - isDirectory: () => false, - isFile: () => true, - }) as unknown as fs.Stats, - ); - const args = buildSeatbeltArgs({ workspace: '/test/workspace' }); + expect(args[0]).toBe('-p'); const profile = args[1]; + expect(profile).toContain('(version 1)'); + expect(profile).toContain('(deny default)'); + expect(profile).toContain('(allow process-exec)'); + expect(profile).toContain('(subpath (param "WORKSPACE"))'); + expect(profile).not.toContain('(allow network*)'); - expect(args).toContain('GOVERNANCE_FILE_0=/test/workspace/.gitignore'); - expect(args).toContain('REAL_GOVERNANCE_FILE_0=/test/real/.gitignore'); - expect(profile).toContain( - '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', - ); - expect(profile).toContain( - '(deny file-write* (literal (param "REAL_GOVERNANCE_FILE_0")))', - ); + expect(args).toContain('-D'); + expect(args).toContain('WORKSPACE=/Users/test/workspace'); + expect(args).toContain(`TMPDIR=${os.tmpdir()}`); + }); - vi.restoreAllMocks(); + it('should allow network when networkAccess is true', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => p, + ); + const args = await buildSeatbeltArgs({ + workspace: '/test', + networkAccess: true, + }); + const profile = args[1]; + expect(profile).toContain('(allow network-outbound)'); + }); + + describe('governance files', () => { + it('should inject explicit deny rules for governance files', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation(async (p) => + p.toString(), + ); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + vi.spyOn(fs, 'lstatSync').mockImplementation( + (p) => + ({ + isDirectory: () => p.toString().endsWith('.git'), + isFile: () => !p.toString().endsWith('.git'), + }) as unknown as fs.Stats, + ); + + const args = await buildSeatbeltArgs({ + workspace: '/Users/test/workspace', + }); + const profile = args[1]; + + // .gitignore should be a literal deny 
+ expect(args).toContain('-D'); + expect(args).toContain( + 'GOVERNANCE_FILE_0=/Users/test/workspace/.gitignore', + ); + expect(profile).toContain( + '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', + ); + + // .git should be a subpath deny + expect(args).toContain('GOVERNANCE_FILE_2=/Users/test/workspace/.git'); + expect(profile).toContain( + '(deny file-write* (subpath (param "GOVERNANCE_FILE_2")))', + ); + }); + + it('should protect both the symlink and the real path if they differ', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => { + if (p === '/test/workspace/.gitignore') + return '/test/real/.gitignore'; + return p.toString(); + }, + ); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + vi.spyOn(fs, 'lstatSync').mockImplementation( + () => + ({ + isDirectory: () => false, + isFile: () => true, + }) as unknown as fs.Stats, + ); + + const args = await buildSeatbeltArgs({ workspace: '/test/workspace' }); + const profile = args[1]; + + expect(args).toContain('GOVERNANCE_FILE_0=/test/workspace/.gitignore'); + expect(args).toContain('REAL_GOVERNANCE_FILE_0=/test/real/.gitignore'); + expect(profile).toContain( + '(deny file-write* (literal (param "GOVERNANCE_FILE_0")))', + ); + expect(profile).toContain( + '(deny file-write* (literal (param "REAL_GOVERNANCE_FILE_0")))', + ); + }); + }); + + describe('allowedPaths', () => { + it('should parameterize allowed paths and normalize them', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => { + if (p === '/test/symlink') return '/test/real_path'; + return p; + }, + ); + + const args = await buildSeatbeltArgs({ + workspace: '/test', + allowedPaths: ['/custom/path1', '/test/symlink'], + }); + + const profile = args[1]; + expect(profile).toContain('(subpath (param "ALLOWED_PATH_0"))'); + expect(profile).toContain('(subpath (param "ALLOWED_PATH_1"))'); + + expect(args).toContain('-D'); + 
expect(args).toContain('ALLOWED_PATH_0=/custom/path1'); + expect(args).toContain('ALLOWED_PATH_1=/test/real_path'); + }); + }); + + describe('forbiddenPaths', () => { + it('should parameterize forbidden paths and explicitly deny them', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => p, + ); + + const args = await buildSeatbeltArgs({ + workspace: '/test', + forbiddenPaths: ['/secret/path'], + }); + + const profile = args[1]; + + expect(args).toContain('-D'); + expect(args).toContain('FORBIDDEN_PATH_0=/secret/path'); + + expect(profile).toContain( + '(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_0")))', + ); + }); + + it('resolves forbidden symlink paths to their real paths', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => { + if (p === '/test/symlink') return '/test/real_path'; + return p; + }, + ); + + const args = await buildSeatbeltArgs({ + workspace: '/test', + forbiddenPaths: ['/test/symlink'], + }); + + const profile = args[1]; + + // The builder should resolve the symlink and explicitly deny the real target path + expect(args).toContain('-D'); + expect(args).toContain('FORBIDDEN_PATH_0=/test/real_path'); + expect(profile).toContain( + '(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_0")))', + ); + }); + + it('explicitly denies non-existent forbidden paths to prevent creation', async () => { + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => p, + ); + + const args = await buildSeatbeltArgs({ + workspace: '/test', + forbiddenPaths: ['/test/missing-dir/missing-file.txt'], + }); + + const profile = args[1]; + + expect(args).toContain('-D'); + expect(args).toContain( + 'FORBIDDEN_PATH_0=/test/missing-dir/missing-file.txt', + ); + expect(profile).toContain( + '(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_0")))', + ); + }); + + it('should override allowed paths if a path is also in forbidden paths', async () => { 
+ vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation( + async (p) => p, + ); + + const args = await buildSeatbeltArgs({ + workspace: '/test', + allowedPaths: ['/custom/path1'], + forbiddenPaths: ['/custom/path1'], + }); + + const profile = args[1]; + + const allowString = + '(allow file-read* file-write* (subpath (param "ALLOWED_PATH_0")))'; + const denyString = + '(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_0")))'; + + expect(profile).toContain(allowString); + expect(profile).toContain(denyString); + + // Verify ordering: The explicit deny must appear AFTER the explicit allow in the profile string + // Seatbelt rules are evaluated in order where the latest rule matching a path wins + const allowIndex = profile.indexOf(allowString); + const denyIndex = profile.indexOf(denyString); + expect(denyIndex).toBeGreaterThan(allowIndex); + }); }); }); }); diff --git a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts index 3a4a9d3ab7..f72229b5cc 100644 --- a/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts +++ b/packages/core/src/sandbox/macos/seatbeltArgsBuilder.ts @@ -15,6 +15,7 @@ import { type SandboxPermissions, sanitizePaths, GOVERNANCE_FILES, + tryRealpath, } from '../../services/sandboxManager.js'; /** @@ -35,26 +36,6 @@ export interface SeatbeltArgsOptions { workspaceWrite?: boolean; } -/** - * Resolves symlinks for a given path to prevent sandbox escapes. - * If a file does not exist (ENOENT), it recursively resolves the parent directory. - * Other errors (e.g. EACCES) are re-thrown. 
- */ -function tryRealpath(p: string): string { - try { - return fs.realpathSync(p); - } catch (e) { - if (e instanceof Error && 'code' in e && e.code === 'ENOENT') { - const parentDir = path.dirname(p); - if (parentDir === p) { - return p; - } - return path.join(tryRealpath(parentDir), path.basename(p)); - } - throw e; - } -} - /** * Builds the arguments array for sandbox-exec using a strict allowlist profile. * It relies on parameters passed to sandbox-exec via the -D flag to avoid @@ -63,11 +44,13 @@ function tryRealpath(p: string): string { * Returns arguments up to the end of sandbox-exec configuration (e.g. ['-p', '', '-D', ...]) * Does not include the final '--' separator or the command to run. */ -export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { +export async function buildSeatbeltArgs( + options: SeatbeltArgsOptions, +): Promise { let profile = BASE_SEATBELT_PROFILE + '\n'; const args: string[] = []; - const workspacePath = tryRealpath(options.workspace); + const workspacePath = await tryRealpath(options.workspace); args.push('-D', `WORKSPACE=${workspacePath}`); args.push('-D', `WORKSPACE_RAW=${options.workspace}`); profile += `(allow file-read* (subpath (param "WORKSPACE_RAW")))\n`; @@ -84,7 +67,7 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { // (Seatbelt evaluates rules in order, later rules win for same path). for (let i = 0; i < GOVERNANCE_FILES.length; i++) { const governanceFile = path.join(workspacePath, GOVERNANCE_FILES[i].path); - const realGovernanceFile = tryRealpath(governanceFile); + const realGovernanceFile = await tryRealpath(governanceFile); // Determine if it should be treated as a directory (subpath) or a file (literal). // .git is generally a directory, while ignore files are literals. 
@@ -120,7 +103,7 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { if (!path.isAbsolute(worktreeGitDir)) { worktreeGitDir = path.resolve(workspacePath, worktreeGitDir); } - const resolvedWorktreeGitDir = tryRealpath(worktreeGitDir); + const resolvedWorktreeGitDir = await tryRealpath(worktreeGitDir); // Grant write access to the worktree's specific .git directory args.push('-D', `WORKTREE_GIT_DIR=${resolvedWorktreeGitDir}`); @@ -128,7 +111,7 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { // Grant write access to the main repository's .git directory (objects, refs, etc. are shared) // resolvedWorktreeGitDir is usually like: /path/to/main-repo/.git/worktrees/worktree-name - const mainGitDir = tryRealpath( + const mainGitDir = await tryRealpath( path.dirname(path.dirname(resolvedWorktreeGitDir)), ); if (mainGitDir && mainGitDir.endsWith('.git')) { @@ -141,10 +124,10 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { // Ignore if .git doesn't exist, isn't readable, etc. 
} - const tmpPath = tryRealpath(os.tmpdir()); + const tmpPath = await tryRealpath(os.tmpdir()); args.push('-D', `TMPDIR=${tmpPath}`); - const nodeRootPath = tryRealpath( + const nodeRootPath = await tryRealpath( path.dirname(path.dirname(process.execPath)), ); args.push('-D', `NODE_ROOT=${nodeRootPath}`); @@ -159,7 +142,7 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { for (const p of paths) { if (!p.trim()) continue; try { - let resolved = tryRealpath(p); + let resolved = await tryRealpath(p); // If this is a 'bin' directory (like /usr/local/bin or homebrew/bin), // also grant read access to its parent directory so that symlinked @@ -183,7 +166,7 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { // Handle allowedPaths const allowedPaths = sanitizePaths(options.allowedPaths) || []; for (let i = 0; i < allowedPaths.length; i++) { - const allowedPath = tryRealpath(allowedPaths[i]); + const allowedPath = await tryRealpath(allowedPaths[i]); args.push('-D', `ALLOWED_PATH_${i}=${allowedPath}`); profile += `(allow file-read* file-write* (subpath (param "ALLOWED_PATH_${i}")))\n`; } @@ -192,8 +175,8 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { if (options.additionalPermissions?.fileSystem) { const { read, write } = options.additionalPermissions.fileSystem; if (read) { - read.forEach((p, i) => { - const resolved = tryRealpath(p); + for (let i = 0; i < read.length; i++) { + const resolved = await tryRealpath(read[i]); const paramName = `ADDITIONAL_READ_${i}`; args.push('-D', `${paramName}=${resolved}`); let isFile = false; @@ -207,11 +190,11 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { } else { profile += `(allow file-read* (subpath (param "${paramName}")))\n`; } - }); + } } if (write) { - write.forEach((p, i) => { - const resolved = tryRealpath(p); + for (let i = 0; i < write.length; i++) { + const resolved = await tryRealpath(write[i]); const 
paramName = `ADDITIONAL_WRITE_${i}`; args.push('-D', `${paramName}=${resolved}`); let isFile = false; @@ -225,14 +208,14 @@ export function buildSeatbeltArgs(options: SeatbeltArgsOptions): string[] { } else { profile += `(allow file-read* file-write* (subpath (param "${paramName}")))\n`; } - }); + } } } // Handle forbiddenPaths const forbiddenPaths = sanitizePaths(options.forbiddenPaths) || []; for (let i = 0; i < forbiddenPaths.length; i++) { - const forbiddenPath = tryRealpath(forbiddenPaths[i]); + const forbiddenPath = await tryRealpath(forbiddenPaths[i]); args.push('-D', `FORBIDDEN_PATH_${i}=${forbiddenPath}`); profile += `(deny file-read* file-write* (subpath (param "FORBIDDEN_PATH_${i}")))\n`; } diff --git a/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts b/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts index de526e2eaf..0abd3dd56b 100644 --- a/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.test.ts @@ -9,6 +9,7 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; import { WindowsSandboxManager } from './WindowsSandboxManager.js'; +import * as sandboxManager from '../../services/sandboxManager.js'; import type { SandboxRequest } from '../../services/sandboxManager.js'; import { spawnAsync } from '../../utils/shell-utils.js'; @@ -22,6 +23,9 @@ describe('WindowsSandboxManager', () => { beforeEach(() => { vi.spyOn(os, 'platform').mockReturnValue('win32'); + vi.spyOn(sandboxManager, 'tryRealpath').mockImplementation(async (p) => + p.toString(), + ); testCwd = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-cli-test-')); manager = new WindowsSandboxManager({ workspace: testCwd }); }); @@ -31,108 +35,240 @@ describe('WindowsSandboxManager', () => { fs.rmSync(testCwd, { recursive: true, force: true }); }); - it('should prepare a GeminiSandbox.exe command', async () => { - const req: SandboxRequest = { - command: 'whoami', - 
args: ['/groups'], - cwd: testCwd, - env: { TEST_VAR: 'test_value' }, - policy: { - networkAccess: false, - }, - }; - - const result = await manager.prepareCommand(req); - - expect(result.program).toContain('GeminiSandbox.exe'); - expect(result.args).toEqual(['0', testCwd, 'whoami', '/groups']); - }); - - it('should handle networkAccess from config', async () => { - const req: SandboxRequest = { - command: 'whoami', - args: [], - cwd: testCwd, - env: {}, - policy: { - networkAccess: true, - }, - }; - - const result = await manager.prepareCommand(req); - expect(result.args[0]).toBe('1'); - }); - - it('should sanitize environment variables', async () => { - const req: SandboxRequest = { - command: 'test', - args: [], - cwd: testCwd, - env: { - API_KEY: 'secret', - PATH: '/usr/bin', - }, - policy: { - sanitizationConfig: { - allowedEnvironmentVariables: ['PATH'], - blockedEnvironmentVariables: ['API_KEY'], - enableEnvironmentVariableRedaction: true, + describe('prepareCommand', () => { + it('should correctly format the base command and args', async () => { + const req: SandboxRequest = { + command: 'whoami', + args: ['/groups'], + cwd: testCwd, + env: { TEST_VAR: 'test_value' }, + policy: { + networkAccess: false, }, - }, - }; + }; - const result = await manager.prepareCommand(req); - expect(result.env['PATH']).toBe('/usr/bin'); - expect(result.env['API_KEY']).toBeUndefined(); - }); + const result = await manager.prepareCommand(req); - it('should ensure governance files exist', async () => { - const req: SandboxRequest = { - command: 'test', - args: [], - cwd: testCwd, - env: {}, - }; + expect(result.program).toContain('GeminiSandbox.exe'); + expect(result.args).toEqual(['0', testCwd, 'whoami', '/groups']); + }); - await manager.prepareCommand(req); + it('should correctly pass through the cwd to the resulting command', async () => { + const req: SandboxRequest = { + command: 'whoami', + args: [], + cwd: '/different/cwd', + env: {}, + }; - 
expect(fs.existsSync(path.join(testCwd, '.gitignore'))).toBe(true); - expect(fs.existsSync(path.join(testCwd, '.geminiignore'))).toBe(true); - expect(fs.existsSync(path.join(testCwd, '.git'))).toBe(true); - expect(fs.lstatSync(path.join(testCwd, '.git')).isDirectory()).toBe(true); - }); + const result = await manager.prepareCommand(req); - it('should grant Low Integrity access to the workspace and allowed paths', async () => { - const allowedPath = path.join(os.tmpdir(), 'gemini-cli-test-allowed'); - if (!fs.existsSync(allowedPath)) { - fs.mkdirSync(allowedPath); - } - try { + expect(result.cwd).toBe('/different/cwd'); + }); + + it('should apply environment sanitization via the default mechanisms', async () => { const req: SandboxRequest = { command: 'test', args: [], cwd: testCwd, - env: {}, + env: { + API_KEY: 'secret', + PATH: '/usr/bin', + }, policy: { - allowedPaths: [allowedPath], + sanitizationConfig: { + allowedEnvironmentVariables: ['PATH'], + blockedEnvironmentVariables: ['API_KEY'], + enableEnvironmentVariableRedaction: true, + }, }, }; - await manager.prepareCommand(req); + const result = await manager.prepareCommand(req); + expect(result.env['PATH']).toBe('/usr/bin'); + expect(result.env['API_KEY']).toBeUndefined(); + }); - expect(spawnAsync).toHaveBeenCalledWith('icacls', [ - path.resolve(testCwd), - '/setintegritylevel', - 'Low', - ]); + it('should allow network when networkAccess is true', async () => { + const req: SandboxRequest = { + command: 'whoami', + args: [], + cwd: testCwd, + env: {}, + policy: { + networkAccess: true, + }, + }; - expect(spawnAsync).toHaveBeenCalledWith('icacls', [ - path.resolve(allowedPath), - '/setintegritylevel', - 'Low', - ]); - } finally { - fs.rmSync(allowedPath, { recursive: true, force: true }); - } + const result = await manager.prepareCommand(req); + expect(result.args[0]).toBe('1'); + }); + + describe('governance files', () => { + it('should ensure governance files exist', async () => { + const req: 
SandboxRequest = { + command: 'test', + args: [], + cwd: testCwd, + env: {}, + }; + + await manager.prepareCommand(req); + + expect(fs.existsSync(path.join(testCwd, '.gitignore'))).toBe(true); + expect(fs.existsSync(path.join(testCwd, '.geminiignore'))).toBe(true); + expect(fs.existsSync(path.join(testCwd, '.git'))).toBe(true); + expect(fs.lstatSync(path.join(testCwd, '.git')).isDirectory()).toBe( + true, + ); + }); + }); + + describe('allowedPaths', () => { + it('should parameterize allowed paths and normalize them', async () => { + const allowedPath = path.join(os.tmpdir(), 'gemini-cli-test-allowed'); + if (!fs.existsSync(allowedPath)) { + fs.mkdirSync(allowedPath); + } + try { + const req: SandboxRequest = { + command: 'test', + args: [], + cwd: testCwd, + env: {}, + policy: { + allowedPaths: [allowedPath], + }, + }; + + await manager.prepareCommand(req); + + expect(spawnAsync).toHaveBeenCalledWith('icacls', [ + path.resolve(testCwd), + '/setintegritylevel', + 'Low', + ]); + + expect(spawnAsync).toHaveBeenCalledWith('icacls', [ + path.resolve(allowedPath), + '/setintegritylevel', + 'Low', + ]); + } finally { + fs.rmSync(allowedPath, { recursive: true, force: true }); + } + }); + }); + + describe('forbiddenPaths', () => { + it('should parameterize forbidden paths and explicitly deny them', async () => { + const forbiddenPath = path.join( + os.tmpdir(), + 'gemini-cli-test-forbidden', + ); + if (!fs.existsSync(forbiddenPath)) { + fs.mkdirSync(forbiddenPath); + } + try { + const req: SandboxRequest = { + command: 'test', + args: [], + cwd: testCwd, + env: {}, + policy: { + forbiddenPaths: [forbiddenPath], + }, + }; + + await manager.prepareCommand(req); + + expect(spawnAsync).toHaveBeenCalledWith('icacls', [ + path.resolve(forbiddenPath), + '/deny', + '*S-1-16-4096:(OI)(CI)(F)', + ]); + } finally { + fs.rmSync(forbiddenPath, { recursive: true, force: true }); + } + }); + + it('explicitly denies non-existent forbidden paths to prevent creation', async () => { + const 
missingPath = path.join( + os.tmpdir(), + 'gemini-cli-test-missing', + 'does-not-exist.txt', + ); + + // Ensure it definitely doesn't exist + if (fs.existsSync(missingPath)) { + fs.rmSync(missingPath, { recursive: true, force: true }); + } + + const req: SandboxRequest = { + command: 'test', + args: [], + cwd: testCwd, + env: {}, + policy: { + forbiddenPaths: [missingPath], + }, + }; + + await manager.prepareCommand(req); + + // Should NOT have called icacls to deny the missing path + expect(spawnAsync).not.toHaveBeenCalledWith('icacls', [ + path.resolve(missingPath), + '/deny', + '*S-1-16-4096:(OI)(CI)(F)', + ]); + }); + + it('should override allowed paths if a path is also in forbidden paths', async () => { + const conflictPath = path.join(os.tmpdir(), 'gemini-cli-test-conflict'); + if (!fs.existsSync(conflictPath)) { + fs.mkdirSync(conflictPath); + } + try { + const req: SandboxRequest = { + command: 'test', + args: [], + cwd: testCwd, + env: {}, + policy: { + allowedPaths: [conflictPath], + forbiddenPaths: [conflictPath], + }, + }; + + await manager.prepareCommand(req); + + const spawnMock = vi.mocked(spawnAsync); + const allowCallIndex = spawnMock.mock.calls.findIndex( + (call) => + call[1] && + call[1].includes('/setintegritylevel') && + call[0] === 'icacls' && + call[1][0] === path.resolve(conflictPath), + ); + const denyCallIndex = spawnMock.mock.calls.findIndex( + (call) => + call[1] && + call[1].includes('/deny') && + call[0] === 'icacls' && + call[1][0] === path.resolve(conflictPath), + ); + + // Both should have been called + expect(allowCallIndex).toBeGreaterThan(-1); + expect(denyCallIndex).toBeGreaterThan(-1); + + // Verify order: explicitly denying must happen after the explicit allow + expect(allowCallIndex).toBeLessThan(denyCallIndex); + } finally { + fs.rmSync(conflictPath, { recursive: true, force: true }); + } + }); + }); }); }); diff --git a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts 
b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts index b4391c8595..0a1bc2a95f 100644 --- a/packages/core/src/sandbox/windows/WindowsSandboxManager.ts +++ b/packages/core/src/sandbox/windows/WindowsSandboxManager.ts @@ -15,6 +15,7 @@ import { GOVERNANCE_FILES, type GlobalSandboxOptions, sanitizePaths, + tryRealpath, } from '../../services/sandboxManager.js'; import { sanitizeEnvironment, @@ -22,6 +23,7 @@ import { } from '../../services/environmentSanitization.js'; import { debugLogger } from '../../utils/debugLogger.js'; import { spawnAsync } from '../../utils/shell-utils.js'; +import { isNodeError } from '../../utils/errors.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -34,7 +36,8 @@ const __dirname = path.dirname(__filename); export class WindowsSandboxManager implements SandboxManager { private readonly helperPath: string; private initialized = false; - private readonly lowIntegrityCache = new Set(); + private readonly allowedCache = new Set(); + private readonly deniedCache = new Set(); constructor(private readonly options: GlobalSandboxOptions) { this.helperPath = path.resolve(__dirname, 'GeminiSandbox.exe'); @@ -185,7 +188,11 @@ export class WindowsSandboxManager implements SandboxManager { await this.grantLowIntegrityAccess(allowedPath); } - // TODO: handle forbidden paths + // Denies access to forbiddenPaths for Low Integrity processes. + const forbiddenPaths = sanitizePaths(req.policy?.forbiddenPaths) || []; + for (const forbiddenPath of forbiddenPaths) { + await this.denyLowIntegrityAccess(forbiddenPath); + } // 2. 
Protected governance files // These must exist on the host before running the sandbox to prevent @@ -224,6 +231,7 @@ export class WindowsSandboxManager implements SandboxManager { program, args, env: sanitizedEnv, + cwd: req.cwd, }; } @@ -235,8 +243,8 @@ export class WindowsSandboxManager implements SandboxManager { return; } - const resolvedPath = path.resolve(targetPath); - if (this.lowIntegrityCache.has(resolvedPath)) { + const resolvedPath = await tryRealpath(targetPath); + if (this.allowedCache.has(resolvedPath)) { return; } @@ -256,7 +264,7 @@ export class WindowsSandboxManager implements SandboxManager { try { await spawnAsync('icacls', [resolvedPath, '/setintegritylevel', 'Low']); - this.lowIntegrityCache.add(resolvedPath); + this.allowedCache.add(resolvedPath); } catch (e) { debugLogger.log( 'WindowsSandboxManager: icacls failed for', @@ -265,4 +273,54 @@ export class WindowsSandboxManager implements SandboxManager { ); } } + + /** + * Explicitly denies access to a path for Low Integrity processes using icacls. + */ + private async denyLowIntegrityAccess(targetPath: string): Promise { + if (os.platform() !== 'win32') { + return; + } + + const resolvedPath = await tryRealpath(targetPath); + if (this.deniedCache.has(resolvedPath)) { + return; + } + + // S-1-16-4096 is the SID for "Low Mandatory Level" (Low Integrity) + const LOW_INTEGRITY_SID = '*S-1-16-4096'; + + // icacls flags: (OI) Object Inherit, (CI) Container Inherit, (F) Full Access Deny. + // Omit /T (recursive) for performance; (OI)(CI) ensures inheritance for new items. + // Windows dynamically evaluates existing items, though deep explicit Allow ACEs + // could potentially bypass this inherited Deny rule. + const DENY_ALL_INHERIT = '(OI)(CI)(F)'; + + // icacls fails on non-existent paths, so we cannot explicitly deny + // paths that do not yet exist (unlike macOS/Linux). + // Skip to prevent sandbox initialization failure. 
+ try { + await fs.promises.stat(resolvedPath); + } catch (e: unknown) { + if (isNodeError(e) && e.code === 'ENOENT') { + return; + } + throw e; + } + + try { + await spawnAsync('icacls', [ + resolvedPath, + '/deny', + `${LOW_INTEGRITY_SID}:${DENY_ALL_INHERIT}`, + ]); + this.deniedCache.add(resolvedPath); + } catch (e) { + throw new Error( + `Failed to deny access to forbidden path: ${resolvedPath}. ${ + e instanceof Error ? e.message : String(e) + }`, + ); + } + } } diff --git a/packages/core/src/services/sandboxManager.integration.test.ts b/packages/core/src/services/sandboxManager.integration.test.ts new file mode 100644 index 0000000000..4cf894cc17 --- /dev/null +++ b/packages/core/src/services/sandboxManager.integration.test.ts @@ -0,0 +1,475 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { createSandboxManager } from './sandboxManagerFactory.js'; +import { ShellExecutionService } from './shellExecutionService.js'; +import { getSecureSanitizationConfig } from './environmentSanitization.js'; +import { + type SandboxedCommand, + NoopSandboxManager, + LocalSandboxManager, +} from './sandboxManager.js'; +import { execFile, execSync } from 'node:child_process'; +import { promisify } from 'node:util'; +import os from 'node:os'; +import fs from 'node:fs'; +import path from 'node:path'; +import http from 'node:http'; + +/** + * Abstracts platform-specific shell commands for integration testing. + */ +const Platform = { + isWindows: os.platform() === 'win32', + + /** Returns a command to create an empty file. */ + touch(filePath: string) { + return this.isWindows + ? { command: 'cmd.exe', args: ['/c', `type nul > "${filePath}"`] } + : { command: 'touch', args: [filePath] }; + }, + + /** Returns a command to read a file's content. */ + cat(filePath: string) { + return this.isWindows + ? 
{ command: 'cmd.exe', args: ['/c', `type "${filePath}"`] } + : { command: 'cat', args: [filePath] }; + }, + + /** Returns a command to echo a string. */ + echo(text: string) { + return this.isWindows + ? { command: 'cmd.exe', args: ['/c', `echo ${text}`] } + : { command: 'echo', args: [text] }; + }, + + /** Returns a command to perform a network request. */ + curl(url: string) { + return this.isWindows + ? { + command: 'powershell.exe', + args: ['-Command', `Invoke-WebRequest -Uri ${url} -TimeoutSec 1`], + } + : { command: 'curl', args: ['-s', '--connect-timeout', '1', url] }; + }, + + /** Returns a command that checks if the current terminal is interactive. */ + isPty() { + return this.isWindows + ? 'cmd.exe /c echo True' + : 'bash -c "if [ -t 1 ]; then echo True; else echo False; fi"'; + }, + + /** Returns a path that is strictly outside the workspace and likely blocked. */ + getExternalBlockedPath() { + return this.isWindows + ? 'C:\\Windows\\System32\\drivers\\etc\\hosts' + : '/Users/Shared/.gemini_test_blocked'; + }, +}; + +async function runCommand(command: SandboxedCommand) { + try { + const { stdout, stderr } = await promisify(execFile)( + command.program, + command.args, + { + cwd: command.cwd, + env: command.env, + encoding: 'utf-8', + }, + ); + return { status: 0, stdout, stderr }; + } catch (error: unknown) { + const err = error as { code?: number; stdout?: string; stderr?: string }; + return { + status: err.code ?? 1, + stdout: err.stdout ?? '', + stderr: err.stderr ?? '', + }; + } +} + +/** + * Determines if the system has the necessary binaries to run the sandbox. + */ +function isSandboxAvailable(): boolean { + if (os.platform() === 'win32') { + // Windows sandboxing relies on icacls, which is a core system utility and + // always available. 
+ return true; + } + + if (os.platform() === 'darwin') { + return fs.existsSync('/usr/bin/sandbox-exec'); + } + + if (os.platform() === 'linux') { + // TODO: Install bubblewrap (bwrap) in Linux CI environments to enable full + // integration testing. + try { + execSync('which bwrap', { stdio: 'ignore' }); + return true; + } catch { + return false; + } + } + + return false; +} + +describe('SandboxManager Integration', () => { + const workspace = process.cwd(); + const manager = createSandboxManager({ enabled: true }, workspace); + + // Skip if we are on an unsupported platform or if it's a NoopSandboxManager + const shouldSkip = + manager instanceof NoopSandboxManager || + manager instanceof LocalSandboxManager || + !isSandboxAvailable(); + + describe.skipIf(shouldSkip)('Cross-platform Sandbox Behavior', () => { + describe('Basic Execution', () => { + it('executes commands within the workspace', async () => { + const { command, args } = Platform.echo('sandbox test'); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + expect(result.status).toBe(0); + expect(result.stdout.trim()).toBe('sandbox test'); + }); + + it('supports interactive pseudo-terminals (node-pty)', async () => { + const handle = await ShellExecutionService.execute( + Platform.isPty(), + workspace, + () => {}, + new AbortController().signal, + true, + { + sanitizationConfig: getSecureSanitizationConfig(), + sandboxManager: manager, + }, + ); + + const result = await handle.result; + expect(result.exitCode).toBe(0); + expect(result.output).toContain('True'); + }); + }); + + describe('File System Access', () => { + it('blocks access outside the workspace', async () => { + const blockedPath = Platform.getExternalBlockedPath(); + const { command, args } = Platform.touch(blockedPath); + + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + 
}); + + const result = await runCommand(sandboxed); + expect(result.status).not.toBe(0); + }); + + it('grants access to explicitly allowed paths', async () => { + const allowedDir = fs.mkdtempSync(path.join(os.tmpdir(), 'allowed-')); + const testFile = path.join(allowedDir, 'test.txt'); + + try { + const { command, args } = Platform.touch(testFile); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { allowedPaths: [allowedDir] }, + }); + + const result = await runCommand(sandboxed); + expect(result.status).toBe(0); + expect(fs.existsSync(testFile)).toBe(true); + } finally { + if (fs.existsSync(testFile)) fs.unlinkSync(testFile); + fs.rmSync(allowedDir, { recursive: true, force: true }); + } + }); + + it('blocks access to forbidden paths within the workspace', async () => { + const tempWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'workspace-'), + ); + const forbiddenDir = path.join(tempWorkspace, 'forbidden'); + const testFile = path.join(forbiddenDir, 'test.txt'); + fs.mkdirSync(forbiddenDir); + + try { + const osManager = createSandboxManager( + { enabled: true }, + tempWorkspace, + ); + const { command, args } = Platform.touch(testFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { forbiddenPaths: [forbiddenDir] }, + }); + + const result = await runCommand(sandboxed); + expect(result.status).not.toBe(0); + } finally { + fs.rmSync(tempWorkspace, { recursive: true, force: true }); + } + }); + + it('blocks access to files inside forbidden directories recursively', async () => { + const tempWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'workspace-'), + ); + const forbiddenDir = path.join(tempWorkspace, 'forbidden'); + const nestedDir = path.join(forbiddenDir, 'nested'); + const nestedFile = path.join(nestedDir, 'test.txt'); + + fs.mkdirSync(nestedDir, { recursive: true }); + fs.writeFileSync(nestedFile, 
'secret'); + + try { + const osManager = createSandboxManager( + { enabled: true }, + tempWorkspace, + ); + const { command, args } = Platform.cat(nestedFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { forbiddenPaths: [forbiddenDir] }, + }); + + const result = await runCommand(sandboxed); + expect(result.status).not.toBe(0); + } finally { + fs.rmSync(tempWorkspace, { recursive: true, force: true }); + } + }); + + it('prioritizes forbiddenPaths over allowedPaths', async () => { + const tempWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'workspace-'), + ); + const conflictDir = path.join(tempWorkspace, 'conflict'); + const testFile = path.join(conflictDir, 'test.txt'); + fs.mkdirSync(conflictDir); + + try { + const osManager = createSandboxManager( + { enabled: true }, + tempWorkspace, + ); + const { command, args } = Platform.touch(testFile); + + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { + allowedPaths: [conflictDir], + forbiddenPaths: [conflictDir], + }, + }); + + const result = await runCommand(sandboxed); + expect(result.status).not.toBe(0); + } finally { + fs.rmSync(tempWorkspace, { recursive: true, force: true }); + } + }); + + it('gracefully ignores non-existent paths in allowedPaths and forbiddenPaths', async () => { + const tempWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'workspace-'), + ); + const nonExistentPath = path.join(tempWorkspace, 'does-not-exist'); + + try { + const osManager = createSandboxManager( + { enabled: true }, + tempWorkspace, + ); + const { command, args } = Platform.echo('survived'); + const sandboxed = await osManager.prepareCommand({ + command, + args, + cwd: tempWorkspace, + env: process.env, + policy: { + allowedPaths: [nonExistentPath], + forbiddenPaths: [nonExistentPath], + }, + }); + const result = await runCommand(sandboxed); + 
expect(result.status).toBe(0); + expect(result.stdout.trim()).toBe('survived'); + } finally { + fs.rmSync(tempWorkspace, { recursive: true, force: true }); + } + }); + + it('prevents creation of non-existent forbidden paths', async () => { + // Windows icacls cannot explicitly protect paths that have not yet been created. + if (Platform.isWindows) return; + + const tempWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'workspace-'), + ); + const nonExistentFile = path.join(tempWorkspace, 'never-created.txt'); + + try { + const osManager = createSandboxManager( + { enabled: true }, + tempWorkspace, + ); + + // We use touch to attempt creation of the file + const { command: cmdTouch, args: argsTouch } = + Platform.touch(nonExistentFile); + + const sandboxedCmd = await osManager.prepareCommand({ + command: cmdTouch, + args: argsTouch, + cwd: tempWorkspace, + env: process.env, + policy: { forbiddenPaths: [nonExistentFile] }, + }); + + // Execute the command, we expect it to fail (permission denied or read-only file system) + const result = await runCommand(sandboxedCmd); + + expect(result.status).not.toBe(0); + expect(fs.existsSync(nonExistentFile)).toBe(false); + } finally { + fs.rmSync(tempWorkspace, { recursive: true, force: true }); + } + }); + + it('blocks access to both a symlink and its target when the symlink is forbidden', async () => { + if (Platform.isWindows) return; + + const tempWorkspace = fs.mkdtempSync( + path.join(os.tmpdir(), 'workspace-'), + ); + const targetFile = path.join(tempWorkspace, 'target.txt'); + const symlinkFile = path.join(tempWorkspace, 'link.txt'); + + fs.writeFileSync(targetFile, 'secret data'); + fs.symlinkSync(targetFile, symlinkFile); + + try { + const osManager = createSandboxManager( + { enabled: true }, + tempWorkspace, + ); + + // Attempt to read the target file directly + const { command: cmdTarget, args: argsTarget } = + Platform.cat(targetFile); + const commandTarget = await osManager.prepareCommand({ + command: 
cmdTarget, + args: argsTarget, + cwd: tempWorkspace, + env: process.env, + policy: { forbiddenPaths: [symlinkFile] }, // Forbid the symlink + }); + const resultTarget = await runCommand(commandTarget); + expect(resultTarget.status).not.toBe(0); + + // Attempt to read via the symlink + const { command: cmdLink, args: argsLink } = + Platform.cat(symlinkFile); + const commandLink = await osManager.prepareCommand({ + command: cmdLink, + args: argsLink, + cwd: tempWorkspace, + env: process.env, + policy: { forbiddenPaths: [symlinkFile] }, // Forbid the symlink + }); + const resultLink = await runCommand(commandLink); + expect(resultLink.status).not.toBe(0); + } finally { + fs.rmSync(tempWorkspace, { recursive: true, force: true }); + } + }); + }); + + describe('Network Access', () => { + let server: http.Server; + let url: string; + + beforeAll(async () => { + server = http.createServer((_, res) => { + res.setHeader('Connection', 'close'); + res.writeHead(200); + res.end('ok'); + }); + await new Promise((resolve, reject) => { + server.on('error', reject); + server.listen(0, '127.0.0.1', () => { + const addr = server.address() as import('net').AddressInfo; + url = `http://127.0.0.1:${addr.port}`; + resolve(); + }); + }); + }); + + afterAll(async () => { + if (server) await new Promise((res) => server.close(() => res())); + }); + + it('blocks network access by default', async () => { + const { command, args } = Platform.curl(url); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + }); + + const result = await runCommand(sandboxed); + expect(result.status).not.toBe(0); + }); + + it('grants network access when explicitly allowed', async () => { + const { command, args } = Platform.curl(url); + const sandboxed = await manager.prepareCommand({ + command, + args, + cwd: workspace, + env: process.env, + policy: { networkAccess: true }, + }); + + const result = await runCommand(sandboxed); + 
expect(result.status).toBe(0); + if (!Platform.isWindows) { + expect(result.stdout.trim()).toBe('ok'); + } + }); + }); + }); +}); diff --git a/packages/core/src/services/sandboxManager.test.ts b/packages/core/src/services/sandboxManager.test.ts index 9b1903ef3a..411b49636b 100644 --- a/packages/core/src/services/sandboxManager.test.ts +++ b/packages/core/src/services/sandboxManager.test.ts @@ -5,8 +5,14 @@ */ import os from 'node:os'; -import { describe, expect, it, vi } from 'vitest'; -import { NoopSandboxManager, sanitizePaths } from './sandboxManager.js'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +import { describe, expect, it, vi, beforeEach } from 'vitest'; +import { + NoopSandboxManager, + sanitizePaths, + tryRealpath, +} from './sandboxManager.js'; import { createSandboxManager } from './sandboxManagerFactory.js'; import { LinuxSandboxManager } from '../sandbox/linux/LinuxSandboxManager.js'; import { MacOsSandboxManager } from '../sandbox/macos/MacOsSandboxManager.js'; @@ -30,6 +36,82 @@ describe('sanitizePaths', () => { }); }); +describe('tryRealpath', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should return the realpath if the file exists', async () => { + vi.spyOn(fs, 'realpath').mockResolvedValue('/real/path/to/file.txt'); + const result = await tryRealpath('/some/symlink/to/file.txt'); + expect(result).toBe('/real/path/to/file.txt'); + expect(fs.realpath).toHaveBeenCalledWith('/some/symlink/to/file.txt'); + }); + + it('should fallback to parent directory if file does not exist (ENOENT)', async () => { + vi.spyOn(fs, 'realpath').mockImplementation(async (p) => { + if (p === '/workspace/nonexistent.txt') { + throw Object.assign(new Error('ENOENT: no such file or directory'), { + code: 'ENOENT', + }); + } + if (p === '/workspace') { + return '/real/workspace'; + } + throw new Error(`Unexpected path: ${p}`); + }); + + const result = await tryRealpath('/workspace/nonexistent.txt'); + + // It should combine the 
real path of the parent with the original basename + expect(result).toBe(path.join('/real/workspace', 'nonexistent.txt')); + }); + + it('should recursively fallback up the directory tree on multiple ENOENT errors', async () => { + vi.spyOn(fs, 'realpath').mockImplementation(async (p) => { + if (p === '/workspace/missing_dir/missing_file.txt') { + throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' }); + } + if (p === '/workspace/missing_dir') { + throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' }); + } + if (p === '/workspace') { + return '/real/workspace'; + } + throw new Error(`Unexpected path: ${p}`); + }); + + const result = await tryRealpath('/workspace/missing_dir/missing_file.txt'); + + // It should resolve '/workspace' to '/real/workspace' and append the missing parts + expect(result).toBe( + path.join('/real/workspace', 'missing_dir', 'missing_file.txt'), + ); + }); + + it('should return the path unchanged if it reaches the root directory and it still does not exist', async () => { + const rootPath = path.resolve('/'); + vi.spyOn(fs, 'realpath').mockImplementation(async () => { + throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' }); + }); + + const result = await tryRealpath(rootPath); + expect(result).toBe(rootPath); + }); + + it('should throw an error if realpath fails with a non-ENOENT error (e.g. 
EACCES)', async () => { + vi.spyOn(fs, 'realpath').mockImplementation(async () => { + throw Object.assign(new Error('EACCES: permission denied'), { + code: 'EACCES', + }); + }); + + await expect(tryRealpath('/secret/file.txt')).rejects.toThrow( + 'EACCES: permission denied', + ); + }); +}); + describe('NoopSandboxManager', () => { const sandboxManager = new NoopSandboxManager(); diff --git a/packages/core/src/services/sandboxManager.ts b/packages/core/src/services/sandboxManager.ts index 4bf1db2875..c2f5a4c623 100644 --- a/packages/core/src/services/sandboxManager.ts +++ b/packages/core/src/services/sandboxManager.ts @@ -4,8 +4,10 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fs from 'node:fs/promises'; import os from 'node:os'; import path from 'node:path'; +import { isNodeError } from '../utils/errors.js'; import { sanitizeEnvironment, getSecureSanitizationConfig, @@ -164,4 +166,25 @@ export function sanitizePaths(paths?: string[]): string[] | undefined { return Array.from(uniquePathsMap.values()); } + +/** + * Resolves symlinks for a given path to prevent sandbox escapes. + * If a file does not exist (ENOENT), it recursively resolves the parent directory. + * Other errors (e.g. EACCES) are re-thrown. 
+ */ +export async function tryRealpath(p: string): Promise { + try { + return await fs.realpath(p); + } catch (e) { + if (isNodeError(e) && e.code === 'ENOENT') { + const parentDir = path.dirname(p); + if (parentDir === p) { + return p; + } + return path.join(await tryRealpath(parentDir), path.basename(p)); + } + throw e; + } +} + export { createSandboxManager } from './sandboxManagerFactory.js'; diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts index 69ac326d7f..de1aaeb32f 100644 --- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts +++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts @@ -1675,7 +1675,7 @@ describe('ClearcutLogger', () => { describe('logOnboardingSuccessEvent', () => { it('logs an event with proper name and user tier', () => { const { logger } = setup(); - const event = new OnboardingSuccessEvent('standard-tier'); + const event = new OnboardingSuccessEvent('standard-tier', 100); logger?.logOnboardingSuccessEvent(event); @@ -1686,6 +1686,10 @@ describe('ClearcutLogger', () => { EventMetadataKey.GEMINI_CLI_ONBOARDING_USER_TIER, 'standard-tier', ]); + expect(events[0]).toHaveMetadataValue([ + EventMetadataKey.GEMINI_CLI_ONBOARDING_DURATION_MS, + '100', + ]); }); }); }); diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts index 4791d6d1c2..2915edf712 100644 --- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts +++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts @@ -1821,6 +1821,12 @@ export class ClearcutLogger { value: event.userTier, }); } + if (event.duration_ms !== undefined) { + data.push({ + gemini_cli_key: EventMetadataKey.GEMINI_CLI_ONBOARDING_DURATION_MS, + value: event.duration_ms.toString(), + }); + } this.enqueueLogEvent( this.createLogEvent(EventNames.ONBOARDING_SUCCESS, data), 
); diff --git a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts index b124a84386..b5688a3e65 100644 --- a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts +++ b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts @@ -7,7 +7,7 @@ // Defines valid event metadata keys for Clearcut logging. export enum EventMetadataKey { // Deleted enums: 24 - // Next ID: 194 + // Next ID: 195 GEMINI_CLI_KEY_UNKNOWN = 0, @@ -722,4 +722,7 @@ export enum EventMetadataKey { // Logs the user tier for onboarding success events. GEMINI_CLI_ONBOARDING_USER_TIER = 193, + + // Logs the duration of the onboarding process in milliseconds. + GEMINI_CLI_ONBOARDING_DURATION_MS = 194, } diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts index 71e2e8ea7b..48b7792168 100644 --- a/packages/core/src/telemetry/loggers.test.ts +++ b/packages/core/src/telemetry/loggers.test.ts @@ -2566,7 +2566,7 @@ describe('loggers', () => { }); it('should log onboarding success event to Clearcut and OTEL, and record metrics', () => { - const event = new OnboardingSuccessEvent('standard-tier'); + const event = new OnboardingSuccessEvent('standard-tier', 100); logOnboardingSuccess(mockConfig, event); @@ -2575,7 +2575,7 @@ describe('loggers', () => { ).toHaveBeenCalledWith(event); expect(mockLogger.emit).toHaveBeenCalledWith({ - body: 'Onboarding succeeded. Tier: standard-tier', + body: 'Onboarding succeeded. Tier: standard-tier. 
Duration: 100ms', attributes: { 'session.id': 'test-session-id', 'user.email': 'test-user@example.com', @@ -2584,12 +2584,14 @@ describe('loggers', () => { 'event.timestamp': '2025-01-01T00:00:00.000Z', interactive: false, user_tier: 'standard-tier', + duration_ms: 100, }, }); expect(metrics.recordOnboardingSuccess).toHaveBeenCalledWith( mockConfig, 'standard-tier', + 100, ); }); }); diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index 53c7dcb894..a33c8ca200 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -909,7 +909,7 @@ export function logOnboardingSuccess( }; logger.emit(logRecord); - recordOnboardingSuccess(config, event.userTier); + recordOnboardingSuccess(config, event.userTier, event.duration_ms); }); } diff --git a/packages/core/src/telemetry/metrics.test.ts b/packages/core/src/telemetry/metrics.test.ts index 3b8ae1ea0c..0db3367c1a 100644 --- a/packages/core/src/telemetry/metrics.test.ts +++ b/packages/core/src/telemetry/metrics.test.ts @@ -100,6 +100,7 @@ describe('Telemetry Metrics', () => { let recordFlickerFrameModule: typeof import('./metrics.js').recordFlickerFrame; let recordExitFailModule: typeof import('./metrics.js').recordExitFail; let recordAgentRunMetricsModule: typeof import('./metrics.js').recordAgentRunMetrics; + let recordOnboardingSuccessModule: typeof import('./metrics.js').recordOnboardingSuccess; let recordLinesChangedModule: typeof import('./metrics.js').recordLinesChanged; let recordSlowRenderModule: typeof import('./metrics.js').recordSlowRender; let recordPlanExecutionModule: typeof import('./metrics.js').recordPlanExecution; @@ -148,6 +149,7 @@ describe('Telemetry Metrics', () => { recordFlickerFrameModule = metricsJsModule.recordFlickerFrame; recordExitFailModule = metricsJsModule.recordExitFail; recordAgentRunMetricsModule = metricsJsModule.recordAgentRunMetrics; + recordOnboardingSuccessModule = 
metricsJsModule.recordOnboardingSuccess; recordLinesChangedModule = metricsJsModule.recordLinesChanged; recordSlowRenderModule = metricsJsModule.recordSlowRender; recordPlanExecutionModule = metricsJsModule.recordPlanExecution; @@ -626,6 +628,56 @@ describe('Telemetry Metrics', () => { }); }); + describe('recordOnboardingSuccess', () => { + const mockConfig = { + getSessionId: () => 'test-session-id', + getTelemetryEnabled: () => true, + } as unknown as Config; + + it('should not record metrics if not initialized', () => { + recordOnboardingSuccessModule(mockConfig, 'standard-tier', 100); + expect(mockCounterAddFn).not.toHaveBeenCalled(); + expect(mockHistogramRecordFn).not.toHaveBeenCalled(); + }); + + it('should record onboarding success metrics without duration', () => { + initializeMetricsModule(mockConfig); + mockCounterAddFn.mockClear(); + mockHistogramRecordFn.mockClear(); + + recordOnboardingSuccessModule(mockConfig, 'standard-tier'); + + expect(mockCounterAddFn).toHaveBeenCalledWith(1, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 'test@example.com', + user_tier: 'standard-tier', + }); + expect(mockHistogramRecordFn).not.toHaveBeenCalled(); + }); + + it('should record onboarding success metrics with duration', () => { + initializeMetricsModule(mockConfig); + mockCounterAddFn.mockClear(); + mockHistogramRecordFn.mockClear(); + + recordOnboardingSuccessModule(mockConfig, 'standard-tier', 1500); + + expect(mockCounterAddFn).toHaveBeenCalledWith(1, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 'test@example.com', + user_tier: 'standard-tier', + }); + expect(mockHistogramRecordFn).toHaveBeenCalledWith(1500, { + 'session.id': 'test-session-id', + 'installation.id': 'test-installation-id', + 'user.email': 'test@example.com', + user_tier: 'standard-tier', + }); + }); + }); + describe('OpenTelemetry GenAI Semantic Convention Metrics', () => { const mockConfig = { 
getSessionId: () => 'test-session-id', diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index 16147b3d64..f63ee3aefa 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -53,6 +53,7 @@ const OVERAGE_OPTION_COUNT = 'gemini_cli.overage_option.count'; const CREDIT_PURCHASE_COUNT = 'gemini_cli.credit_purchase.count'; const EVENT_ONBOARDING_START = 'gemini_cli.onboarding.start'; const EVENT_ONBOARDING_SUCCESS = 'gemini_cli.onboarding.success'; +const EVENT_ONBOARDING_DURATION_MS = 'gemini_cli.onboarding.duration'; // Agent Metrics const AGENT_RUN_COUNT = 'gemini_cli.agent.run.count'; @@ -430,6 +431,15 @@ const HISTOGRAM_DEFINITIONS = { success: boolean; }, }, + [EVENT_ONBOARDING_DURATION_MS]: { + description: 'Duration of onboarding in milliseconds.', + unit: 'ms', + valueType: ValueType.INT, + assign: (h: Histogram) => (onboardingDurationHistogram = h), + attributes: {} as { + user_tier?: string; + }, + }, } as const; const PERFORMANCE_COUNTER_DEFINITIONS = { @@ -658,6 +668,7 @@ let overageOptionCounter: Counter | undefined; let creditPurchaseCounter: Counter | undefined; let onboardingStartCounter: Counter | undefined; let onboardingSuccessCounter: Counter | undefined; +let onboardingDurationHistogram: Histogram | undefined; // OpenTelemetry GenAI Semantic Convention Metrics let genAiClientTokenUsageHistogram: Histogram | undefined; @@ -847,12 +858,22 @@ export function recordOnboardingStart(config: Config): void { export function recordOnboardingSuccess( config: Config, userTier?: string, + durationMs?: number, ): void { - if (!onboardingSuccessCounter || !isMetricsInitialized) return; - onboardingSuccessCounter.add(1, { + if (!isMetricsInitialized) return; + + const attributes: Attributes = { ...baseMetricDefinition.getCommonAttributes(config), ...(userTier && { user_tier: userTier }), - }); + }; + + if (onboardingSuccessCounter) { + onboardingSuccessCounter.add(1, 
attributes); + } + + if (durationMs !== undefined && onboardingDurationHistogram) { + onboardingDurationHistogram.record(durationMs, attributes); + } } /** diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index ffca3a2698..9d6cd08c72 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -44,7 +44,6 @@ import { getFileDiffFromResultDisplay } from '../utils/fileDiffUtils.js'; import { LlmRole } from './llmRole.js'; export { LlmRole }; import type { HookType } from '../hooks/types.js'; -import type { UserTierId } from '../code_assist/types.js'; export interface BaseTelemetryEvent { 'event.name': string; @@ -2184,7 +2183,8 @@ export class ApprovalModeSwitchEvent implements BaseTelemetryEvent { toOpenTelemetryAttributes(config: Config): LogAttributes { return { ...getCommonAttributes(config), - event_name: EVENT_APPROVAL_MODE_SWITCH, + 'event.name': EVENT_APPROVAL_MODE_SWITCH, + 'event.timestamp': this['event.timestamp'], from_mode: this.from_mode, to_mode: this.to_mode, }; @@ -2214,7 +2214,8 @@ export class ApprovalModeDurationEvent implements BaseTelemetryEvent { toOpenTelemetryAttributes(config: Config): LogAttributes { return { ...getCommonAttributes(config), - event_name: EVENT_APPROVAL_MODE_DURATION, + 'event.name': EVENT_APPROVAL_MODE_DURATION, + 'event.timestamp': this['event.timestamp'], mode: this.mode, duration_ms: this.duration_ms, }; @@ -2388,12 +2389,14 @@ export const EVENT_ONBOARDING_SUCCESS = 'gemini_cli.onboarding.success'; export class OnboardingSuccessEvent implements BaseTelemetryEvent { 'event.name': 'onboarding_success'; 'event.timestamp': string; - userTier?: UserTierId; + userTier?: string; + duration_ms?: number; - constructor(userTier?: UserTierId) { + constructor(userTier?: string, duration_ms?: number) { this['event.name'] = 'onboarding_success'; this['event.timestamp'] = new Date().toISOString(); this.userTier = userTier; + this.duration_ms = duration_ms; } 
toOpenTelemetryAttributes(config: Config): LogAttributes { @@ -2402,11 +2405,12 @@ export class OnboardingSuccessEvent implements BaseTelemetryEvent { 'event.name': EVENT_ONBOARDING_SUCCESS, 'event.timestamp': this['event.timestamp'], user_tier: this.userTier ?? '', + duration_ms: this.duration_ms ?? 0, }; } toLogBody(): string { - return `Onboarding succeeded.${this.userTier ? ` Tier: ${this.userTier}` : ''}`; + return `Onboarding succeeded.${this.userTier ? ` Tier: ${this.userTier}` : ''}${this.duration_ms !== undefined ? `. Duration: ${this.duration_ms}ms` : ''}`; } } diff --git a/packages/core/src/tools/mcp-client-manager.test.ts b/packages/core/src/tools/mcp-client-manager.test.ts index 84d3e138ce..a96f3f7d29 100644 --- a/packages/core/src/tools/mcp-client-manager.test.ts +++ b/packages/core/src/tools/mcp-client-manager.test.ts @@ -147,6 +147,51 @@ describe('McpClientManager', () => { expect(mockedMcpClient.discoverInto).not.toHaveBeenCalled(); }); + it('should NOT set COMPLETED prematurely when startConfiguredMcpServers finishes before parallel extensions', async () => { + mockConfig.getMcpServers.mockReturnValue({}); + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + + let resolveExtension: (value: void) => void; + const extensionPromise = new Promise((resolve) => { + resolveExtension = resolve; + }); + + mockedMcpClient.connect.mockImplementation(async () => { + await extensionPromise; + }); + + const extensionStartPromise = manager.startExtension({ + name: 'test-extension', + mcpServers: { + 'extension-server': { command: 'node' }, + }, + isActive: true, + version: '1.0.0', + path: '/some-path', + contextFiles: [], + id: '123', + }); + + // Wait for the state to become IN_PROGRESS (since maybeDiscoverMcpServer is async) + await vi.waitFor(() => { + if (manager.getDiscoveryState() !== MCPDiscoveryState.IN_PROGRESS) { + throw new Error('Discovery state is not IN_PROGRESS'); + } + }); + + 
expect(manager.getDiscoveryState()).toBe(MCPDiscoveryState.IN_PROGRESS); + + await manager.startConfiguredMcpServers(); + + // discoveryState should still be IN_PROGRESS because the extension is still starting + expect(manager.getDiscoveryState()).toBe(MCPDiscoveryState.IN_PROGRESS); + + resolveExtension!(undefined); + await extensionStartPromise; + + expect(manager.getDiscoveryState()).toBe(MCPDiscoveryState.COMPLETED); + }); + it('should mark discovery completed when all configured servers are blocked', async () => { mockConfig.getMcpServers.mockReturnValue({ 'test-server': { command: 'node' }, diff --git a/packages/core/src/tools/mcp-client-manager.ts b/packages/core/src/tools/mcp-client-manager.ts index 666b6d5321..3e7ef75d4c 100644 --- a/packages/core/src/tools/mcp-client-manager.ts +++ b/packages/core/src/tools/mcp-client-manager.ts @@ -554,8 +554,10 @@ export class McpClientManager { ); if (Object.keys(servers).length === 0) { - this.discoveryState = MCPDiscoveryState.COMPLETED; - this.eventEmitter?.emit('mcp-client-update', this.clients); + if (!this.discoveryPromise) { + this.discoveryState = MCPDiscoveryState.COMPLETED; + this.eventEmitter?.emit('mcp-client-update', this.clients); + } return; } @@ -574,7 +576,10 @@ export class McpClientManager { // If every configured server was skipped (for example because all are // disabled by user settings), no discovery promise is created. In that // case we must still mark discovery complete or the UI will wait forever. 
- if (this.discoveryState === MCPDiscoveryState.IN_PROGRESS) { + if ( + this.discoveryState === MCPDiscoveryState.IN_PROGRESS && + !this.discoveryPromise + ) { this.discoveryState = MCPDiscoveryState.COMPLETED; this.eventEmitter?.emit('mcp-client-update', this.clients); } diff --git a/packages/core/src/tools/tools.test.ts b/packages/core/src/tools/tools.test.ts index edbc487160..9b200d6f38 100644 --- a/packages/core/src/tools/tools.test.ts +++ b/packages/core/src/tools/tools.test.ts @@ -6,6 +6,7 @@ import { describe, it, expect, vi } from 'vitest'; import { + BaseToolInvocation, DeclarativeTool, hasCycleInSchema, Kind, @@ -272,3 +273,55 @@ describe('Tools Read-Only property', () => { expect(searcher.isReadOnly).toBe(true); }); }); + +describe('toJSON serialization', () => { + it('DeclarativeTool.toJSON should return essential metadata', () => { + const bus = createMockMessageBus(); + class MyTool extends DeclarativeTool { + build(_params: object): ToolInvocation { + throw new Error('Not implemented'); + } + } + const tool = new MyTool( + 'name', + 'display', + 'desc', + Kind.Read, + { type: 'object' }, + bus, + ); + const json = tool.toJSON(); + + expect(json).toEqual({ + name: 'name', + displayName: 'display', + description: 'desc', + kind: Kind.Read, + parameterSchema: { type: 'object' }, + }); + // Ensure messageBus is NOT included in serialization + expect(Object.keys(json)).not.toContain('messageBus'); + expect(JSON.stringify(tool)).toContain('"name":"name"'); + expect(JSON.stringify(tool)).not.toContain('messageBus'); + }); + + it('BaseToolInvocation.toJSON should return only params', () => { + const bus = createMockMessageBus(); + const params = { foo: 'bar' }; + class MyInvocation extends BaseToolInvocation { + getDescription() { + return 'desc'; + } + async execute() { + return { llmContent: '', returnDisplay: '' }; + } + } + const invocation = new MyInvocation(params, bus, 'tool'); + const json = invocation.toJSON(); + + expect(json).toEqual({ params }); 
+ // Ensure messageBus is NOT included in serialization + expect(Object.keys(json)).not.toContain('messageBus'); + expect(JSON.stringify(invocation)).toBe('{"params":{"foo":"bar"}}'); + }); +}); diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index 6b22f7a3e3..23e88b608b 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -379,6 +379,12 @@ export abstract class BaseToolInvocation< updateOutput?: (output: ToolLiveOutput) => void, options?: ExecuteOptions, ): Promise; + + toJSON() { + return { + params: this.params, + }; + } } /** @@ -498,6 +504,16 @@ export abstract class DeclarativeTool< return cloned; } + toJSON() { + return { + name: this.name, + displayName: this.displayName, + description: this.description, + kind: this.kind, + parameterSchema: this.parameterSchema, + }; + } + get isReadOnly(): boolean { return READ_ONLY_KINDS.includes(this.kind); } diff --git a/packages/test-utils/src/fixtures/agents.ts b/packages/test-utils/src/fixtures/agents.ts new file mode 100644 index 0000000000..9469457227 --- /dev/null +++ b/packages/test-utils/src/fixtures/agents.ts @@ -0,0 +1,72 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Represents a test agent used in evaluations and tests. + */ +export interface TestAgent { + /** The unique name of the agent. */ + readonly name: string; + /** The full YAML/Markdown definition of the agent. */ + readonly definition: string; + /** The standard path where this agent should be saved in a test project. */ + readonly path: string; + /** A helper to spread this agent directly into a 'files' object for evalTest. */ + readonly asFile: () => Record; +} + +/** + * Helper to create a TestAgent with consistent formatting and pathing. 
+ */ +function createAgent(options: { + name: string; + description: string; + tools: string[]; + body: string; +}): TestAgent { + const definition = `--- +name: ${options.name} +description: ${options.description} +tools: +${options.tools.map((t) => ` - ${t}`).join('\n')} +--- +${options.body} +`; + + const path = `.gemini/agents/${options.name}.md`; + + return { + name: options.name, + definition, + path, + asFile: () => ({ [path]: definition }), + }; +} + +/** + * A collection of predefined test agents for use in evaluations and tests. + */ +export const TEST_AGENTS = { + /** + * An agent with expertise in updating documentation. + */ + DOCS_AGENT: createAgent({ + name: 'docs-agent', + description: 'An agent with expertise in updating documentation.', + tools: ['read_file', 'write_file'], + body: 'You are the docs agent. Update documentation clearly and accurately.', + }), + + /** + * An agent with expertise in writing and updating tests. + */ + TESTING_AGENT: createAgent({ + name: 'testing-agent', + description: 'An agent with expertise in writing and updating tests.', + tools: ['read_file', 'write_file'], + body: 'You are the test agent. 
Add or update tests.', + }), +} as const; diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts index 42dd12bb43..7bae818040 100644 --- a/packages/test-utils/src/index.ts +++ b/packages/test-utils/src/index.ts @@ -5,6 +5,7 @@ */ export * from './file-system-test-helpers.js'; -export * from './test-rig.js'; +export * from './fixtures/agents.js'; export * from './mock-utils.js'; export * from './test-mcp-server.js'; +export * from './test-rig.js'; diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 287d2b3f76..93bd8fc895 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2142,6 +2142,13 @@ "default": true, "type": "boolean" }, + "maxActionsPerTask": { + "title": "Max Actions Per Task", + "description": "The maximum number of tool calls allowed per browser task. Enforcement is hard: the agent will be terminated when the limit is reached.", + "markdownDescription": "The maximum number of tool calls allowed per browser task. Enforcement is hard: the agent will be terminated when the limit is reached.\n\n- Category: `Advanced`\n- Requires restart: `no`\n- Default: `100`", + "default": 100, + "type": "number" + }, "confirmSensitiveActions": { "title": "Confirm Sensitive Actions", "description": "Require manual confirmation for sensitive browser actions (e.g., fill_form, evaluate_script).",