diff --git a/.github/workflows/gemini-scheduled-stale-pr-closer.yml b/.github/workflows/gemini-scheduled-stale-pr-closer.yml index 366564d56e..cc33848941 100644 --- a/.github/workflows/gemini-scheduled-stale-pr-closer.yml +++ b/.github/workflows/gemini-scheduled-stale-pr-closer.yml @@ -40,6 +40,8 @@ jobs: github-token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}' script: | const dryRun = process.env.DRY_RUN === 'true'; + const fourteenDaysAgo = new Date(); + fourteenDaysAgo.setDate(fourteenDaysAgo.getDate() - 14); const thirtyDaysAgo = new Date(); thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30); @@ -56,48 +58,38 @@ jobs: for (const m of members) maintainerLogins.add(m.login.toLowerCase()); core.info(`Successfully fetched ${members.length} team members from ${team_slug}`); } catch (e) { - core.warning(`Failed to fetch team members from ${team_slug}: ${e.message}`); + // Silently skip if permissions are insufficient; we will rely on author_association + core.debug(`Skipped team fetch for ${team_slug}: ${e.message}`); } } - const isGooglerCache = new Map(); - const isGoogler = async (login) => { - if (isGooglerCache.has(login)) return isGooglerCache.get(login); + const isMaintainer = async (login, assoc) => { + // Reliably identify maintainers using authorAssociation (provided by GitHub) + // and organization membership (if available). 
+ const isTeamMember = maintainerLogins.has(login.toLowerCase()); + const isRepoMaintainer = ['OWNER', 'MEMBER', 'COLLABORATOR'].includes(assoc); + if (isTeamMember || isRepoMaintainer) return true; + + // Fallback: Check if user belongs to the 'google' or 'googlers' orgs (requires permission) try { - // Check membership in 'googlers' or 'google' orgs const orgs = ['googlers', 'google']; for (const org of orgs) { try { - await github.rest.orgs.checkMembershipForUser({ - org: org, - username: login - }); - core.info(`User ${login} is a member of ${org} organization.`); - isGooglerCache.set(login, true); + await github.rest.orgs.checkMembershipForUser({ org: org, username: login }); return true; } catch (e) { - // 404 just means they aren't a member, which is fine if (e.status !== 404) throw e; } } } catch (e) { - core.warning(`Failed to check org membership for ${login}: ${e.message}`); + // Gracefully ignore failures here } - isGooglerCache.set(login, false); return false; }; - const isMaintainer = async (login, assoc) => { - const isTeamMember = maintainerLogins.has(login.toLowerCase()); - const isRepoMaintainer = ['OWNER', 'MEMBER', 'COLLABORATOR'].includes(assoc); - if (isTeamMember || isRepoMaintainer) return true; - - return await isGoogler(login); - }; - - // 2. Determine which PRs to check + // 2. Fetch all open PRs let prs = []; if (context.eventName === 'pull_request') { const { data: pr } = await github.rest.pulls.get({ @@ -118,64 +110,77 @@ jobs: for (const pr of prs) { const maintainerPr = await isMaintainer(pr.user.login, pr.author_association); const isBot = pr.user.type === 'Bot' || pr.user.login.endsWith('[bot]'); + if (maintainerPr || isBot) continue; - // Detection Logic for Linked Issues - // Check 1: Official GitHub "Closing Issue" link (GraphQL) - const linkedIssueQuery = `query($owner:String!, $repo:String!, $number:Int!) 
{ + // Helper: Fetch labels and linked issues via GraphQL + const prDetailsQuery = `query($owner:String!, $repo:String!, $number:Int!) { repository(owner:$owner, name:$repo) { pullRequest(number:$number) { - closingIssuesReferences(first: 1) { totalCount } + closingIssuesReferences(first: 10) { + nodes { + number + labels(first: 20) { + nodes { name } + } + } + } } } }`; - let hasClosingLink = false; + let linkedIssues = []; try { - const res = await github.graphql(linkedIssueQuery, { + const res = await github.graphql(prDetailsQuery, { owner: context.repo.owner, repo: context.repo.repo, number: pr.number }); - hasClosingLink = res.repository.pullRequest.closingIssuesReferences.totalCount > 0; - } catch (e) {} - - // Check 2: Regex for mentions (e.g., "Related to #123", "Part of #123", "#123") - // We check for # followed by numbers or direct URLs to issues. - const body = pr.body || ''; - const mentionRegex = /(?:#|https:\/\/github\.com\/[^\/]+\/[^\/]+\/issues\/)(\d+)/i; - const hasMentionLink = mentionRegex.test(body); - - const hasLinkedIssue = hasClosingLink || hasMentionLink; - - // Logic for Closed PRs (Auto-Reopen) - if (pr.state === 'closed' && context.eventName === 'pull_request' && context.payload.action === 'edited') { - if (hasLinkedIssue) { - core.info(`PR #${pr.number} now has a linked issue. Reopening.`); - if (!dryRun) { - await github.rest.pulls.update({ - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: pr.number, - state: 'open' - }); - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: pr.number, - body: "Thank you for linking an issue! This pull request has been automatically reopened." 
- }); - } - } - continue; + linkedIssues = res.repository.pullRequest.closingIssuesReferences.nodes; + } catch (e) { + core.warning(`GraphQL fetch failed for PR #${pr.number}: ${e.message}`); } - // Logic for Open PRs (Immediate Closure) - if (pr.state === 'open' && !maintainerPr && !hasLinkedIssue && !isBot) { - core.info(`PR #${pr.number} is missing a linked issue. Closing.`); + // Check for mentions in body as fallback (regex) + const body = pr.body || ''; + const mentionRegex = /(?:#|https:\/\/github\.com\/[^\/]+\/[^\/]+\/issues\/)(\d+)/i; + const matches = body.match(mentionRegex); + if (matches && linkedIssues.length === 0) { + const issueNumber = parseInt(matches[1]); + try { + const { data: issue } = await github.rest.issues.get({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber + }); + linkedIssues = [{ number: issueNumber, labels: { nodes: issue.labels.map(l => ({ name: l.name })) } }]; + } catch (e) {} + } + + // 3. Enforcement Logic + const prLabels = pr.labels.map(l => l.name.toLowerCase()); + const hasHelpWanted = prLabels.includes('help wanted') || + linkedIssues.some(issue => issue.labels.nodes.some(l => l.name.toLowerCase() === 'help wanted')); + + const hasMaintainerOnly = prLabels.includes('🔒 maintainer only') || + linkedIssues.some(issue => issue.labels.nodes.some(l => l.name.toLowerCase() === '🔒 maintainer only')); + + const hasLinkedIssue = linkedIssues.length > 0; + + // Closure Policy: No help-wanted label = Close after 14 days + if (pr.state === 'open' && !hasHelpWanted && !hasMaintainerOnly) { + const prCreatedAt = new Date(pr.created_at); + + // We give a 14-day grace period for non-help-wanted PRs to be manually reviewed/labeled by an EM + if (prCreatedAt > fourteenDaysAgo) { + core.info(`PR #${pr.number} is new and lacks 'help wanted'. Giving 14-day grace period for EM review.`); + continue; + } + + core.info(`PR #${pr.number} is older than 14 days and lacks 'help wanted' association. 
Closing.`); if (!dryRun) { await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: pr.number, - body: "Hi there! Thank you for your contribution to Gemini CLI. \n\nTo improve our contribution process and better track changes, we now require all pull requests to be associated with an existing issue, as announced in our [recent discussion](https://github.com/google-gemini/gemini-cli/discussions/16706) and as detailed in our [CONTRIBUTING.md](https://github.com/google-gemini/gemini-cli/blob/main/CONTRIBUTING.md#1-link-to-an-existing-issue).\n\nThis pull request is being closed because it is not currently linked to an issue. **Once you have updated the description of this PR to link an issue (e.g., by adding `Fixes #123` or `Related to #123`), it will be automatically reopened.**\n\n**How to link an issue:**\nAdd a keyword followed by the issue number (e.g., `Fixes #123`) in the description of your pull request. For more details on supported keywords and how linking works, please refer to the [GitHub Documentation on linking pull requests to issues](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue).\n\nThank you for your understanding and for being a part of our community!" + body: "Hi there! Thank you for your interest in contributing to Gemini CLI. \n\nTo ensure we maintain high code quality and focus on our prioritized roadmap, we have updated our contribution policy (see [Discussion #17383](https://github.com/google-gemini/gemini-cli/discussions/17383)). \n\n**We only *guarantee* review and consideration of pull requests for issues that are explicitly labeled as 'help wanted'.** All other community pull requests are subject to closure after 14 days if they do not align with our current focus areas. For this reason, we strongly recommend that contributors only submit pull requests against issues explicitly labeled as **'help-wanted'**. 
\n\nThis pull request is being closed as it has been open for 14 days without a 'help wanted' designation. We encourage you to find and contribute to existing 'help wanted' issues in our backlog! Thank you for your understanding and for being part of our community!" }); await github.rest.pulls.update({ owner: context.repo.owner, @@ -187,27 +192,22 @@ jobs: continue; } - // Staleness check (Scheduled runs only) - if (pr.state === 'open' && context.eventName !== 'pull_request') { - const labels = pr.labels.map(l => l.name.toLowerCase()); - if (labels.includes('help wanted') || labels.includes('🔒 maintainer only')) continue; + // Also check for linked issue even if it has help wanted (redundant but safe) + if (pr.state === 'open' && !hasLinkedIssue) { + // Already covered by hasHelpWanted check above, but good for future-proofing + continue; + } + // 4. Staleness Check (Scheduled only) + if (pr.state === 'open' && context.eventName !== 'pull_request') { // Skip PRs that were created less than 30 days ago - they cannot be stale yet const prCreatedAt = new Date(pr.created_at); - if (prCreatedAt > thirtyDaysAgo) { - const daysOld = Math.floor((Date.now() - prCreatedAt.getTime()) / (1000 * 60 * 60 * 24)); - core.info(`PR #${pr.number} was created ${daysOld} days ago. Skipping staleness check.`); - continue; - } + if (prCreatedAt > thirtyDaysAgo) continue; - // Initialize lastActivity to PR creation date (not epoch) as a safety baseline. - // This ensures we never incorrectly mark a PR as stale due to failed activity lookups. 
let lastActivity = new Date(pr.created_at); try { const reviews = await github.paginate(github.rest.pulls.listReviews, { - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: pr.number + owner: context.repo.owner, repo: context.repo.repo, pull_number: pr.number }); for (const r of reviews) { if (await isMaintainer(r.user.login, r.author_association)) { @@ -216,9 +216,7 @@ jobs: } } const comments = await github.paginate(github.rest.issues.listComments, { - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: pr.number + owner: context.repo.owner, repo: context.repo.repo, issue_number: pr.number }); for (const c of comments) { if (await isMaintainer(c.user.login, c.author_association)) { @@ -226,25 +224,23 @@ jobs: if (d > lastActivity) lastActivity = d; } } - } catch (e) { - core.warning(`Failed to fetch reviews/comments for PR #${pr.number}: ${e.message}`); - } - - // For maintainer PRs, the PR creation itself counts as maintainer activity. - // (Now redundant since we initialize to pr.created_at, but kept for clarity) - if (maintainerPr) { - const d = new Date(pr.created_at); - if (d > lastActivity) lastActivity = d; - } + } catch (e) {} if (lastActivity < thirtyDaysAgo) { - core.info(`PR #${pr.number} is stale.`); + const labels = pr.labels.map(l => l.name.toLowerCase()); + const isProtected = labels.includes('help wanted') || labels.includes('🔒 maintainer only'); + if (isProtected) { + core.info(`PR #${pr.number} is stale but has a protected label. Skipping closure.`); + continue; + } + + core.info(`PR #${pr.number} is stale (no maintainer activity for 30+ days). Closing.`); if (!dryRun) { await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: pr.number, - body: "Hi there! Thank you for your contribution to Gemini CLI. 
We really appreciate the time and effort you've put into this pull request.\n\nTo keep our backlog manageable and ensure we're focusing on current priorities, we are closing pull requests that haven't seen maintainer activity for 30 days. Currently, the team is prioritizing work associated with **🔒 maintainer only** or **help wanted** issues.\n\nIf you believe this change is still critical, please feel free to comment with updated details. Otherwise, we encourage contributors to focus on open issues labeled as **help wanted**. Thank you for your understanding!" + body: "Hi there! Thank you for your contribution. To keep our backlog manageable, we are closing pull requests that haven't seen maintainer activity for 30 days. If you're still working on this, please let us know!" }); await github.rest.pulls.update({ owner: context.repo.owner, diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 35a09a99ab..eb9ba4158e 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -125,7 +125,9 @@ they appear in the UI. | UI Label | Setting | Description | Default | | ------------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- | +| Tool Sandboxing | `security.toolSandboxing` | Experimental tool-level sandboxing (implementation in progress). | `false` | | Disable YOLO Mode | `security.disableYoloMode` | Disable YOLO mode, even if enabled by a flag. | `false` | +| Disable Always Allow | `security.disableAlwaysAllow` | Disable "Always allow" options in tool confirmation dialogs. | `false` | | Allow Permanent Tool Approval | `security.enablePermanentToolApproval` | Enable the "Allow for all future sessions" option in tool confirmation dialogs. 
| `false` | | Auto-add to Policy by Default | `security.autoAddToPolicyByDefault` | When enabled, the "Allow for all future sessions" option becomes the default choice for low-risk tools in trusted workspaces. | `false` | | Blocks extensions from Git | `security.blockGitExtensions` | Blocks installing and loading extensions from Git. | `false` | @@ -150,6 +152,7 @@ they appear in the UI. | Plan | `experimental.plan` | Enable Plan Mode. | `true` | | Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | | Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | ### Skills diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 4e0e9856d9..01aaea676f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -677,6 +677,141 @@ their corresponding top-level category object in your `settings.json` file. used. - **Default:** `[]` +- **`modelConfigs.modelDefinitions`** (object): + - **Description:** Registry of model metadata, including tier, family, and + features. 
+ - **Default:** + + ```json + { + "gemini-3.1-pro-preview": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3.1-pro-preview-customtools": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3-pro-preview": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3-flash-preview": { + "tier": "flash", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": true + } + }, + "gemini-2.5-pro": { + "tier": "pro", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "gemini-2.5-flash": { + "tier": "flash", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "gemini-2.5-flash-lite": { + "tier": "flash-lite", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "auto": { + "tier": "auto", + "isPreview": true, + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "pro": { + "tier": "pro", + "isPreview": false, + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "flash": { + "tier": "flash", + "isPreview": false, + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "flash-lite": { + "tier": "flash-lite", + "isPreview": false, + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "auto-gemini-3": { + "displayName": "Auto (Gemini 3)", + "tier": "auto", + "isPreview": 
true, + "dialogLocation": "main", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash", + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "auto-gemini-2.5": { + "displayName": "Auto (Gemini 2.5)", + "tier": "auto", + "isPreview": false, + "dialogLocation": "main", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash", + "features": { + "thinking": false, + "multimodalToolUse": false + } + } + } + ``` + + - **Requires restart:** Yes + #### `agents` - **`agents.overrides`** (object): @@ -706,6 +841,17 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `undefined` - **Requires restart:** Yes +- **`agents.browser.allowedDomains`** (array): + - **Description:** A list of allowed domains for the browser agent (e.g., + ["github.com", "*.google.com"]). + - **Default:** + + ```json + ["github.com", "*.google.com", "localhost"] + ``` + + - **Requires restart:** Yes + - **`agents.browser.disableUserInput`** (boolean): - **Description:** Disable user input on browser window during automation. - **Default:** `true` @@ -773,9 +919,10 @@ their corresponding top-level category object in your `settings.json` file. #### `tools` - **`tools.sandbox`** (string): - - **Description:** Sandbox execution environment. Set to a boolean to enable - or disable the sandbox, provide a string path to a sandbox profile, or - specify an explicit sandbox command (e.g., "docker", "podman", "lxc"). + - **Description:** Legacy full-process sandbox execution environment. Set to a + boolean to enable or disable the sandbox, provide a string path to a sandbox + profile, or specify an explicit sandbox command (e.g., "docker", "podman", + "lxc"). - **Default:** `undefined` - **Requires restart:** Yes @@ -879,11 +1026,22 @@ their corresponding top-level category object in your `settings.json` file. 
#### `security` +- **`security.toolSandboxing`** (boolean): + - **Description:** Experimental tool-level sandboxing (implementation in + progress). + - **Default:** `false` + - **`security.disableYoloMode`** (boolean): - **Description:** Disable YOLO mode, even if enabled by a flag. - **Default:** `false` - **Requires restart:** Yes +- **`security.disableAlwaysAllow`** (boolean): + - **Description:** Disable "Always allow" options in tool confirmation + dialogs. + - **Default:** `false` + - **Requires restart:** Yes + - **`security.enablePermanentToolApproval`** (boolean): - **Description:** Enable the "Allow for all future sessions" option in tool confirmation dialogs. @@ -1068,6 +1226,12 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.dynamicModelConfiguration`** (boolean): + - **Description:** Enable dynamic model configuration (definitions, + resolutions, and chains) via settings. + - **Default:** `false` + - **Requires restart:** Yes + - **`experimental.gemmaModelRouter.enabled`** (boolean): - **Description:** Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. @@ -1085,6 +1249,11 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `"gemma3-1b-gpu-custom"` - **Requires restart:** Yes +- **`experimental.topicUpdateNarration`** (boolean): + - **Description:** Enable the experimental Topic & Update communication model + for reduced chattiness and structured progress reporting. + - **Default:** `false` + #### `skills` - **`skills.enabled`** (boolean): @@ -1174,7 +1343,8 @@ their corresponding top-level category object in your `settings.json` file. #### `admin` - **`admin.secureModeEnabled`** (boolean): - - **Description:** If true, disallows yolo mode from being used. 
+ - **Description:** If true, disallows YOLO mode and "Always allow" options + from being used. - **Default:** `false` - **`admin.extensions.enabled`** (boolean): diff --git a/docs/reference/policy-engine.md b/docs/reference/policy-engine.md index 9b63c89f62..495a4584e1 100644 --- a/docs/reference/policy-engine.md +++ b/docs/reference/policy-engine.md @@ -60,7 +60,7 @@ command. ```toml [[rule]] toolName = "run_shell_command" -commandPrefix = "git " +commandPrefix = "git" decision = "ask_user" priority = 100 ``` @@ -264,7 +264,7 @@ argsPattern = '"command":"(git|npm)' # (Optional) A string or array of strings that a shell command must start with. # This is syntactic sugar for `toolName = "run_shell_command"` and an `argsPattern`. -commandPrefix = "git " +commandPrefix = "git" # (Optional) A regex to match against the entire shell command. # This is also syntactic sugar for `toolName = "run_shell_command"`. @@ -321,7 +321,7 @@ This rule will ask for user confirmation before executing any `git` command. ```toml [[rule]] toolName = "run_shell_command" -commandPrefix = "git " +commandPrefix = "git" decision = "ask_user" priority = 100 ``` diff --git a/docs/tools/mcp-server.md b/docs/tools/mcp-server.md index 6b8cd22ac0..5cdbbacf1c 100644 --- a/docs/tools/mcp-server.md +++ b/docs/tools/mcp-server.md @@ -729,6 +729,43 @@ tools. The model will automatically: The MCP integration tracks several states: +#### Overriding extension configurations + +If an MCP server is provided by an extension (for example, the +`google-workspace` extension), you can still override its settings in your local +`settings.json`. Gemini CLI merges your local configuration with the extension's +defaults: + +- **Tool lists:** Tool lists are merged securely to ensure the most restrictive + policy wins: + - **Exclusions (`excludeTools`):** Arrays are combined (unioned). If either + source blocks a tool, it remains disabled. + - **Inclusions (`includeTools`):** Arrays are intersected. 
If both sources + provide an allowlist, only tools present in **both** lists are enabled. If + only one source provides an allowlist, that list is respected. + - **Precedence:** `excludeTools` always takes precedence over `includeTools`. + + This ensures you always have veto power over tools provided by an extension + and that an extension cannot re-enable tools you have omitted from your + personal allowlist. + +- **Environment variables:** The `env` objects are merged. If the same variable + is defined in both places, your local value takes precedence. +- **Scalar properties:** Properties like `command`, `url`, and `timeout` are + replaced by your local values if provided. + +**Example override:** + +```json +{ + "mcpServers": { + "google-workspace": { + "excludeTools": ["gmail.send"] + } + } +} +``` + #### Server status (`MCPServerStatus`) - **`DISCONNECTED`:** Server is not connected or has errors diff --git a/eslint.config.js b/eslint.config.js index d3a267f30a..150a50d2b7 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -303,7 +303,7 @@ export default tseslint.config( }, }, { - files: ['./scripts/**/*.js', 'esbuild.config.js'], + files: ['./scripts/**/*.js', 'esbuild.config.js', 'packages/core/scripts/**/*.{js,mjs}'], languageOptions: { globals: { ...globals.node, diff --git a/evals/answer-vs-act.eval.ts b/evals/answer-vs-act.eval.ts index 4e30b828d0..ff87d12564 100644 --- a/evals/answer-vs-act.eval.ts +++ b/evals/answer-vs-act.eval.ts @@ -111,7 +111,7 @@ describe('Answer vs. ask eval', () => { * Ensures that when the user asks a question about style, the agent does NOT * automatically modify the file. 
*/ - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: 'should not edit files when asked about style', prompt: 'Is app.ts following good style?', files: FILES, diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts index ff7483416b..dd4f8fbbd1 100644 --- a/evals/hierarchical_memory.eval.ts +++ b/evals/hierarchical_memory.eval.ts @@ -11,7 +11,7 @@ import { assertModelHasOutput } from '../integration-tests/test-helper.js'; describe('Hierarchical Memory', () => { const conflictResolutionTest = 'Agent follows hierarchy for contradictory instructions'; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: conflictResolutionTest, params: { settings: { diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index e4fe9bc687..901cbf3c17 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -14,7 +14,7 @@ import { describe('save_memory', () => { const TEST_PREFIX = 'Save memory test: '; const rememberingFavoriteColor = "Agent remembers user's favorite color"; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: rememberingFavoriteColor, params: { settings: { tools: { core: ['save_memory'] } }, @@ -79,7 +79,7 @@ describe('save_memory', () => { const ignoringTemporaryInformation = 'Agent ignores temporary conversation details'; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: ignoringTemporaryInformation, params: { settings: { tools: { core: ['save_memory'] } }, @@ -104,7 +104,7 @@ describe('save_memory', () => { }); const rememberingPetName = "Agent remembers user's pet's name"; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: rememberingPetName, params: { settings: { tools: { core: ['save_memory'] } }, diff --git a/package-lock.json b/package-lock.json index 7cc458581b..55c4bc93ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@google/gemini-cli", - "version": "0.35.0-nightly.20260311.657f19c1f", + 
"version": "0.35.0-nightly.20260313.bb060d7a9", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@google/gemini-cli", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "workspaces": [ "packages/*" ], @@ -3038,6 +3038,27 @@ "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", "license": "BSD-3-Clause" }, + "node_modules/@puppeteer/browsers": { + "version": "2.13.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.13.0.tgz", + "integrity": "sha512-46BZJYJjc/WwmKjsvDFykHtXrtomsCIrwYQPOP7VfMJoZY2bsDF9oROBABR3paDjDcmkUye1Pb1BqdcdiipaWA==", + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.4.3", + "extract-zip": "^2.0.1", + "progress": "^2.0.3", + "proxy-agent": "^6.5.0", + "semver": "^7.7.4", + "tar-fs": "^3.1.1", + "yargs": "^17.7.2" + }, + "bin": { + "browsers": "lib/cjs/main-cli.js" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.59.0", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", @@ -3762,6 +3783,12 @@ "node": ">= 10" } }, + "node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", + "license": "MIT" + }, "node_modules/@ts-morph/common": { "version": "0.12.3", "resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.12.3.tgz", @@ -5584,6 +5611,18 @@ "node": ">=12" } }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "license": "MIT", + 
"dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/ast-v8-to-istanbul": { "version": "0.3.8", "resolved": "https://registry.npmjs.org/ast-v8-to-istanbul/-/ast-v8-to-istanbul-0.3.8.tgz", @@ -5676,6 +5715,20 @@ "typed-rest-client": "^1.8.4" } }, + "node_modules/b4a": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz", + "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, "node_modules/balanced-match": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", @@ -5685,6 +5738,93 @@ "node": "18 || 20 || >=22" } }, + "node_modules/bare-events": { + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", + "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", + "license": "Apache-2.0", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + "node_modules/bare-fs": { + "version": "4.5.5", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.5.tgz", + "integrity": "sha512-XvwYM6VZqKoqDll8BmSww5luA5eflDzY0uEFfBJtFKe4PAAtxBjU3YIxzIBzhyaEQBy1VXEQBto4cpN5RZJw+w==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + "bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.7.1", + "resolved": 
"https://registry.npmjs.org/bare-os/-/bare-os-3.7.1.tgz", + "integrity": "sha512-ebvMaS5BgZKmJlvuWh14dg9rbUI84QeV3WlWn6Ph6lFI8jJoh7ADtVTyD2c93euwbe+zgi0DVrl4YmqXeM9aIA==", + "license": "Apache-2.0", + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "license": "Apache-2.0", + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.8.1.tgz", + "integrity": "sha512-bSeR8RfvbRwDpD7HWZvn8M3uYNDrk7m9DQjYOFkENZlXW8Ju/MPaqUPQq5LqJ3kyjEm07siTaAQ7wBKCU59oHg==", + "license": "Apache-2.0", + "dependencies": { + "streamx": "^2.21.0", + "teex": "^1.0.1" + }, + "peerDependencies": { + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz", + "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==", + "license": "Apache-2.0", + "dependencies": { + "bare-path": "^3.0.0" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -5705,6 +5845,15 @@ ], "license": "MIT" }, + "node_modules/basic-ftp": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.2.0.tgz", + "integrity": "sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/before-after-hook": { "version": "4.0.0", "resolved": 
"https://registry.npmjs.org/before-after-hook/-/before-after-hook-4.0.0.tgz", @@ -6103,6 +6252,32 @@ "node": ">=18" } }, + "node_modules/chrome-devtools-mcp": { + "version": "0.19.0", + "resolved": "https://registry.npmjs.org/chrome-devtools-mcp/-/chrome-devtools-mcp-0.19.0.tgz", + "integrity": "sha512-LfqjOxdUjWvCQrfeI5V3ZBJCUIDKGNmexSbSAgsrjVggN4X1OSObLxleSlX2zwcXRZYxqy209cww0MXcXuN1zw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "chrome-devtools-mcp": "build/src/index.js" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=23" + } + }, + "node_modules/chromium-bidi": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-14.0.0.tgz", + "integrity": "sha512-9gYlLtS6tStdRWzrtXaTMnqcM4dudNegMXJxkR0I/CXObHalYeYcAMPrL19eroNZHtJ8DQmu1E+ZNOYu/IXMXw==", + "license": "Apache-2.0", + "dependencies": { + "mitt": "^3.0.1", + "zod": "^3.24.1" + }, + "peerDependencies": { + "devtools-protocol": "*" + } + }, "node_modules/cjs-module-lexer": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-2.2.0.tgz", @@ -6945,6 +7120,20 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "license": "MIT", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -7204,6 +7393,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/devtools-protocol": { + "version": "0.0.1581282", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", + "integrity": 
"sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", + "license": "BSD-3-Clause" + }, "node_modules/dezalgo": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.4.tgz", @@ -7759,6 +7954,27 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, "node_modules/eslint": { "version": "9.29.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.29.0.tgz", @@ -8118,7 +8334,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=4.0" @@ -8137,7 +8352,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=0.10.0" @@ -8189,6 +8403,15 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/events-universal": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", + "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.7.0" + } + }, 
"node_modules/eventsource": { "version": "3.0.7", "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", @@ -8395,6 +8618,12 @@ "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", "license": "MIT" }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", + "license": "MIT" + }, "node_modules/fast-glob": { "version": "3.3.3", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", @@ -9037,6 +9266,29 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/get-uri": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz", + "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", + "license": "MIT", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/get-uri/node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/glob": { "version": "12.0.0", "resolved": "https://registry.npmjs.org/glob/-/glob-12.0.0.tgz", @@ -9663,7 +9915,6 @@ "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "dev": true, "license": "MIT", "dependencies": { "agent-base": "^7.1.0", @@ -11759,6 +12010,12 @@ "node": ">= 18" } }, + 
"node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", + "license": "MIT" + }, "node_modules/mkdirp": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz", @@ -11959,6 +12216,15 @@ "node": ">= 0.6" } }, + "node_modules/netmask": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", + "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, "node_modules/node-addon-api": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz", @@ -12662,6 +12928,38 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/pac-proxy-agent": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", + "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", + "license": "MIT", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.6", + "pac-resolver": "^7.0.1", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + "license": "MIT", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/package-json": { "version": "10.0.1", "resolved": 
"https://registry.npmjs.org/package-json/-/package-json-10.0.1.tgz", @@ -13132,6 +13430,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/prompts": { "version": "2.4.2", "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", @@ -13237,6 +13544,40 @@ "node": ">= 0.10" } }, + "node_modules/proxy-agent": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz", + "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.6", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.1.0", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-agent/node_modules/lru-cache": { + "version": "7.18.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, "node_modules/psl": { "version": "1.15.0", "resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz", @@ -13290,6 +13631,45 @@ "node": ">=6" } }, + "node_modules/puppeteer-core": { + "version": 
"24.39.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.39.0.tgz", + "integrity": "sha512-SzIxz76Kgu17HUIi57HOejPiN0JKa9VCd2GcPY1sAh6RA4BzGZarFQdOYIYrBdUVbtyH7CrDb9uhGEwVXK/YNA==", + "license": "Apache-2.0", + "dependencies": { + "@puppeteer/browsers": "2.13.0", + "chromium-bidi": "14.0.0", + "debug": "^4.4.3", + "devtools-protocol": "0.0.1581282", + "typed-query-selector": "^2.12.1", + "webdriver-bidi-protocol": "0.4.1", + "ws": "^8.19.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/puppeteer-core/node_modules/ws": { + "version": "8.19.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", + "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/qs": { "version": "6.14.2", "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.2.tgz", @@ -14250,9 +14630,9 @@ } }, "node_modules/semver": { - "version": "7.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", - "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==", + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "license": "ISC", "bin": { "semver": "bin/semver.js" @@ -14583,6 +14963,54 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + 
"license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", + "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", + "license": "MIT", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", + "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "license": "BSD-3-Clause", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -14711,6 +15139,17 @@ "integrity": "sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==", "license": "MIT" }, + "node_modules/streamx": { + "version": "2.23.0", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz", + "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==", + "license": "MIT", + "dependencies": { + "events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, "node_modules/strict-event-emitter": { "version": "0.5.1", "resolved": 
"https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz", @@ -15308,6 +15747,32 @@ "node": ">=8" } }, + "node_modules/tar-fs": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz", + "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/tar-stream": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz", + "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, "node_modules/teeny-request": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-9.0.0.tgz", @@ -15363,6 +15828,15 @@ "node": ">= 6" } }, + "node_modules/teex": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz", + "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", + "license": "MIT", + "dependencies": { + "streamx": "^2.12.5" + } + }, "node_modules/terminal-link": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/terminal-link/-/terminal-link-4.0.0.tgz", @@ -15395,6 +15869,15 @@ "node": ">=18" } }, + "node_modules/text-decoder": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", + "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, "node_modules/text-hex": { "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", @@ -15719,7 +16202,6 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, "license": "0BSD" }, "node_modules/tsx": { @@ -15869,6 +16351,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/typed-query-selector": { + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.1.tgz", + "integrity": "sha512-uzR+FzI8qrUEIu96oaeBJmd9E7CFEiQ3goA5qCVgc4s5llSubcfGHq9yUstZx/k4s9dXHVKsE35YWoFyvEqEHA==", + "license": "MIT" + }, "node_modules/typed-rest-client": { "version": "1.8.11", "resolved": "https://registry.npmjs.org/typed-rest-client/-/typed-rest-client-1.8.11.tgz", @@ -16336,6 +16824,12 @@ } } }, + "node_modules/webdriver-bidi-protocol": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.4.1.tgz", + "integrity": "sha512-ARrjNjtWRRs2w4Tk7nqrf2gBI0QXWuOmMCx2hU+1jUt6d00MjMxURrhxhGbrsoiZKJrhTSTzbIrc554iKI10qw==", + "license": "Apache-2.0" + }, "node_modules/webidl-conversions": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", @@ -16890,7 +17384,7 @@ }, "packages/a2a-server": { "name": "@google/gemini-cli-a2a-server", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "dependencies": { "@a2a-js/sdk": "0.3.11", "@google-cloud/storage": "^7.16.0", @@ -17005,7 +17499,7 @@ }, "packages/cli": { "name": "@google/gemini-cli", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "Apache-2.0", "dependencies": { "@agentclientprotocol/sdk": "^0.12.0", @@ -17177,7 +17671,7 @@ }, "packages/core": { "name": "@google/gemini-cli-core", - "version": 
"0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "Apache-2.0", "dependencies": { "@a2a-js/sdk": "0.3.11", @@ -17232,6 +17726,7 @@ "open": "^10.1.2", "picomatch": "^4.0.1", "proper-lockfile": "^4.1.2", + "puppeteer-core": "^24.0.0", "read-package-up": "^11.0.0", "shell-quote": "^1.8.3", "simple-git": "^3.28.0", @@ -17250,6 +17745,7 @@ "@types/fast-levenshtein": "^0.0.4", "@types/js-yaml": "^4.0.9", "@types/picomatch": "^4.0.1", + "chrome-devtools-mcp": "^0.19.0", "msw": "^2.3.4", "typescript": "^5.3.3", "vitest": "^3.1.1" @@ -17439,7 +17935,7 @@ }, "packages/devtools": { "name": "@google/gemini-cli-devtools", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "Apache-2.0", "dependencies": { "ws": "^8.16.0" @@ -17454,7 +17950,7 @@ }, "packages/sdk": { "name": "@google/gemini-cli-sdk", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "Apache-2.0", "dependencies": { "@google/gemini-cli-core": "file:../core", @@ -17471,7 +17967,7 @@ }, "packages/test-utils": { "name": "@google/gemini-cli-test-utils", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "Apache-2.0", "dependencies": { "@google/gemini-cli-core": "file:../core", @@ -17488,7 +17984,7 @@ }, "packages/vscode-ide-companion": { "name": "gemini-cli-vscode-ide-companion", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "LICENSE", "dependencies": { "@modelcontextprotocol/sdk": "^1.23.0", diff --git a/package.json b/package.json index 0067054629..ca1b15ba41 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "engines": { "node": ">=20.0.0" }, @@ -14,7 +14,7 @@ "url": 
"git+https://github.com/google-gemini/gemini-cli.git" }, "config": { - "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.35.0-nightly.20260311.657f19c1f" + "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.35.0-nightly.20260313.bb060d7a9" }, "scripts": { "start": "cross-env NODE_ENV=development node scripts/start.js", diff --git a/packages/a2a-server/package.json b/packages/a2a-server/package.json index ecf3ee3d66..8349626027 100644 --- a/packages/a2a-server/package.json +++ b/packages/a2a-server/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-a2a-server", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "description": "Gemini CLI A2A Server", "repository": { "type": "git", diff --git a/packages/a2a-server/src/commands/memory.ts b/packages/a2a-server/src/commands/memory.ts index d01ff5e7d4..f7c3dfa896 100644 --- a/packages/a2a-server/src/commands/memory.ts +++ b/packages/a2a-server/src/commands/memory.ts @@ -104,6 +104,7 @@ export class AddMemoryCommand implements Command { const signal = abortController.signal; await tool.buildAndExecute(result.toolArgs, signal, undefined, { sanitizationConfig: DEFAULT_SANITIZATION_CONFIG, + sandboxManager: loopContext.sandboxManager, }); await refreshMemory(context.config); return { diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts index c55eae98ee..fd4d721732 100644 --- a/packages/a2a-server/src/utils/testing_utils.ts +++ b/packages/a2a-server/src/utils/testing_utils.ts @@ -21,7 +21,9 @@ import { tmpdir, type Config, type Storage, + NoopSandboxManager, type ToolRegistry, + type SandboxManager, } from '@google/gemini-cli-core'; import { createMockMessageBus } from '@google/gemini-cli-core/src/test-utils/mock-message-bus.js'; import { expect, vi } from 'vitest'; @@ -97,6 +99,15 @@ export function createMockConfig( }), getGitService: vi.fn(), validatePathAccess: 
vi.fn().mockReturnValue(undefined), + getShellExecutionConfig: vi.fn().mockReturnValue({ + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + sandboxManager: new NoopSandboxManager() as unknown as SandboxManager, + sanitizationConfig: { + allowedEnvironmentVariables: [], + blockedEnvironmentVariables: [], + enableEnvironmentVariableRedaction: false, + }, + }), ...overrides, } as unknown as Config; diff --git a/packages/cli/package.json b/packages/cli/package.json index 648c4751e5..8bfe5b69f0 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "description": "Gemini CLI", "license": "Apache-2.0", "repository": { @@ -26,7 +26,7 @@ "dist" ], "config": { - "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.35.0-nightly.20260311.657f19c1f" + "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.35.0-nightly.20260313.bb060d7a9" }, "dependencies": { "@agentclientprotocol/sdk": "^0.12.0", diff --git a/packages/cli/src/acp/acpClient.test.ts b/packages/cli/src/acp/acpClient.test.ts index e2fc0f0d33..65b23247ef 100644 --- a/packages/cli/src/acp/acpClient.test.ts +++ b/packages/cli/src/acp/acpClient.test.ts @@ -176,6 +176,7 @@ describe('GeminiAgent', () => { getGemini31LaunchedSync: vi.fn().mockReturnValue(false), getHasAccessToPreviewModel: vi.fn().mockReturnValue(false), getCheckpointingEnabled: vi.fn().mockReturnValue(false), + getDisableAlwaysAllow: vi.fn().mockReturnValue(false), } as unknown as Mocked>>; mockSettings = { merged: { @@ -654,6 +655,7 @@ describe('Session', () => { getCheckpointingEnabled: vi.fn().mockReturnValue(false), getGitService: vi.fn().mockResolvedValue({} as GitService), waitForMcpInit: vi.fn(), + getDisableAlwaysAllow: vi.fn().mockReturnValue(false), } as unknown as Mocked; mockConnection = { sessionUpdate: vi.fn(), 
@@ -947,6 +949,61 @@ describe('Session', () => { ); }); + it('should exclude always allow options when disableAlwaysAllow is true', async () => { + mockConfig.getDisableAlwaysAllow = vi.fn().mockReturnValue(true); + const confirmationDetails = { + type: 'info', + onConfirm: vi.fn(), + }; + mockTool.build.mockReturnValue({ + getDescription: () => 'Test Tool', + toolLocations: () => [], + shouldConfirmExecute: vi.fn().mockResolvedValue(confirmationDetails), + execute: vi.fn().mockResolvedValue({ llmContent: 'Tool Result' }), + }); + + mockConnection.requestPermission.mockResolvedValue({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedOnce, + }, + }); + + const stream1 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + functionCalls: [{ name: 'test_tool', args: {} }], + }, + }, + ]); + const stream2 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + + mockChat.sendMessageStream + .mockResolvedValueOnce(stream1) + .mockResolvedValueOnce(stream2); + + await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Call tool' }], + }); + + expect(mockConnection.requestPermission).toHaveBeenCalledWith( + expect.objectContaining({ + options: expect.not.arrayContaining([ + expect.objectContaining({ + optionId: ToolConfirmationOutcome.ProceedAlways, + }), + ]), + }), + ); + }); + it('should use filePath for ACP diff content in permission request', async () => { const confirmationDetails = { type: 'edit', diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index c36e214d27..072d91c20a 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -908,7 +908,7 @@ export class Session { const params: acp.RequestPermissionRequest = { sessionId: this.id, - options: toPermissionOptions(confirmationDetails), + options: toPermissionOptions(confirmationDetails, this.config), toolCall: { toolCallId: callId, 
status: 'pending', @@ -1004,6 +1004,7 @@ export class Session { callId, toolResult.llmContent, this.config.getActiveModel(), + this.config, ), resultDisplay: toolResult.returnDisplay, error: undefined, @@ -1017,6 +1018,7 @@ export class Session { callId, toolResult.llmContent, this.config.getActiveModel(), + this.config, ); } catch (e) { const error = e instanceof Error ? e : new Error(String(e)); @@ -1457,60 +1459,76 @@ const basicPermissionOptions = [ function toPermissionOptions( confirmation: ToolCallConfirmationDetails, + config: Config, ): acp.PermissionOption[] { - switch (confirmation.type) { - case 'edit': - return [ - { + const disableAlwaysAllow = config.getDisableAlwaysAllow(); + const options: acp.PermissionOption[] = []; + + if (!disableAlwaysAllow) { + switch (confirmation.type) { + case 'edit': + options.push({ optionId: ToolConfirmationOutcome.ProceedAlways, name: 'Allow All Edits', kind: 'allow_always', - }, - ...basicPermissionOptions, - ]; - case 'exec': - return [ - { + }); + break; + case 'exec': + options.push({ optionId: ToolConfirmationOutcome.ProceedAlways, name: `Always Allow ${confirmation.rootCommand}`, kind: 'allow_always', - }, - ...basicPermissionOptions, - ]; - case 'mcp': - return [ - { - optionId: ToolConfirmationOutcome.ProceedAlwaysServer, - name: `Always Allow ${confirmation.serverName}`, - kind: 'allow_always', - }, - { - optionId: ToolConfirmationOutcome.ProceedAlwaysTool, - name: `Always Allow ${confirmation.toolName}`, - kind: 'allow_always', - }, - ...basicPermissionOptions, - ]; - case 'info': - return [ - { + }); + break; + case 'mcp': + options.push( + { + optionId: ToolConfirmationOutcome.ProceedAlwaysServer, + name: `Always Allow ${confirmation.serverName}`, + kind: 'allow_always', + }, + { + optionId: ToolConfirmationOutcome.ProceedAlwaysTool, + name: `Always Allow ${confirmation.toolName}`, + kind: 'allow_always', + }, + ); + break; + case 'info': + options.push({ optionId: ToolConfirmationOutcome.ProceedAlways, 
name: `Always Allow`, kind: 'allow_always', - }, - ...basicPermissionOptions, - ]; + }); + break; + case 'ask_user': + case 'exit_plan_mode': + // askuser and exit_plan_mode don't need "always allow" options + break; + default: + // No "always allow" options for other types + break; + } + } + + options.push(...basicPermissionOptions); + + // Exhaustive check + switch (confirmation.type) { + case 'edit': + case 'exec': + case 'mcp': + case 'info': case 'ask_user': - // askuser doesn't need "always allow" options since it's asking questions - return [...basicPermissionOptions]; case 'exit_plan_mode': - // exit_plan_mode doesn't need "always allow" options since it's a plan approval flow - return [...basicPermissionOptions]; + break; default: { const unreachable: never = confirmation; throw new Error(`Unexpected: ${unreachable}`); } } + + return options; } /** diff --git a/packages/cli/src/acp/commands/memory.ts b/packages/cli/src/acp/commands/memory.ts index 9460af7ad1..1154c852a1 100644 --- a/packages/cli/src/acp/commands/memory.ts +++ b/packages/cli/src/acp/commands/memory.ts @@ -105,6 +105,7 @@ export class AddMemoryCommand implements Command { await tool.buildAndExecute(result.toolArgs, signal, undefined, { sanitizationConfig: DEFAULT_SANITIZATION_CONFIG, + sandboxManager: context.config.sandboxManager, }); await refreshMemory(context.config); return { diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 334236fd85..72c55a64b3 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -1773,7 +1773,7 @@ describe('loadCliConfig model selection', () => { }); it('always prefers model from argv', async () => { - process.argv = ['node', 'script.js', '--model', 'gemini-2.5-flash']; + process.argv = ['node', 'script.js', '--model', 'gemini-2.5-flash-preview']; const argv = await parseArguments(createTestMergedSettings()); const config = await loadCliConfig( createTestMergedSettings({ 
@@ -1785,11 +1785,11 @@ describe('loadCliConfig model selection', () => { argv, ); - expect(config.getModel()).toBe('gemini-2.5-flash'); + expect(config.getModel()).toBe('gemini-2.5-flash-preview'); }); it('selects the model from argv if provided', async () => { - process.argv = ['node', 'script.js', '--model', 'gemini-2.5-flash']; + process.argv = ['node', 'script.js', '--model', 'gemini-2.5-flash-preview']; const argv = await parseArguments(createTestMergedSettings()); const config = await loadCliConfig( createTestMergedSettings({ @@ -1799,7 +1799,7 @@ describe('loadCliConfig model selection', () => { argv, ); - expect(config.getModel()).toBe('gemini-2.5-flash'); + expect(config.getModel()).toBe('gemini-2.5-flash-preview'); }); it('selects the default auto model if provided via auto alias', async () => { diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index e910d47546..ab6a22fb64 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -31,8 +31,6 @@ import { type HierarchicalMemory, coreEvents, GEMINI_MODEL_ALIAS_AUTO, - isValidModelOrAlias, - getValidModelsAndAliases, getAdminErrorMessage, isHeadlessMode, Config, @@ -498,9 +496,10 @@ export async function loadCliConfig( const experimentalJitContext = settings.experimental?.jitContext ?? false; - let extensionRegistryURI: string | undefined = trustedFolder - ? settings.experimental?.extensionRegistryURI - : undefined; + let extensionRegistryURI = + process.env['GEMINI_CLI_EXTENSION_REGISTRY_URI'] ?? + (trustedFolder ? 
settings.experimental?.extensionRegistryURI : undefined); + if (extensionRegistryURI && !extensionRegistryURI.startsWith('http')) { extensionRegistryURI = resolveToRealPath( path.resolve(cwd, resolvePath(extensionRegistryURI)), @@ -673,18 +672,6 @@ export async function loadCliConfig( const specifiedModel = argv.model || process.env['GEMINI_MODEL'] || settings.model?.name; - // Validate the model if one was explicitly specified - if (specifiedModel && specifiedModel !== GEMINI_MODEL_ALIAS_AUTO) { - if (!isValidModelOrAlias(specifiedModel)) { - const validModels = getValidModelsAndAliases(); - - throw new FatalConfigError( - `Invalid model: "${specifiedModel}"\n\n` + - `Valid models and aliases:\n${validModels.map((m) => ` - ${m}`).join('\n')}\n\n` + - `Use /model to switch models interactively.`, - ); - } - } const resolvedModel = specifiedModel === GEMINI_MODEL_ALIAS_AUTO ? defaultModel @@ -744,6 +731,7 @@ export async function loadCliConfig( clientVersion: await getVersion(), embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL, sandbox: sandboxConfig, + toolSandboxing: settings.security?.toolSandboxing ?? 
false, targetDir: cwd, includeDirectoryTree, includeDirectories, @@ -784,6 +772,9 @@ export async function loadCliConfig( approvalMode, disableYoloMode: settings.security?.disableYoloMode || settings.admin?.secureModeEnabled, + disableAlwaysAllow: + settings.security?.disableAlwaysAllow || + settings.admin?.secureModeEnabled, showMemoryUsage: settings.ui?.showMemoryUsage || false, accessibility: { ...settings.ui?.accessibility, @@ -823,6 +814,7 @@ export async function loadCliConfig( disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, modelSteering: settings.experimental?.modelSteering, + topicUpdateNarration: settings.experimental?.topicUpdateNarration, toolOutputMasking: settings.experimental?.toolOutputMasking, noBrowser: !!process.env['NO_BROWSER'], summarizeToolOutput: settings.model?.summarizeToolOutput, @@ -857,6 +849,7 @@ export async function loadCliConfig( disableLLMCorrection: settings.tools?.disableLLMCorrection, rawOutput: argv.rawOutput, acceptRawOutputRisk: argv.acceptRawOutputRisk, + dynamicModelConfiguration: settings.experimental?.dynamicModelConfiguration, modelConfigServiceConfig: settings.modelConfigs, // TODO: loading of hooks based on workspace trust enableHooks: settings.hooksConfig.enabled, diff --git a/packages/cli/src/config/extension-manager-themes.spec.ts b/packages/cli/src/config/extension-manager-themes.spec.ts index b1b21aab55..9358784a2f 100644 --- a/packages/cli/src/config/extension-manager-themes.spec.ts +++ b/packages/cli/src/config/extension-manager-themes.spec.ts @@ -20,7 +20,12 @@ import { import { createExtension } from '../test-utils/createExtension.js'; import { ExtensionManager } from './extension-manager.js'; import { themeManager, DEFAULT_THEME } from '../ui/themes/theme-manager.js'; -import { GEMINI_DIR, type Config, tmpdir } from '@google/gemini-cli-core'; +import { + GEMINI_DIR, + type Config, + tmpdir, + NoopSandboxManager, +} from '@google/gemini-cli-core'; import { 
createTestMergedSettings, SettingScope } from './settings.js'; describe('ExtensionManager theme loading', () => { @@ -117,6 +122,7 @@ describe('ExtensionManager theme loading', () => { terminalHeight: 24, showColor: false, pager: 'cat', + sandboxManager: new NoopSandboxManager(), sanitizationConfig: { allowedEnvironmentVariables: [], blockedEnvironmentVariables: [], diff --git a/packages/cli/src/config/extension-manager.ts b/packages/cli/src/config/extension-manager.ts index 68617bcbcd..974cb1b83e 100644 --- a/packages/cli/src/config/extension-manager.ts +++ b/packages/cli/src/config/extension-manager.ts @@ -898,9 +898,10 @@ Would you like to attempt to install via "git clone" instead?`, let skills = await loadSkillsFromDir( path.join(effectiveExtensionPath, 'skills'), ); - skills = skills.map((skill) => - recursivelyHydrateStrings(skill, hydrationContext), - ); + skills = skills.map((skill) => ({ + ...recursivelyHydrateStrings(skill, hydrationContext), + extensionName: config.name, + })); let rules: PolicyRule[] | undefined; let checkers: SafetyCheckerRule[] | undefined; @@ -923,9 +924,10 @@ Would you like to attempt to install via "git clone" instead?`, const agentLoadResult = await loadAgentsFromDirectory( path.join(effectiveExtensionPath, 'agents'), ); - agentLoadResult.agents = agentLoadResult.agents.map((agent) => - recursivelyHydrateStrings(agent, hydrationContext), - ); + agentLoadResult.agents = agentLoadResult.agents.map((agent) => ({ + ...recursivelyHydrateStrings(agent, hydrationContext), + extensionName: config.name, + })); // Log errors but don't fail the entire extension load for (const error of agentLoadResult.errors) { diff --git a/packages/cli/src/config/policy.ts b/packages/cli/src/config/policy.ts index 4bbd396fba..9837c2c355 100644 --- a/packages/cli/src/config/policy.ts +++ b/packages/cli/src/config/policy.ts @@ -63,6 +63,9 @@ export async function createPolicyEngineConfig( policyPaths: settings.policyPaths, adminPolicyPaths: 
settings.adminPolicyPaths, workspacePoliciesDir, + disableAlwaysAllow: + settings.security?.disableAlwaysAllow || + settings.admin?.secureModeEnabled, }; return createCorePolicyEngineConfig(policySettings, approvalMode); diff --git a/packages/cli/src/config/sandboxConfig.ts b/packages/cli/src/config/sandboxConfig.ts index cce5033f1a..59a9685f70 100644 --- a/packages/cli/src/config/sandboxConfig.ts +++ b/packages/cli/src/config/sandboxConfig.ts @@ -34,7 +34,9 @@ const VALID_SANDBOX_COMMANDS = [ function isSandboxCommand( value: string, ): value is Exclude { - return VALID_SANDBOX_COMMANDS.includes(value); + return (VALID_SANDBOX_COMMANDS as ReadonlyArray).includes( + value, + ); } function getSandboxCommand( diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts index af143afcc0..06129a4760 100644 --- a/packages/cli/src/config/settings.test.ts +++ b/packages/cli/src/config/settings.test.ts @@ -524,16 +524,19 @@ describe('Settings Loading and Merging', () => { const userSettingsContent = { security: { disableYoloMode: false, + disableAlwaysAllow: false, }, }; const workspaceSettingsContent = { security: { disableYoloMode: false, // This should be ignored + disableAlwaysAllow: false, // This should be ignored }, }; const systemSettingsContent = { security: { disableYoloMode: true, + disableAlwaysAllow: true, }, }; @@ -551,6 +554,7 @@ describe('Settings Loading and Merging', () => { const settings = loadSettings(MOCK_WORKSPACE_DIR); expect(settings.merged.security?.disableYoloMode).toBe(true); // System setting should be used + expect(settings.merged.security?.disableAlwaysAllow).toBe(true); // System setting should be used }); it.each([ diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 7d47d66e32..87fbe98fc3 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1039,6 +1039,20 @@ const SETTINGS_SCHEMA = { 'Apply specific 
configuration overrides based on matches, with a primary key of model (or alias). The most specific match will be used.', showInDialog: false, }, + modelDefinitions: { + type: 'object', + label: 'Model Definitions', + category: 'Model', + requiresRestart: true, + default: DEFAULT_MODEL_CONFIGS.modelDefinitions, + description: + 'Registry of model metadata, including tier, family, and features.', + showInDialog: false, + additionalProperties: { + type: 'object', + ref: 'ModelDefinition', + }, + }, }, }, @@ -1117,6 +1131,19 @@ const SETTINGS_SCHEMA = { description: 'Model override for the visual agent.', showInDialog: false, }, + allowedDomains: { + type: 'array', + label: 'Allowed Domains', + category: 'Advanced', + requiresRestart: true, + default: ['github.com', '*.google.com', 'localhost'] as string[], + description: oneLine` + A list of allowed domains for the browser agent + (e.g., ["github.com", "*.google.com"]). + `, + showInDialog: false, + items: { type: 'string' }, + }, disableUserInput: { type: 'boolean', label: 'Disable User Input', @@ -1287,7 +1314,7 @@ const SETTINGS_SCHEMA = { default: undefined as boolean | string | SandboxConfig | undefined, ref: 'BooleanOrStringOrObject', description: oneLine` - Sandbox execution environment. + Legacy full-process sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., "docker", "podman", "lxc"). 
`, @@ -1509,6 +1536,16 @@ const SETTINGS_SCHEMA = { description: 'Security-related settings.', showInDialog: false, properties: { + toolSandboxing: { + type: 'boolean', + label: 'Tool Sandboxing', + category: 'Security', + requiresRestart: false, + default: false, + description: + 'Experimental tool-level sandboxing (implementation in progress).', + showInDialog: true, + }, disableYoloMode: { type: 'boolean', label: 'Disable YOLO Mode', @@ -1518,6 +1555,16 @@ const SETTINGS_SCHEMA = { description: 'Disable YOLO mode, even if enabled by a flag.', showInDialog: true, }, + disableAlwaysAllow: { + type: 'boolean', + label: 'Disable Always Allow', + category: 'Security', + requiresRestart: true, + default: false, + description: + 'Disable "Always allow" options in tool confirmation dialogs.', + showInDialog: true, + }, enablePermanentToolApproval: { type: 'boolean', label: 'Allow Permanent Tool Approval', @@ -1910,6 +1957,16 @@ const SETTINGS_SCHEMA = { 'Enable web fetch behavior that bypasses LLM summarization.', showInDialog: true, }, + dynamicModelConfiguration: { + type: 'boolean', + label: 'Dynamic Model Configuration', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Enable dynamic model configuration (definitions, resolutions, and chains) via settings.', + showInDialog: false, + }, gemmaModelRouter: { type: 'object', label: 'Gemma Model Router', @@ -1961,9 +2018,18 @@ const SETTINGS_SCHEMA = { }, }, }, + topicUpdateNarration: { + type: 'boolean', + label: 'Topic & Update Narration', + category: 'Experimental', + requiresRestart: false, + default: false, + description: + 'Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting.', + showInDialog: true, + }, }, }, - extensions: { type: 'object', label: 'Extensions', @@ -2244,7 +2310,8 @@ const SETTINGS_SCHEMA = { category: 'Admin', requiresRestart: false, default: false, - description: 'If true, disallows yolo mode 
from being used.', + description: + 'If true, disallows YOLO mode and "Always allow" options from being used.', showInDialog: false, mergeStrategy: MergeStrategy.REPLACE, }, @@ -2726,6 +2793,25 @@ export const SETTINGS_SCHEMA_DEFINITIONS: Record< }, }, }, + ModelDefinition: { + type: 'object', + description: 'Model metadata registry entry.', + properties: { + displayName: { type: 'string' }, + tier: { enum: ['pro', 'flash', 'flash-lite', 'custom', 'auto'] }, + family: { type: 'string' }, + isPreview: { type: 'boolean' }, + dialogLocation: { enum: ['main', 'manual'] }, + dialogDescription: { type: 'string' }, + features: { + type: 'object', + properties: { + thinking: { type: 'boolean' }, + multimodalToolUse: { type: 'boolean' }, + }, + }, + }, + }, }; export function getSettingsSchema(): SettingsSchemaType { diff --git a/packages/cli/src/services/SkillCommandLoader.test.ts b/packages/cli/src/services/SkillCommandLoader.test.ts index 15a2ebec18..51cc098536 100644 --- a/packages/cli/src/services/SkillCommandLoader.test.ts +++ b/packages/cli/src/services/SkillCommandLoader.test.ts @@ -122,4 +122,16 @@ describe('SkillCommandLoader', () => { const actionResult = (await commands[0].action!({} as any, '')) as any; expect(actionResult.toolArgs).toEqual({ name: 'my awesome skill' }); }); + + it('should propagate extensionName to the generated slash command', async () => { + const mockSkills = [ + { name: 'skill1', description: 'desc', extensionName: 'ext1' }, + ]; + mockSkillManager.getDisplayableSkills.mockReturnValue(mockSkills); + + const loader = new SkillCommandLoader(mockConfig); + const commands = await loader.loadCommands(new AbortController().signal); + + expect(commands[0].extensionName).toBe('ext1'); + }); }); diff --git a/packages/cli/src/services/SkillCommandLoader.ts b/packages/cli/src/services/SkillCommandLoader.ts index 85f1884299..e264da2e31 100644 --- a/packages/cli/src/services/SkillCommandLoader.ts +++ b/packages/cli/src/services/SkillCommandLoader.ts @@ 
-41,6 +41,7 @@ export class SkillCommandLoader implements ICommandLoader { description: skill.description || `Activate the ${skill.name} skill`, kind: CommandKind.SKILL, autoExecute: true, + extensionName: skill.extensionName, action: async (_context, args) => ({ type: 'tool', toolName: ACTIVATE_SKILL_TOOL_NAME, diff --git a/packages/cli/src/services/SlashCommandConflictHandler.test.ts b/packages/cli/src/services/SlashCommandConflictHandler.test.ts index a828923fe5..5527188a04 100644 --- a/packages/cli/src/services/SlashCommandConflictHandler.test.ts +++ b/packages/cli/src/services/SlashCommandConflictHandler.test.ts @@ -172,4 +172,23 @@ describe('SlashCommandConflictHandler', () => { vi.advanceTimersByTime(600); expect(coreEvents.emitFeedback).not.toHaveBeenCalled(); }); + + it('should display a descriptive message for a skill conflict', () => { + simulateEvent([ + { + name: 'chat', + renamedTo: 'google-workspace.chat', + loserExtensionName: 'google-workspace', + loserKind: CommandKind.SKILL, + winnerKind: CommandKind.BUILT_IN, + }, + ]); + + vi.advanceTimersByTime(600); + + expect(coreEvents.emitFeedback).toHaveBeenCalledWith( + 'info', + "Extension 'google-workspace' skill '/chat' was renamed to '/google-workspace.chat' because it conflicts with built-in command.", + ); + }); }); diff --git a/packages/cli/src/services/SlashCommandConflictHandler.ts b/packages/cli/src/services/SlashCommandConflictHandler.ts index b51617840e..7da4e53842 100644 --- a/packages/cli/src/services/SlashCommandConflictHandler.ts +++ b/packages/cli/src/services/SlashCommandConflictHandler.ts @@ -154,6 +154,10 @@ export class SlashCommandConflictHandler { return extensionName ? `extension '${extensionName}' command` : 'extension command'; + case CommandKind.SKILL: + return extensionName + ? `extension '${extensionName}' skill` + : 'skill command'; case CommandKind.MCP_PROMPT: return mcpServerName ? 
`MCP server '${mcpServerName}' command` diff --git a/packages/cli/src/services/SlashCommandResolver.test.ts b/packages/cli/src/services/SlashCommandResolver.test.ts index e703028b3d..43d1c310a8 100644 --- a/packages/cli/src/services/SlashCommandResolver.test.ts +++ b/packages/cli/src/services/SlashCommandResolver.test.ts @@ -173,5 +173,30 @@ describe('SlashCommandResolver', () => { expect(finalCommands.find((c) => c.name === 'gcp.deploy1')).toBeDefined(); }); + + it('should prefix skills with extension name when they conflict with built-in', () => { + const builtin = createMockCommand('chat', CommandKind.BUILT_IN); + const skill = { + ...createMockCommand('chat', CommandKind.SKILL), + extensionName: 'google-workspace', + }; + + const { finalCommands } = SlashCommandResolver.resolve([builtin, skill]); + + const names = finalCommands.map((c) => c.name); + expect(names).toContain('chat'); + expect(names).toContain('google-workspace.chat'); + }); + + it('should NOT prefix skills with "skill" when extension name is missing', () => { + const builtin = createMockCommand('chat', CommandKind.BUILT_IN); + const skill = createMockCommand('chat', CommandKind.SKILL); + + const { finalCommands } = SlashCommandResolver.resolve([builtin, skill]); + + const names = finalCommands.map((c) => c.name); + expect(names).toContain('chat'); + expect(names).toContain('chat1'); + }); }); }); diff --git a/packages/cli/src/services/SlashCommandResolver.ts b/packages/cli/src/services/SlashCommandResolver.ts index d4e7efc7bb..4947e6545a 100644 --- a/packages/cli/src/services/SlashCommandResolver.ts +++ b/packages/cli/src/services/SlashCommandResolver.ts @@ -174,6 +174,7 @@ export class SlashCommandResolver { private static getPrefix(cmd: SlashCommand): string | undefined { switch (cmd.kind) { case CommandKind.EXTENSION_FILE: + case CommandKind.SKILL: return cmd.extensionName; case CommandKind.MCP_PROMPT: return cmd.mcpServerName; @@ -185,7 +186,6 @@ export class SlashCommandResolver { return 
undefined; } } - /** * Logs a conflict event. */ diff --git a/packages/cli/src/services/prompt-processors/shellProcessor.test.ts b/packages/cli/src/services/prompt-processors/shellProcessor.test.ts index 0f6fb562a8..84010ab625 100644 --- a/packages/cli/src/services/prompt-processors/shellProcessor.test.ts +++ b/packages/cli/src/services/prompt-processors/shellProcessor.test.ts @@ -13,6 +13,7 @@ import { ApprovalMode, getShellConfiguration, PolicyDecision, + NoopSandboxManager, } from '@google/gemini-cli-core'; import { quote } from 'shell-quote'; import { createPartFromText } from '@google/genai'; @@ -77,7 +78,14 @@ describe('ShellProcessor', () => { getTargetDir: vi.fn().mockReturnValue('/test/dir'), getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), getEnableInteractiveShell: vi.fn().mockReturnValue(false), - getShellExecutionConfig: vi.fn().mockReturnValue({}), + getShellExecutionConfig: vi.fn().mockReturnValue({ + sandboxManager: new NoopSandboxManager(), + sanitizationConfig: { + allowedEnvironmentVariables: [], + blockedEnvironmentVariables: [], + enableEnvironmentVariableRedaction: false, + }, + }), getPolicyEngine: vi.fn().mockReturnValue({ check: mockPolicyEngineCheck, }), diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index 170d009843..59d19b3412 100644 --- a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -5,6 +5,7 @@ */ import { vi } from 'vitest'; +import { NoopSandboxManager } from '@google/gemini-cli-core'; import type { Config } from '@google/gemini-cli-core'; import { createTestMergedSettings, @@ -121,6 +122,7 @@ export const createMockConfig = (overrides: Partial = {}): Config => getBannerTextNoCapacityIssues: vi.fn().mockResolvedValue(''), getBannerTextCapacityIssues: vi.fn().mockResolvedValue(''), isInteractiveShellEnabled: vi.fn().mockReturnValue(false), + getDisableAlwaysAllow: vi.fn().mockReturnValue(false), isSkillsSupportEnabled: 
vi.fn().mockReturnValue(false), reloadSkills: vi.fn().mockResolvedValue(undefined), reloadAgents: vi.fn().mockResolvedValue(undefined), @@ -131,7 +133,14 @@ export const createMockConfig = (overrides: Partial = {}): Config => getRetryFetchErrors: vi.fn().mockReturnValue(true), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), getShellToolInactivityTimeout: vi.fn().mockReturnValue(300000), - getShellExecutionConfig: vi.fn().mockReturnValue({}), + getShellExecutionConfig: vi.fn().mockReturnValue({ + sandboxManager: new NoopSandboxManager(), + sanitizationConfig: { + allowedEnvironmentVariables: [], + blockedEnvironmentVariables: [], + enableEnvironmentVariableRedaction: false, + }, + }), setShellExecutionConfig: vi.fn(), getEnableToolOutputTruncation: vi.fn().mockReturnValue(true), getTruncateToolOutputThreshold: vi.fn().mockReturnValue(1000), diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 0bfdeba120..fa0a293916 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -1425,6 +1425,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
pager: settings.merged.tools.shell.pager, showColor: settings.merged.tools.shell.showColor, sanitizationConfig: config.sanitizationConfig, + sandboxManager: config.sandboxManager, }); const { isFocused, hasReceivedFocusEvent } = useFocus(); diff --git a/packages/cli/src/ui/components/StatsDisplay.tsx b/packages/cli/src/ui/components/StatsDisplay.tsx index 320203f3dc..9effb39b5c 100644 --- a/packages/cli/src/ui/components/StatsDisplay.tsx +++ b/packages/cli/src/ui/components/StatsDisplay.tsx @@ -27,6 +27,7 @@ import { } from '../utils/displayUtils.js'; import { computeSessionStats } from '../utils/computeStats.js'; import { + type Config, type RetrieveUserQuotaResponse, isActiveModel, getDisplayString, @@ -88,13 +89,16 @@ const Section: React.FC = ({ title, children }) => ( // Logic for building the unified list of table rows const buildModelRows = ( models: Record, + config: Config, quotas?: RetrieveUserQuotaResponse, useGemini3_1 = false, useCustomToolModel = false, ) => { const getBaseModelName = (name: string) => name.replace('-001', ''); const usedModelNames = new Set( - Object.keys(models).map(getBaseModelName).map(getDisplayString), + Object.keys(models) + .map(getBaseModelName) + .map((name) => getDisplayString(name, config)), ); // 1. 
Models with active usage @@ -104,7 +108,7 @@ const buildModelRows = ( const inputTokens = metrics.tokens.input; return { key: name, - modelName: getDisplayString(modelName), + modelName: getDisplayString(modelName, config), requests: metrics.api.totalRequests, cachedTokens: cachedTokens.toLocaleString(), inputTokens: inputTokens.toLocaleString(), @@ -121,11 +125,11 @@ const buildModelRows = ( (b) => b.modelId && isActiveModel(b.modelId, useGemini3_1, useCustomToolModel) && - !usedModelNames.has(getDisplayString(b.modelId)), + !usedModelNames.has(getDisplayString(b.modelId, config)), ) .map((bucket) => ({ key: bucket.modelId!, - modelName: getDisplayString(bucket.modelId!), + modelName: getDisplayString(bucket.modelId!, config), requests: '-', cachedTokens: '-', inputTokens: '-', @@ -139,6 +143,7 @@ const buildModelRows = ( const ModelUsageTable: React.FC<{ models: Record; + config: Config; quotas?: RetrieveUserQuotaResponse; cacheEfficiency: number; totalCachedTokens: number; @@ -150,6 +155,7 @@ const ModelUsageTable: React.FC<{ useCustomToolModel?: boolean; }> = ({ models, + config, quotas, cacheEfficiency, totalCachedTokens, @@ -162,7 +168,13 @@ const ModelUsageTable: React.FC<{ }) => { const { stdout } = useStdout(); const terminalWidth = stdout?.columns ?? 
84; - const rows = buildModelRows(models, quotas, useGemini3_1, useCustomToolModel); + const rows = buildModelRows( + models, + config, + quotas, + useGemini3_1, + useCustomToolModel, + ); if (rows.length === 0) { return null; @@ -676,6 +688,7 @@ export const StatsDisplay: React.FC = ({ { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, getModel: () => 'gemini-pro', getDebugMode: () => false, getTargetDir: () => '/mock/target/dir', diff --git a/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap index b4f2bc919c..5394ab83c0 100644 --- a/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap @@ -13,10 +13,6 @@ Tips for getting started: 2. /help for more information 3. Ask coding questions, edit code or run commands 4. Be specific for the best results -╭──────────────────────────────────────────────────────────────────────────╮ -│ ? 
confirming_tool Confirming tool description │ -│ │ -╰──────────────────────────────────────────────────────────────────────────╯ Action Required (was prompted): diff --git a/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx b/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx index 15763bdae7..df8522d99c 100644 --- a/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx +++ b/packages/cli/src/ui/components/messages/RedirectionConfirmation.test.tsx @@ -21,6 +21,7 @@ describe('ToolConfirmationMessage Redirection', () => { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; it('should display redirection warning and tip for redirected commands', async () => { diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx index ec623f69a4..92c8b5743c 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx @@ -37,6 +37,7 @@ describe('ToolConfirmationMessage', () => { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; it('should not display urls if prompt and url are the same', async () => { @@ -331,8 +332,8 @@ describe('ToolConfirmationMessage', () => { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( { const mockConfig = { isTrustedFolder: () => false, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; const { lastFrame, waitUntilReady, unmount } = renderWithProviders( @@ -388,8 +390,8 @@ describe('ToolConfirmationMessage', () => { const 
mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => false, + getDisableAlwaysAllow: () => false, } as unknown as Config; - vi.mocked(useToolActions).mockReturnValue({ confirm: vi.fn(), cancel: vi.fn(), @@ -485,8 +487,8 @@ describe('ToolConfirmationMessage', () => { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => true, + getDisableAlwaysAllow: () => false, } as unknown as Config; - vi.mocked(useToolActions).mockReturnValue({ confirm: vi.fn(), cancel: vi.fn(), @@ -513,8 +515,8 @@ describe('ToolConfirmationMessage', () => { const mockConfig = { isTrustedFolder: () => true, getIdeMode: () => true, + getDisableAlwaysAllow: () => false, } as unknown as Config; - vi.mocked(useToolActions).mockReturnValue({ confirm: vi.fn(), cancel: vi.fn(), diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx index 8bc329f3df..2e9e133a35 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx @@ -86,12 +86,14 @@ export const ToolConfirmationMessage: React.FC< const settings = useSettings(); const allowPermanentApproval = - settings.merged.security.enablePermanentToolApproval; + settings.merged.security.enablePermanentToolApproval && + !config.getDisableAlwaysAllow(); const handlesOwnUI = confirmationDetails.type === 'ask_user' || confirmationDetails.type === 'exit_plan_mode'; - const isTrustedFolder = config.isTrustedFolder(); + const isTrustedFolder = + 
config.isTrustedFolder() && !config.getDisableAlwaysAllow(); const handleConfirm = useCallback( (outcome: ToolConfirmationOutcome, payload?: ToolConfirmationPayload) => { diff --git a/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx index b38f76aa04..eff418a609 100644 --- a/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx @@ -118,10 +118,30 @@ describe('', () => { { config: baseMockConfig, settings: fullVerbositySettings }, ); - // Should now render confirming tools + // Should now hide confirming tools (to avoid duplication with Global Queue) + await waitUntilReady(); + expect(lastFrame({ allowEmpty: true })).toBe(''); + unmount(); + }); + + it('renders canceled tool calls', async () => { + const toolCalls = [ + createToolCall({ + callId: 'canceled-tool', + name: 'canceled-tool', + status: CoreToolCallStatus.Cancelled, + }), + ]; + const item = createItem(toolCalls); + + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + { config: baseMockConfig, settings: fullVerbositySettings }, + ); + await waitUntilReady(); const output = lastFrame(); - expect(output).toContain('test-tool'); + expect(output).toMatchSnapshot('canceled_tool'); unmount(); }); @@ -842,7 +862,7 @@ describe('', () => { ); await waitUntilReady(); - expect(lastFrame({ allowEmpty: true })).not.toBe(''); + expect(lastFrame({ allowEmpty: true })).toBe(''); unmount(); }); diff --git a/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx b/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx index e22d3c6313..ee3a98930f 100644 --- a/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx @@ -110,11 +110,12 @@ export const ToolGroupMessage: React.FC = ({ () => toolCalls.filter((t) => { const displayStatus = 
mapCoreStatusToDisplayStatus(t.status); - // We used to filter out Pending and Confirming statuses here to avoid - // duplication with the Global Queue, but this causes tools to appear to - // "vanish" from the context after approval. - // We now allow them to be visible here as well. - return displayStatus !== ToolCallStatus.Canceled; + // We hide Confirming tools from the history log because they are + // currently being rendered in the interactive ToolConfirmationQueue. + // We show everything else, including Pending (waiting to run) and + // Canceled (rejected by user), to ensure the history is complete + // and to avoid tools "vanishing" after approval. + return displayStatus !== ToolCallStatus.Confirming; }), [toolCalls], diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap index c1ea071bc5..98db513da8 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap @@ -49,6 +49,15 @@ exports[` > Border Color Logic > uses yellow border for shel " `; +exports[` > Golden Snapshots > renders canceled tool calls > canceled_tool 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────╮ +│ - canceled-tool A tool for testing │ +│ │ +│ Test result │ +╰──────────────────────────────────────────────────────────────────────────╯ +" +`; + exports[` > Golden Snapshots > renders empty tool calls array 1`] = `""`; exports[` > Golden Snapshots > renders header when scrolled 1`] = ` diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx index 357d4cf2cd..31e43af575 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx @@ -647,6 +647,15 @@ 
describe('KeypressContext', () => { sequence: `\x1b[27;6;9~`, expected: { name: 'tab', shift: true, ctrl: true }, }, + // Unicode CJK (Kitty/modifyOtherKeys scalar values) + { + sequence: '\x1b[44032u', + expected: { name: '가', sequence: '가', insertable: true }, + }, + { + sequence: '\x1b[27;1;44032~', + expected: { name: '가', sequence: '가', insertable: true }, + }, // XTerm Function Key { sequence: `\x1b[1;129A`, expected: { name: 'up' } }, { sequence: `\x1b[1;2H`, expected: { name: 'home', shift: true } }, @@ -1403,7 +1412,7 @@ describe('KeypressContext', () => { expect(keyHandler).toHaveBeenCalledTimes(inputString.length); for (const char of inputString) { expect(keyHandler).toHaveBeenCalledWith( - expect.objectContaining({ sequence: char }), + expect.objectContaining({ sequence: char, name: char.toLowerCase() }), ); } }); diff --git a/packages/cli/src/ui/contexts/KeypressContext.tsx b/packages/cli/src/ui/contexts/KeypressContext.tsx index 63e8a07a94..cdd6da7feb 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.tsx @@ -610,20 +610,28 @@ function* emitKeys( if (code.endsWith('u') || code.endsWith('~')) { // CSI-u or tilde-coded functional keys: ESC [ ; (u|~) const codeNumber = parseInt(code.slice(1, -1), 10); - if (codeNumber >= 33 && codeNumber <= 126) { - const char = String.fromCharCode(codeNumber); + const mapped = KITTY_CODE_MAP[codeNumber]; + if (mapped) { + name = mapped.name; + if (mapped.sequence && !ctrl && !cmd && !alt) { + sequence = mapped.sequence; + insertable = true; + } + } else if ( + codeNumber >= 33 && // Printable characters start after space (32), + codeNumber <= 0x10ffff && // Valid Unicode scalar values (excluding control characters) + (codeNumber < 0xd800 || codeNumber > 0xdfff) // Exclude UTF-16 surrogate halves + ) { + // Valid printable Unicode scalar values (up to Unicode maximum) + // Note: Kitty maps its special keys to the PUA (57344+), which are handled by 
KITTY_CODE_MAP above. + const char = String.fromCodePoint(codeNumber); name = char.toLowerCase(); - if (char >= 'A' && char <= 'Z') { + if (char !== name) { shift = true; } - } else { - const mapped = KITTY_CODE_MAP[codeNumber]; - if (mapped) { - name = mapped.name; - if (mapped.sequence && !ctrl && !cmd && !alt) { - sequence = mapped.sequence; - insertable = true; - } + if (!ctrl && !cmd && !alt) { + sequence = char; + insertable = true; } } } @@ -696,6 +704,10 @@ function* emitKeys( alt = ch.length > 0; } else { // Any other character is considered printable. + name = ch.toLowerCase(); + if (ch !== name) { + shift = true; + } insertable = true; } diff --git a/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx b/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx index b8486bc378..f5e3b61e2b 100644 --- a/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx +++ b/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx @@ -16,6 +16,7 @@ import { afterEach, type Mock, } from 'vitest'; +import { NoopSandboxManager } from '@google/gemini-cli-core'; const mockIsBinary = vi.hoisted(() => vi.fn()); const mockShellExecutionService = vi.hoisted(() => vi.fn()); @@ -109,8 +110,14 @@ describe('useShellCommandProcessor', () => { getShellExecutionConfig: () => ({ terminalHeight: 20, terminalWidth: 80, + sandboxManager: new NoopSandboxManager(), + sanitizationConfig: { + allowedEnvironmentVariables: [], + blockedEnvironmentVariables: [], + enableEnvironmentVariableRedaction: false, + }, }), - } as Config; + } as unknown as Config; mockGeminiClient = { addHistory: vi.fn() } as unknown as GeminiClient; vi.mocked(os.platform).mockReturnValue('linux'); diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.ts index 6f3ecd7b96..d070840f2d 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts @@ -325,9 +325,9 @@ export const 
useSlashCommandProcessor = ( (async () => { const commandService = await CommandService.create( [ + new BuiltinCommandLoader(config), new SkillCommandLoader(config), new McpPromptLoader(config), - new BuiltinCommandLoader(config), new FileCommandLoader(config), ], controller.signal, diff --git a/packages/cli/src/ui/key/keyBindings.test.ts b/packages/cli/src/ui/key/keyBindings.test.ts index 77237f128f..10f88dd4d9 100644 --- a/packages/cli/src/ui/key/keyBindings.test.ts +++ b/packages/cli/src/ui/key/keyBindings.test.ts @@ -22,7 +22,7 @@ describe('KeyBinding', () => { describe('constructor', () => { it('should parse a simple key', () => { const binding = new KeyBinding('a'); - expect(binding.key).toBe('a'); + expect(binding.name).toBe('a'); expect(binding.ctrl).toBe(false); expect(binding.shift).toBe(false); expect(binding.alt).toBe(false); @@ -31,45 +31,45 @@ describe('KeyBinding', () => { it('should parse ctrl+key', () => { const binding = new KeyBinding('ctrl+c'); - expect(binding.key).toBe('c'); + expect(binding.name).toBe('c'); expect(binding.ctrl).toBe(true); }); it('should parse shift+key', () => { const binding = new KeyBinding('shift+z'); - expect(binding.key).toBe('z'); + expect(binding.name).toBe('z'); expect(binding.shift).toBe(true); }); it('should parse alt+key', () => { const binding = new KeyBinding('alt+left'); - expect(binding.key).toBe('left'); + expect(binding.name).toBe('left'); expect(binding.alt).toBe(true); }); it('should parse cmd+key', () => { const binding = new KeyBinding('cmd+f'); - expect(binding.key).toBe('f'); + expect(binding.name).toBe('f'); expect(binding.cmd).toBe(true); }); it('should handle aliases (option/opt/meta)', () => { const optionBinding = new KeyBinding('option+b'); - expect(optionBinding.key).toBe('b'); + expect(optionBinding.name).toBe('b'); expect(optionBinding.alt).toBe(true); const optBinding = new KeyBinding('opt+b'); - expect(optBinding.key).toBe('b'); + expect(optBinding.name).toBe('b'); 
expect(optBinding.alt).toBe(true); const metaBinding = new KeyBinding('meta+enter'); - expect(metaBinding.key).toBe('enter'); + expect(metaBinding.name).toBe('enter'); expect(metaBinding.cmd).toBe(true); }); it('should parse multiple modifiers', () => { const binding = new KeyBinding('ctrl+shift+alt+cmd+x'); - expect(binding.key).toBe('x'); + expect(binding.name).toBe('x'); expect(binding.ctrl).toBe(true); expect(binding.shift).toBe(true); expect(binding.alt).toBe(true); @@ -78,14 +78,14 @@ describe('KeyBinding', () => { it('should be case-insensitive', () => { const binding = new KeyBinding('CTRL+Shift+F'); - expect(binding.key).toBe('f'); + expect(binding.name).toBe('f'); expect(binding.ctrl).toBe(true); expect(binding.shift).toBe(true); }); it('should handle named keys with modifiers', () => { const binding = new KeyBinding('ctrl+enter'); - expect(binding.key).toBe('enter'); + expect(binding.name).toBe('enter'); expect(binding.ctrl).toBe(true); }); diff --git a/packages/cli/src/ui/key/keyBindings.ts b/packages/cli/src/ui/key/keyBindings.ts index e8014b7429..5b1afc0735 100644 --- a/packages/cli/src/ui/key/keyBindings.ts +++ b/packages/cli/src/ui/key/keyBindings.ts @@ -144,14 +144,14 @@ export class KeyBinding { ]); /** The key name (e.g., 'a', 'enter', 'tab', 'escape') */ - readonly key: string; + readonly name: string; readonly shift: boolean; readonly alt: boolean; readonly ctrl: boolean; readonly cmd: boolean; constructor(pattern: string) { - let remains = pattern.toLowerCase().trim(); + let remains = pattern.trim(); let shift = false; let alt = false; let ctrl = false; @@ -160,31 +160,32 @@ export class KeyBinding { let matched: boolean; do { matched = false; - if (remains.startsWith('ctrl+')) { + const lowerRemains = remains.toLowerCase(); + if (lowerRemains.startsWith('ctrl+')) { ctrl = true; remains = remains.slice(5); matched = true; - } else if (remains.startsWith('shift+')) { + } else if (lowerRemains.startsWith('shift+')) { shift = true; remains = 
remains.slice(6); matched = true; - } else if (remains.startsWith('alt+')) { + } else if (lowerRemains.startsWith('alt+')) { alt = true; remains = remains.slice(4); matched = true; - } else if (remains.startsWith('option+')) { + } else if (lowerRemains.startsWith('option+')) { alt = true; remains = remains.slice(7); matched = true; - } else if (remains.startsWith('opt+')) { + } else if (lowerRemains.startsWith('opt+')) { alt = true; remains = remains.slice(4); matched = true; - } else if (remains.startsWith('cmd+')) { + } else if (lowerRemains.startsWith('cmd+')) { cmd = true; remains = remains.slice(4); matched = true; - } else if (remains.startsWith('meta+')) { + } else if (lowerRemains.startsWith('meta+')) { cmd = true; remains = remains.slice(5); matched = true; @@ -193,15 +194,17 @@ export class KeyBinding { const key = remains; - if ([...key].length !== 1 && !KeyBinding.VALID_LONG_KEYS.has(key)) { + const isSingleChar = [...key].length === 1; + + if (!isSingleChar && !KeyBinding.VALID_LONG_KEYS.has(key.toLowerCase())) { throw new Error( `Invalid keybinding key: "${key}" in "${pattern}".` + ` Must be a single character or one of: ${[...KeyBinding.VALID_LONG_KEYS].join(', ')}`, ); } - this.key = key; - this.shift = shift; + this.name = key.toLowerCase(); + this.shift = shift || (isSingleChar && this.name !== key); this.alt = alt; this.ctrl = ctrl; this.cmd = cmd; @@ -209,7 +212,7 @@ export class KeyBinding { matches(key: Key): boolean { return ( - this.key === key.name && + key.name === this.name && !!key.shift === !!this.shift && !!key.alt === !!this.alt && !!key.ctrl === !!this.ctrl && @@ -219,7 +222,7 @@ export class KeyBinding { equals(other: KeyBinding): boolean { return ( - this.key === other.key && + this.name === other.name && this.shift === other.shift && this.alt === other.alt && this.ctrl === other.ctrl && diff --git a/packages/cli/src/ui/key/keyMatchers.test.ts b/packages/cli/src/ui/key/keyMatchers.test.ts index b1d7ddc304..ab12ca1ddf 100644 --- 
a/packages/cli/src/ui/key/keyMatchers.test.ts +++ b/packages/cli/src/ui/key/keyMatchers.test.ts @@ -475,6 +475,22 @@ describe('keyMatchers', () => { expect(matchers[Command.QUIT](createKey('q', { ctrl: true }))).toBe(true); expect(matchers[Command.QUIT](createKey('q', { alt: true }))).toBe(true); }); + it('should support matching non-ASCII and CJK characters', () => { + const config = new Map(defaultKeyBindingConfig); + config.set(Command.QUIT, [new KeyBinding('Å'), new KeyBinding('가')]); + + const matchers = createKeyMatchers(config); + + // Å is normalized to å with shift=true by the parser + expect(matchers[Command.QUIT](createKey('å', { shift: true }))).toBe( + true, + ); + expect(matchers[Command.QUIT](createKey('å'))).toBe(false); + + // CJK characters do not have a lower/upper case + expect(matchers[Command.QUIT](createKey('가'))).toBe(true); + expect(matchers[Command.QUIT](createKey('나'))).toBe(false); + }); }); describe('Edge Cases', () => { diff --git a/packages/cli/src/ui/key/keybindingUtils.ts b/packages/cli/src/ui/key/keybindingUtils.ts index 0c79e67d13..b1b31d247d 100644 --- a/packages/cli/src/ui/key/keybindingUtils.ts +++ b/packages/cli/src/ui/key/keybindingUtils.ts @@ -86,7 +86,7 @@ export function formatKeyBinding( if (binding.shift) parts.push(modMap.shift); if (binding.cmd) parts.push(modMap.cmd); - const keyName = KEY_NAME_MAP[binding.key] || binding.key.toUpperCase(); + const keyName = KEY_NAME_MAP[binding.name] || binding.name.toUpperCase(); parts.push(keyName); return parts.join('+'); diff --git a/packages/core/package.json b/packages/core/package.json index ea3f22c9ec..4a560072d7 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-core", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "description": "Gemini CLI Core", "license": "Apache-2.0", "repository": { @@ -10,6 +10,7 @@ "type": "module", "main": "dist/index.js", 
"scripts": { + "bundle:browser-mcp": "node scripts/bundle-browser-mcp.mjs", "build": "node ../../scripts/build_package.js", "lint": "eslint . --ext .ts,.tsx", "format": "prettier --write .", @@ -73,6 +74,7 @@ "open": "^10.1.2", "picomatch": "^4.0.1", "proper-lockfile": "^4.1.2", + "puppeteer-core": "^24.0.0", "read-package-up": "^11.0.0", "shell-quote": "^1.8.3", "simple-git": "^3.28.0", @@ -101,6 +103,7 @@ "@types/fast-levenshtein": "^0.0.4", "@types/js-yaml": "^4.0.9", "@types/picomatch": "^4.0.1", + "chrome-devtools-mcp": "^0.19.0", "msw": "^2.3.4", "typescript": "^5.3.3", "vitest": "^3.1.1" diff --git a/packages/core/scripts/bundle-browser-mcp.mjs b/packages/core/scripts/bundle-browser-mcp.mjs new file mode 100644 index 0000000000..efbdd5714c --- /dev/null +++ b/packages/core/scripts/bundle-browser-mcp.mjs @@ -0,0 +1,104 @@ +import esbuild from 'esbuild'; +import fs from 'node:fs'; // Import the full fs module +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +const manifestPath = path.resolve( + __dirname, + '../src/agents/browser/browser-tools-manifest.json', +); +const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8')); + +// Only exclude tools explicitly mentioned in the manifest's exclude list +const excludedToolsFiles = (manifest.exclude || []).map((t) => t.name); + +// Basic esbuild plugin to empty out excluded modules +const emptyModulePlugin = { + name: 'empty-modules', + setup(build) { + if (excludedToolsFiles.length === 0) return; + + // Create a filter that matches any of the excluded tools + const excludeFilter = new RegExp(`(${excludedToolsFiles.join('|')})\\.js$`); + + build.onResolve({ filter: excludeFilter }, (args) => { + // Check if we are inside a tools directory to avoid accidental matches + if ( + args.importer.includes('chrome-devtools-mcp') && + /[\\/]tools[\\/]/.test(args.importer) + ) { + return { path: args.path, namespace: 'empty' 
}; + } + return null; + }); + + build.onLoad({ filter: /.*/, namespace: 'empty' }, (_args) => ({ + contents: 'export {};', // Empty module (ESM) + loader: 'js', + })); + }, +}; + +async function bundle() { + try { + const entryPoint = path.resolve( + __dirname, + '../../../node_modules/chrome-devtools-mcp/build/src/index.js', + ); + await esbuild.build({ + entryPoints: [entryPoint], + bundle: true, + outfile: path.resolve( + __dirname, + '../dist/bundled/chrome-devtools-mcp.mjs', + ), + format: 'esm', + platform: 'node', + plugins: [emptyModulePlugin], + external: [ + 'puppeteer-core', + '/bundled/*', + '../../../node_modules/puppeteer-core/*', + ], + banner: { + js: 'import { createRequire as __createRequire } from "module"; const require = __createRequire(import.meta.url);', + }, + }); + + // Copy third_party assets + const srcThirdParty = path.resolve( + __dirname, + '../../../node_modules/chrome-devtools-mcp/build/src/third_party', + ); + const destThirdParty = path.resolve( + __dirname, + '../dist/bundled/third_party', + ); + + if (fs.existsSync(srcThirdParty)) { + if (fs.existsSync(destThirdParty)) { + fs.rmSync(destThirdParty, { recursive: true, force: true }); + } + fs.cpSync(srcThirdParty, destThirdParty, { + recursive: true, + filter: (src) => { + // Skip large/unnecessary bundles that are either explicitly excluded + // or not required for the browser agent functionality. 
+ return ( + !src.includes('lighthouse-devtools-mcp-bundle.js') && + !src.includes('devtools-formatter-worker.js') + ); + }, + }); + } else { + console.warn(`Warning: third_party assets not found at ${srcThirdParty}`); + } + } catch (error) { + console.error('Error bundling chrome-devtools-mcp:', error); + process.exit(1); + } +} + +bundle(); diff --git a/packages/core/src/agents/agent-scheduler.test.ts b/packages/core/src/agents/agent-scheduler.test.ts index 9551650507..2be2f033d9 100644 --- a/packages/core/src/agents/agent-scheduler.test.ts +++ b/packages/core/src/agents/agent-scheduler.test.ts @@ -120,4 +120,25 @@ describe('agent-scheduler', () => { expect(schedulerConfig.toolRegistry).toBe(agentRegistry); expect(schedulerConfig.toolRegistry).not.toBe(mainRegistry); }); + + it('should create an AgentLoopContext that has a defined .config property', async () => { + const mockConfig = { + messageBus: mockMessageBus, + toolRegistry: mockToolRegistry, + promptId: 'test-prompt', + } as unknown as Mocked; + + const options = { + schedulerId: 'subagent-1', + toolRegistry: mockToolRegistry as unknown as ToolRegistry, + signal: new AbortController().signal, + }; + + await scheduleAgentTools(mockConfig as unknown as Config, [], options); + + const schedulerContext = vi.mocked(Scheduler).mock.calls[0][0].context; + expect(schedulerContext.config).toBeDefined(); + expect(schedulerContext.config.promptId).toBe('test-prompt'); + expect(schedulerContext.toolRegistry).toBe(mockToolRegistry); + }); }); diff --git a/packages/core/src/agents/agent-scheduler.ts b/packages/core/src/agents/agent-scheduler.ts index ae18b95ca4..7b8f966111 100644 --- a/packages/core/src/agents/agent-scheduler.ts +++ b/packages/core/src/agents/agent-scheduler.ts @@ -83,8 +83,17 @@ export async function scheduleAgentTools( configurable: true, }); + const schedulerContext = { + config: agentConfig, + promptId: config.promptId, + toolRegistry, + messageBus: toolRegistry.messageBus, + geminiClient: 
config.geminiClient, + sandboxManager: config.sandboxManager, + }; + const scheduler = new Scheduler({ - context: agentConfig, + context: schedulerContext, messageBus: toolRegistry.messageBus, getPreferredEditor: getPreferredEditor ?? (() => undefined), schedulerId, diff --git a/packages/core/src/agents/browser/browser-tools-manifest.json b/packages/core/src/agents/browser/browser-tools-manifest.json new file mode 100644 index 0000000000..26b7575890 --- /dev/null +++ b/packages/core/src/agents/browser/browser-tools-manifest.json @@ -0,0 +1,22 @@ +{ + "description": "Explicitly promoted tools from chrome-devtools-mcp for the gemini-cli browser agent.", + "targetVersion": "0.19.0", + "exclude": [ + { + "name": "lighthouse", + "reason": "3.5 MB pre-built bundle — not needed for gemini-cli browser agent's core tasks." + }, + { + "name": "performance", + "reason": "Depends on chrome-devtools-frontend TraceEngine (~800 KB) — not needed for core tasks." + }, + { + "name": "screencast", + "reason": "Requires ffmpeg at runtime — not a common browser agent use case and adds external dependency." + }, + { + "name": "extensions", + "reason": "Extension management not relevant for the gemini-cli browser agent's current scope." + } + ] +} diff --git a/packages/core/src/agents/browser/browserAgentDefinition.ts b/packages/core/src/agents/browser/browserAgentDefinition.ts index 2703f53930..0d0f863834 100644 --- a/packages/core/src/agents/browser/browserAgentDefinition.ts +++ b/packages/core/src/agents/browser/browserAgentDefinition.ts @@ -53,9 +53,22 @@ When you need to identify elements by visual attributes not in the AX tree (e.g. * Extracted from prototype (computer_use_subagent_cdt branch). * * @param visionEnabled Whether visual tools (analyze_screenshot, click_at) are available. + * @param allowedDomains Optional list of allowed domains to restrict navigation. 
*/ -export function buildBrowserSystemPrompt(visionEnabled: boolean): string { - return `You are an expert browser automation agent (Orchestrator). Your goal is to completely fulfill the user's request. +export function buildBrowserSystemPrompt( + visionEnabled: boolean, + allowedDomains?: string[], +): string { + const allowedDomainsInstruction = + allowedDomains && allowedDomains.length > 0 + ? `\n\nSECURITY DOMAIN RESTRICTION - CRITICAL:\nYou are strictly limited to the following allowed domains (and their subdomains if specified with '*.'):\n${allowedDomains + .map((d) => `- ${d}`) + .join( + '\n', + )}\nDo NOT attempt to navigate to any other domains using new_page or navigate_page, as it will be rejected. This is a hard security constraint.` + : ''; + + return `You are an expert browser automation agent (Orchestrator). Your goal is to completely fulfill the user's request.${allowedDomainsInstruction} IMPORTANT: You will receive an accessibility tree snapshot showing elements with uid values (e.g., uid=87_4 button "Login"). Use these uid values directly with your tools: @@ -109,7 +122,7 @@ export const BrowserAgentDefinition = ( ): LocalAgentDefinition => { // Use Preview Flash model if the main model is any of the preview models. // If the main model is not a preview model, use the default flash model. - const model = isPreviewModel(config.getModel()) + const model = isPreviewModel(config.getModel(), config) ? PREVIEW_GEMINI_FLASH_MODEL : DEFAULT_GEMINI_FLASH_MODEL; @@ -166,7 +179,10 @@ export const BrowserAgentDefinition = ( First, use new_page to open the relevant URL. 
Then call take_snapshot to see the page and proceed with your task.`, - systemPrompt: buildBrowserSystemPrompt(visionEnabled), + systemPrompt: buildBrowserSystemPrompt( + visionEnabled, + config.getBrowserAgentConfig().customConfig.allowedDomains, + ), }, }; }; diff --git a/packages/core/src/agents/browser/browserAgentFactory.test.ts b/packages/core/src/agents/browser/browserAgentFactory.test.ts index c7d7b1a6b0..94ee0bf0a1 100644 --- a/packages/core/src/agents/browser/browserAgentFactory.test.ts +++ b/packages/core/src/agents/browser/browserAgentFactory.test.ts @@ -24,6 +24,7 @@ const mockBrowserManager = { { name: 'click', description: 'Click element' }, { name: 'fill', description: 'Fill form field' }, { name: 'navigate_page', description: 'Navigate to URL' }, + { name: 'type_text', description: 'Type text into an element' }, // Visual tools (from --experimental-vision) { name: 'click_at', description: 'Click at coordinates' }, ]), @@ -70,6 +71,7 @@ describe('browserAgentFactory', () => { { name: 'click', description: 'Click element' }, { name: 'fill', description: 'Fill form field' }, { name: 'navigate_page', description: 'Navigate to URL' }, + { name: 'type_text', description: 'Type text into an element' }, // Visual tools (from --experimental-vision) { name: 'click_at', description: 'Click at coordinates' }, ]); @@ -135,7 +137,7 @@ describe('browserAgentFactory', () => { ); expect(definition.name).toBe(BROWSER_AGENT_NAME); - // 5 MCP tools + 1 type_text composite tool (no analyze_screenshot without visualModel) + // 6 MCP tools (no analyze_screenshot without visualModel) expect(definition.toolConfig?.tools).toHaveLength(6); }); @@ -228,7 +230,7 @@ describe('browserAgentFactory', () => { mockMessageBus, ); - // 5 MCP tools + 1 type_text + 1 analyze_screenshot + // 6 MCP tools + 1 analyze_screenshot expect(definition.toolConfig?.tools).toHaveLength(7); const toolNames = definition.toolConfig?.tools @@ -239,6 +241,25 @@ describe('browserAgentFactory', () => { 
expect(toolNames).toContain('analyze_screenshot'); }); + it('should include domain restrictions in system prompt when configured', async () => { + const configWithDomains = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['restricted.com'], + }, + }, + }); + + const { definition } = await createBrowserAgentDefinition( + configWithDomains, + mockMessageBus, + ); + + const systemPrompt = definition.promptConfig?.systemPrompt ?? ''; + expect(systemPrompt).toContain('SECURITY DOMAIN RESTRICTION - CRITICAL:'); + expect(systemPrompt).toContain('- restricted.com'); + }); + it('should include all MCP navigation tools (new_page, navigate_page) in definition', async () => { mockBrowserManager.getDiscoveredTools.mockResolvedValue([ { name: 'take_snapshot', description: 'Take snapshot' }, @@ -249,6 +270,7 @@ describe('browserAgentFactory', () => { { name: 'close_page', description: 'Close page' }, { name: 'select_page', description: 'Select page' }, { name: 'press_key', description: 'Press key' }, + { name: 'type_text', description: 'Type text into an element' }, { name: 'hover', description: 'Hover element' }, ]); @@ -272,7 +294,6 @@ describe('browserAgentFactory', () => { expect(toolNames).toContain('click'); expect(toolNames).toContain('take_snapshot'); expect(toolNames).toContain('press_key'); - // Custom composite tool must also be present expect(toolNames).toContain('type_text'); // Total: 9 MCP + 1 type_text (no analyze_screenshot without visualModel) expect(definition.toolConfig?.tools).toHaveLength(10); @@ -323,4 +344,22 @@ describe('buildBrowserSystemPrompt', () => { expect(prompt).toContain('complete_task'); } }); + + it('should include allowed domains restriction when provided', () => { + const prompt = buildBrowserSystemPrompt(false, [ + 'github.com', + '*.google.com', + ]); + expect(prompt).toContain('SECURITY DOMAIN RESTRICTION - CRITICAL:'); + expect(prompt).toContain('- github.com'); + expect(prompt).toContain('- *.google.com'); + }); + + 
it('should exclude allowed domains restriction when not provided or empty', () => { + let prompt = buildBrowserSystemPrompt(false); + expect(prompt).not.toContain('SECURITY DOMAIN RESTRICTION - CRITICAL:'); + + prompt = buildBrowserSystemPrompt(false, []); + expect(prompt).not.toContain('SECURITY DOMAIN RESTRICTION - CRITICAL:'); + }); }); diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts index 68eafc6e31..18ea162df9 100644 --- a/packages/core/src/agents/browser/browserManager.test.ts +++ b/packages/core/src/agents/browser/browserManager.test.ts @@ -39,6 +39,7 @@ vi.mock('@modelcontextprotocol/sdk/client/stdio.js', () => ({ vi.mock('../../utils/debugLogger.js', () => ({ debugLogger: { log: vi.fn(), + warn: vi.fn(), error: vi.fn(), }, })); @@ -47,6 +48,20 @@ vi.mock('./automationOverlay.js', () => ({ injectAutomationOverlay: vi.fn().mockResolvedValue(undefined), })); +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + existsSync: vi.fn((p: string) => { + if (p.endsWith('bundled/chrome-devtools-mcp.mjs')) { + return false; // Default + } + return actual.existsSync(p); + }), + }; +}); + +import * as fs from 'node:fs'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'; @@ -96,6 +111,40 @@ describe('BrowserManager', () => { vi.restoreAllMocks(); }); + describe('MCP bundled path resolution', () => { + it('should use bundled path if it exists (handles bundled CLI)', async () => { + vi.mocked(fs.existsSync).mockReturnValue(true); + const manager = new BrowserManager(mockConfig); + await manager.ensureConnection(); + + expect(StdioClientTransport).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'node', + args: expect.arrayContaining([ + expect.stringMatching(/bundled\/chrome-devtools-mcp\.mjs$/), + ]), + }), + ); + }); + + 
it('should fall back to development path if bundled path does not exist', async () => { + vi.mocked(fs.existsSync).mockReturnValue(false); + const manager = new BrowserManager(mockConfig); + await manager.ensureConnection(); + + expect(StdioClientTransport).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'node', + args: expect.arrayContaining([ + expect.stringMatching( + /(dist\/)?bundled\/chrome-devtools-mcp\.mjs$/, + ), + ]), + }), + ); + }); + }); + describe('getRawMcpClient', () => { it('should ensure connection and return raw MCP client', async () => { const manager = new BrowserManager(mockConfig); @@ -143,6 +192,75 @@ describe('BrowserManager', () => { isError: false, }); }); + + it('should block navigate_page to disallowed domain', async () => { + const restrictedConfig = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['google.com'], + }, + }, + }); + const manager = new BrowserManager(restrictedConfig); + const result = await manager.callTool('navigate_page', { + url: 'https://evil.com', + }); + + expect(result.isError).toBe(true); + expect((result.content || [])[0]?.text).toContain('not permitted'); + expect(Client).not.toHaveBeenCalled(); + }); + + it('should allow navigate_page to allowed domain', async () => { + const restrictedConfig = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['google.com'], + }, + }, + }); + const manager = new BrowserManager(restrictedConfig); + const result = await manager.callTool('navigate_page', { + url: 'https://google.com/search', + }); + + expect(result.isError).toBe(false); + expect((result.content || [])[0]?.text).toBe('Tool result'); + }); + + it('should allow navigate_page to subdomain when wildcard is used', async () => { + const restrictedConfig = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['*.google.com'], + }, + }, + }); + const manager = new BrowserManager(restrictedConfig); + const result = await manager.callTool('navigate_page', { + url: 
'https://mail.google.com', + }); + + expect(result.isError).toBe(false); + expect((result.content || [])[0]?.text).toBe('Tool result'); + }); + + it('should block new_page to disallowed domain', async () => { + const restrictedConfig = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['google.com'], + }, + }, + }); + const manager = new BrowserManager(restrictedConfig); + const result = await manager.callTool('new_page', { + url: 'https://evil.com', + }); + + expect(result.isError).toBe(true); + expect((result.content || [])[0]?.text).toContain('not permitted'); + }); }); describe('MCP connection', () => { @@ -153,10 +271,9 @@ describe('BrowserManager', () => { // Verify StdioClientTransport was created with correct args expect(StdioClientTransport).toHaveBeenCalledWith( expect.objectContaining({ - command: process.platform === 'win32' ? 'npx.cmd' : 'npx', + command: 'node', args: expect.arrayContaining([ - '-y', - expect.stringMatching(/chrome-devtools-mcp@/), + expect.stringMatching(/chrome-devtools-mcp\.mjs$/), '--experimental-vision', ]), }), @@ -166,12 +283,47 @@ describe('BrowserManager', () => { ?.args as string[]; expect(args).not.toContain('--isolated'); expect(args).not.toContain('--autoConnect'); + expect(args).not.toContain('-y'); // Persistent mode should set the default --userDataDir under ~/.gemini expect(args).toContain('--userDataDir'); const userDataDirIndex = args.indexOf('--userDataDir'); expect(args[userDataDirIndex + 1]).toMatch(/cli-browser-profile$/); }); + it('should pass --host-rules when allowedDomains is configured', async () => { + const restrictedConfig = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['google.com', '*.openai.com'], + }, + }, + }); + + const manager = new BrowserManager(restrictedConfig); + await manager.ensureConnection(); + + const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0] + ?.args as string[]; + expect(args).toContain( + '--chromeArg="--host-rules=MAP * 127.0.0.1, EXCLUDE 
google.com, EXCLUDE *.openai.com, EXCLUDE 127.0.0.1"', + ); + }); + + it('should throw error when invalid domain is configured in allowedDomains', async () => { + const invalidConfig = makeFakeConfig({ + agents: { + browser: { + allowedDomains: ['invalid domain!'], + }, + }, + }); + + const manager = new BrowserManager(invalidConfig); + await expect(manager.ensureConnection()).rejects.toThrow( + 'Invalid domain in allowedDomains: invalid domain!', + ); + }); + it('should pass headless flag when configured', async () => { const headlessConfig = makeFakeConfig({ agents: { @@ -191,7 +343,7 @@ describe('BrowserManager', () => { expect(StdioClientTransport).toHaveBeenCalledWith( expect.objectContaining({ - command: process.platform === 'win32' ? 'npx.cmd' : 'npx', + command: 'node', args: expect.arrayContaining(['--headless']), }), ); @@ -216,7 +368,7 @@ describe('BrowserManager', () => { expect(StdioClientTransport).toHaveBeenCalledWith( expect.objectContaining({ - command: process.platform === 'win32' ? 'npx.cmd' : 'npx', + command: 'node', args: expect.arrayContaining(['--userDataDir', '/path/to/profile']), }), ); diff --git a/packages/core/src/agents/browser/browserManager.ts b/packages/core/src/agents/browser/browserManager.ts index 426a6cec70..08e9597755 100644 --- a/packages/core/src/agents/browser/browserManager.ts +++ b/packages/core/src/agents/browser/browserManager.ts @@ -25,10 +25,12 @@ import type { Config } from '../../config/config.js'; import { Storage } from '../../config/storage.js'; import { injectInputBlocker } from './inputBlocker.js'; import * as path from 'node:path'; +import * as fs from 'node:fs'; +import { fileURLToPath } from 'node:url'; import { injectAutomationOverlay } from './automationOverlay.js'; -// Pin chrome-devtools-mcp version for reproducibility. 
-const CHROME_DEVTOOLS_MCP_VERSION = '0.17.1'; +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); // Default browser profile directory name within ~/.gemini/ const BROWSER_PROFILE_DIR = 'cli-browser-profile'; @@ -147,6 +149,19 @@ export class BrowserManager { throw signal.reason ?? new Error('Operation cancelled'); } + const errorMessage = this.checkNavigationRestrictions(toolName, args); + if (errorMessage) { + return { + content: [ + { + type: 'text', + text: errorMessage, + }, + ], + isError: true, + }; + } + const client = await this.getRawMcpClient(); const callPromise = client.callTool( { name: toolName, arguments: args }, @@ -266,7 +281,7 @@ export class BrowserManager { this.rawMcpClient = undefined; } - // Close transport (this terminates the npx process and browser) + // Close transport (this terminates the browser) if (this.mcpTransport) { try { await this.mcpTransport.close(); @@ -284,8 +299,7 @@ export class BrowserManager { /** * Connects to chrome-devtools-mcp which manages the browser process. * - * Spawns npx chrome-devtools-mcp with: - * - --isolated: Manages its own browser instance + * Spawns node with the bundled chrome-devtools-mcp.mjs. * - --experimental-vision: Enables visual tools (click_at, etc.) * * IMPORTANT: This does NOT use McpClientManager and does NOT register @@ -310,11 +324,7 @@ export class BrowserManager { const browserConfig = this.config.getBrowserAgentConfig(); const sessionMode = browserConfig.customConfig.sessionMode ?? 
'persistent'; - const mcpArgs = [ - '-y', - `chrome-devtools-mcp@${CHROME_DEVTOOLS_MCP_VERSION}`, - '--experimental-vision', - ]; + const mcpArgs = ['--experimental-vision']; // Session mode determines how the browser is managed: // - "isolated": Temp profile, cleaned up after session (--isolated) @@ -342,16 +352,46 @@ export class BrowserManager { mcpArgs.push('--userDataDir', defaultProfilePath); } + if ( + browserConfig.customConfig.allowedDomains && + browserConfig.customConfig.allowedDomains.length > 0 + ) { + const exclusionRules = browserConfig.customConfig.allowedDomains + .map((domain) => { + if (!/^(\*\.)?([a-zA-Z0-9-]+\.)*[a-zA-Z0-9-]+$/.test(domain)) { + throw new Error(`Invalid domain in allowedDomains: ${domain}`); + } + return `EXCLUDE ${domain}`; + }) + .join(', '); + mcpArgs.push( + `--chromeArg="--host-rules=MAP * 127.0.0.1, ${exclusionRules}, EXCLUDE 127.0.0.1"`, + ); + } + debugLogger.log( - `Launching chrome-devtools-mcp (${sessionMode} mode) with args: ${mcpArgs.join(' ')}`, + `Launching bundled chrome-devtools-mcp (${sessionMode} mode) with args: ${mcpArgs.join(' ')}`, ); - // Create stdio transport to npx chrome-devtools-mcp. + // Create stdio transport to the bundled chrome-devtools-mcp. // stderr is piped (not inherited) to prevent MCP server banners and // warnings from corrupting the UI in alternate buffer mode. + let bundleMcpPath = path.resolve( + __dirname, + 'bundled/chrome-devtools-mcp.mjs', + ); + if (!fs.existsSync(bundleMcpPath)) { + bundleMcpPath = path.resolve( + __dirname, + __dirname.includes(`${path.sep}dist${path.sep}`) + ? '../../../bundled/chrome-devtools-mcp.mjs' + : '../../../dist/bundled/chrome-devtools-mcp.mjs', + ); + } + this.mcpTransport = new StdioClientTransport({ - command: process.platform === 'win32' ? 
'npx.cmd' : 'npx', - args: mcpArgs, + command: 'node', + args: [bundleMcpPath, ...mcpArgs], stderr: 'pipe', }); @@ -462,8 +502,7 @@ export class BrowserManager { `Timed out connecting to Chrome: ${message}\n\n` + `Possible causes:\n` + ` 1. Chrome is not installed or not in PATH\n` + - ` 2. npx cannot download chrome-devtools-mcp (check network/proxy)\n` + - ` 3. Chrome failed to start (try setting headless: true in settings.json)`, + ` 2. Chrome failed to start (try setting headless: true in settings.json)`, ); } @@ -502,6 +541,63 @@ export class BrowserManager { ); } + /** + * Check navigation restrictions based on tools and the args sent + * along with them. + * + * @returns error message if failed, undefined if passed. + */ + private checkNavigationRestrictions( + toolName: string, + args: Record, + ): string | undefined { + const pageNavigationTools = ['navigate_page', 'new_page']; + + if (!pageNavigationTools.includes(toolName)) { + return undefined; + } + + const allowedDomains = + this.config.getBrowserAgentConfig().customConfig.allowedDomains; + if (!allowedDomains || allowedDomains.length === 0) { + return undefined; + } + + const url = args['url']; + if (!url) { + return undefined; + } + if (typeof url !== 'string') { + return `Invalid URL: URL must be a string.`; + } + + try { + const parsedUrl = new URL(url); + const urlHostname = parsedUrl.hostname.replace(/\.$/, ''); + + for (const domainPattern of allowedDomains) { + if (domainPattern.startsWith('*.')) { + const baseDomain = domainPattern.substring(2); + if ( + urlHostname === baseDomain || + urlHostname.endsWith(`.${baseDomain}`) + ) { + return undefined; + } + } else { + if (urlHostname === domainPattern) { + return undefined; + } + } + } + } catch { + return `Invalid URL: Malformed URL string.`; + } + + // If none matched, then deny + return `Tool '${toolName}' is not permitted for the requested URL/domain based on your current browser settings.`; + } + /** * Registers a fallback notification 
handler on the MCP client to * automatically re-inject the input blocker after any server-side diff --git a/packages/core/src/agents/browser/mcpToolWrapper.test.ts b/packages/core/src/agents/browser/mcpToolWrapper.test.ts index c74f273b27..9dc2f77b1f 100644 --- a/packages/core/src/agents/browser/mcpToolWrapper.test.ts +++ b/packages/core/src/agents/browser/mcpToolWrapper.test.ts @@ -68,18 +68,19 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); - expect(tools).toHaveLength(3); + expect(tools).toHaveLength(2); expect(tools[0].name).toBe('take_snapshot'); expect(tools[1].name).toBe('click'); - expect(tools[2].name).toBe('type_text'); }); it('should return tools with correct description', async () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); // Descriptions include augmented hints, so we check they contain the original @@ -93,6 +94,7 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const schema = tools[0].schema; @@ -106,6 +108,7 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const invocation = tools[0].build({ verbose: true }); @@ -118,6 +121,7 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const invocation = tools[0].build({}); @@ -131,6 +135,7 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const invocation = tools[1].build({ uid: 'elem-123' }); @@ -149,6 +154,7 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const invocation = tools[0].build({ verbose: true }); @@ -167,6 +173,7 @@ describe('mcpToolWrapper', () 
=> { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const invocation = tools[1].build({ uid: 'invalid' }); @@ -184,6 +191,7 @@ describe('mcpToolWrapper', () => { const tools = await createMcpDeclarativeTools( mockBrowserManager, mockMessageBus, + false, ); const invocation = tools[0].build({}); diff --git a/packages/core/src/agents/browser/mcpToolWrapper.ts b/packages/core/src/agents/browser/mcpToolWrapper.ts index edbff503ca..3af3f307da 100644 --- a/packages/core/src/agents/browser/mcpToolWrapper.ts +++ b/packages/core/src/agents/browser/mcpToolWrapper.ts @@ -175,144 +175,6 @@ class McpToolInvocation extends BaseToolInvocation< } } -/** - * Composite tool invocation that types a full string by calling press_key - * for each character internally, avoiding N model round-trips. - */ -class TypeTextInvocation extends BaseToolInvocation< - Record, - ToolResult -> { - constructor( - private readonly browserManager: BrowserManager, - private readonly text: string, - private readonly submitKey: string | undefined, - messageBus: MessageBus, - ) { - super({ text, submitKey }, messageBus, 'type_text', 'type_text'); - } - - getDescription(): string { - const preview = `"${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`; - return this.submitKey - ? 
`type_text: ${preview} + ${this.submitKey}` - : `type_text: ${preview}`; - } - - protected override async getConfirmationDetails( - _abortSignal: AbortSignal, - ): Promise { - if (!this.messageBus) { - return false; - } - - return { - type: 'mcp', - title: `Confirm Tool: type_text`, - serverName: 'browser-agent', - toolName: 'type_text', - toolDisplayName: 'type_text', - onConfirm: async (outcome: ToolConfirmationOutcome) => { - await this.publishPolicyUpdate(outcome); - }, - }; - } - - override getPolicyUpdateOptions( - _outcome: ToolConfirmationOutcome, - ): PolicyUpdateOptions | undefined { - return { - mcpName: 'browser-agent', - }; - } - - override async execute(signal: AbortSignal): Promise { - try { - if (signal.aborted) { - return { - llmContent: 'Error: Operation cancelled before typing started.', - returnDisplay: 'Operation cancelled before typing started.', - error: { message: 'Operation cancelled' }, - }; - } - - await this.typeCharByChar(signal); - - // Optionally press a submit key (Enter, Tab, etc.) after typing - if (this.submitKey && !signal.aborted) { - const keyResult = await this.browserManager.callTool( - 'press_key', - { key: this.submitKey }, - signal, - ); - if (keyResult.isError) { - const errText = this.extractErrorText(keyResult); - debugLogger.warn( - `type_text: submitKey("${this.submitKey}") failed: ${errText}`, - ); - } - } - - const summary = this.submitKey - ? `Successfully typed "${this.text}" and pressed ${this.submitKey}` - : `Successfully typed "${this.text}"`; - - return { - llmContent: summary, - returnDisplay: summary, - }; - } catch (error) { - const errorMsg = error instanceof Error ? 
error.message : String(error); - - // Chrome connection errors are fatal - if (errorMsg.includes('Could not connect to Chrome')) { - throw error; - } - - debugLogger.error(`type_text failed: ${errorMsg}`); - return { - llmContent: `Error: ${errorMsg}`, - returnDisplay: `Error: ${errorMsg}`, - error: { message: errorMsg }, - }; - } - } - - /** Types each character via individual press_key MCP calls. */ - private async typeCharByChar(signal: AbortSignal): Promise { - const chars = [...this.text]; // Handle Unicode correctly - for (const char of chars) { - if (signal.aborted) return; - - // Map special characters to key names - const key = char === ' ' ? 'Space' : char; - const result = await this.browserManager.callTool( - 'press_key', - { key }, - signal, - ); - - if (result.isError) { - debugLogger.warn( - `type_text: press_key("${key}") failed: ${this.extractErrorText(result)}`, - ); - } - } - } - - /** Extract error text from an MCP tool result. */ - private extractErrorText(result: McpToolCallResult): string { - return ( - result.content - ?.filter( - (c: { type: string; text?: string }) => c.type === 'text' && c.text, - ) - .map((c: { type: string; text?: string }) => c.text) - .join('\n') || 'Unknown error' - ); - } -} - /** * DeclarativeTool wrapper for an MCP tool. */ @@ -353,65 +215,6 @@ class McpDeclarativeTool extends DeclarativeTool< } } -/** - * DeclarativeTool for the custom type_text composite tool. - */ -class TypeTextDeclarativeTool extends DeclarativeTool< - Record, - ToolResult -> { - constructor( - private readonly browserManager: BrowserManager, - messageBus: MessageBus, - ) { - super( - 'type_text', - 'type_text', - 'Types a full text string into the currently focused element. ' + - 'Much faster than calling press_key for each character individually. ' + - 'Use this to enter text into form fields, search boxes, spreadsheet cells, or any focused input. ' + - 'The element must already be focused (e.g., after a click). 
' + - 'Use submitKey to press a key after typing (e.g., submitKey="Enter" to submit a form or confirm a value, submitKey="Tab" to move to the next field).', - Kind.Other, - { - type: 'object', - properties: { - text: { - type: 'string', - description: 'The text to type into the focused element.', - }, - submitKey: { - type: 'string', - description: - 'Optional key to press after typing (e.g., "Enter", "Tab", "Escape"). ' + - 'Useful for submitting form fields or moving to the next cell in a spreadsheet.', - }, - }, - required: ['text'], - }, - messageBus, - /* isOutputMarkdown */ true, - /* canUpdateOutput */ false, - ); - } - - build( - params: Record, - ): ToolInvocation, ToolResult> { - const submitKey = - // eslint-disable-next-line no-restricted-syntax - typeof params['submitKey'] === 'string' && params['submitKey'] - ? params['submitKey'] - : undefined; - return new TypeTextInvocation( - this.browserManager, - String(params['text'] ?? ''), - submitKey, - this.messageBus, - ); - } -} - /** * Creates DeclarativeTool instances from dynamically discovered MCP tools, * plus custom composite tools (like type_text). @@ -423,13 +226,14 @@ class TypeTextDeclarativeTool extends DeclarativeTool< * * @param browserManager The browser manager with isolated MCP client * @param messageBus Message bus for tool invocations + * @param shouldDisableInput Whether input should be disabled for this agent * @returns Array of DeclarativeTools that dispatch to the isolated MCP client */ export async function createMcpDeclarativeTools( browserManager: BrowserManager, messageBus: MessageBus, shouldDisableInput: boolean = false, -): Promise> { +): Promise { // Get dynamically discovered tools from the MCP server const mcpTools = await browserManager.getDiscoveredTools(); @@ -438,29 +242,25 @@ export async function createMcpDeclarativeTools( (shouldDisableInput ? 
' (input blocker enabled)' : ''), ); - const tools: Array = - mcpTools.map((mcpTool) => { - const schema = convertMcpToolToFunctionDeclaration(mcpTool); - // Augment description with uid-context hints - const augmentedDescription = augmentToolDescription( - mcpTool.name, - mcpTool.description ?? '', - ); - return new McpDeclarativeTool( - browserManager, - mcpTool.name, - augmentedDescription, - schema.parametersJsonSchema, - messageBus, - shouldDisableInput, - ); - }); - - // Add custom composite tools - tools.push(new TypeTextDeclarativeTool(browserManager, messageBus)); + const tools: McpDeclarativeTool[] = mcpTools.map((mcpTool) => { + const schema = convertMcpToolToFunctionDeclaration(mcpTool); + // Augment description with uid-context hints + const augmentedDescription = augmentToolDescription( + mcpTool.name, + mcpTool.description ?? '', + ); + return new McpDeclarativeTool( + browserManager, + mcpTool.name, + augmentedDescription, + schema.parametersJsonSchema, + messageBus, + shouldDisableInput, + ); + }); debugLogger.log( - `Total tools registered: ${tools.length} (${mcpTools.length} MCP + 1 custom)`, + `Total tools registered: ${tools.length} (${mcpTools.length} MCP)`, ); return tools; diff --git a/packages/core/src/availability/policyHelpers.ts b/packages/core/src/availability/policyHelpers.ts index 406abde5e3..290c47d896 100644 --- a/packages/core/src/availability/policyHelpers.ts +++ b/packages/core/src/availability/policyHelpers.ts @@ -54,19 +54,21 @@ export function resolvePolicyChain( useCustomToolModel, hasAccessToPreview, ); - const isAutoPreferred = preferredModel ? isAutoModel(preferredModel) : false; - const isAutoConfigured = isAutoModel(configuredModel); + const isAutoPreferred = preferredModel + ? 
isAutoModel(preferredModel, config) + : false; + const isAutoConfigured = isAutoModel(configuredModel, config); if (resolvedModel === DEFAULT_GEMINI_FLASH_LITE_MODEL) { chain = getFlashLitePolicyChain(); } else if ( - isGemini3Model(resolvedModel) || + isGemini3Model(resolvedModel, config) || isAutoPreferred || isAutoConfigured ) { if (hasAccessToPreview) { const previewEnabled = - isGemini3Model(resolvedModel) || + isGemini3Model(resolvedModel, config) || preferredModel === PREVIEW_GEMINI_MODEL_AUTO || configuredModel === PREVIEW_GEMINI_MODEL_AUTO; chain = getModelPolicyChain({ diff --git a/packages/core/src/code_assist/oauth2.test.ts b/packages/core/src/code_assist/oauth2.test.ts index 2405e3307c..afe35ce665 100644 --- a/packages/core/src/code_assist/oauth2.test.ts +++ b/packages/core/src/code_assist/oauth2.test.ts @@ -480,6 +480,7 @@ describe('oauth2', () => { expect(fs.existsSync(googleAccountPath)).toBe(true); if (fs.existsSync(googleAccountPath)) { const cachedGoogleAccount = fs.readFileSync(googleAccountPath, 'utf-8'); + expect(JSON.parse(cachedGoogleAccount)).toEqual({ active: 'test-user-code-account@gmail.com', old: [], @@ -1349,7 +1350,7 @@ describe('oauth2', () => { let dataHandler: ((data: Buffer) => void) | undefined; await vi.waitFor(() => { const dataCall = stdinOnSpy.mock.calls.find( - (call: [string, ...unknown[]]) => call[0] === 'data', + (call: [string | symbol, ...unknown[]]) => call[0] === 'data', ); dataHandler = dataCall?.[1] as ((data: Buffer) => void) | undefined; if (!dataHandler) throw new Error('stdin handler not registered yet'); diff --git a/packages/core/src/config/agent-loop-context.ts b/packages/core/src/config/agent-loop-context.ts index 92eff0c3c1..0a879d9c93 100644 --- a/packages/core/src/config/agent-loop-context.ts +++ b/packages/core/src/config/agent-loop-context.ts @@ -7,6 +7,7 @@ import type { GeminiClient } from '../core/client.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; import type { 
ToolRegistry } from '../tools/tool-registry.js'; +import type { SandboxManager } from '../services/sandboxManager.js'; import type { Config } from './config.js'; /** @@ -28,4 +29,7 @@ export interface AgentLoopContext { /** The client used to communicate with the LLM in this context. */ readonly geminiClient: GeminiClient; + + /** The service used to prepare commands for sandboxed execution. */ + readonly sandboxManager: SandboxManager; } diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index ce0d878b30..cd79500268 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -41,6 +41,10 @@ import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js'; import type { HookDefinition, HookEventName } from '../hooks/types.js'; import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; import { GitService } from '../services/gitService.js'; +import { + createSandboxManager, + type SandboxManager, +} from '../services/sandboxManager.js'; import { initializeTelemetry, DEFAULT_TELEMETRY_TARGET, @@ -318,6 +322,8 @@ export interface BrowserAgentCustomConfig { profilePath?: string; /** Model override for the visual agent. */ visualModel?: string; + /** List of allowed domains for the browser agent (e.g., ["github.com", "*.google.com"]). */ + allowedDomains?: string[]; /** Disable user input on the browser window during automation. 
Default: true in non-headless mode */ disableUserInput?: boolean; } @@ -510,6 +516,7 @@ export interface ConfigParameters { clientVersion?: string; embeddingModel?: string; sandbox?: SandboxConfig; + toolSandboxing?: boolean; targetDir: string; debugMode: boolean; question?: string; @@ -602,8 +609,10 @@ export interface ConfigParameters { recordResponses?: string; ptyInfo?: string; disableYoloMode?: boolean; + disableAlwaysAllow?: boolean; rawOutput?: boolean; acceptRawOutputRisk?: boolean; + dynamicModelConfiguration?: boolean; modelConfigServiceConfig?: ModelConfigServiceConfig; enableHooks?: boolean; enableHooksUI?: boolean; @@ -617,6 +626,7 @@ export interface ConfigParameters { disabledSkills?: string[]; adminSkillsEnabled?: boolean; experimentalJitContext?: boolean; + topicUpdateNarration?: boolean; toolOutputMasking?: Partial; disableLLMCorrection?: boolean; plan?: boolean; @@ -688,6 +698,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly telemetrySettings: TelemetrySettings; private readonly usageStatisticsEnabled: boolean; private _geminiClient!: GeminiClient; + private readonly _sandboxManager: SandboxManager; private baseLlmClient!: BaseLlmClient; private localLiteRtLmClient?: LocalLiteRtLmClient; private modelRouterService: ModelRouterService; @@ -801,8 +812,10 @@ export class Config implements McpContext, AgentLoopContext { readonly fakeResponses?: string; readonly recordResponses?: string; private readonly disableYoloMode: boolean; + private readonly disableAlwaysAllow: boolean; private readonly rawOutput: boolean; private readonly acceptRawOutputRisk: boolean; + private readonly dynamicModelConfiguration: boolean; private pendingIncludeDirectories: string[]; private readonly enableHooks: boolean; private readonly enableHooksUI: boolean; @@ -836,6 +849,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly adminSkillsEnabled: boolean; private readonly experimentalJitContext: boolean; + 
private readonly topicUpdateNarration: boolean; private readonly disableLLMCorrection: boolean; private readonly planEnabled: boolean; private readonly trackerEnabled: boolean; @@ -857,7 +871,19 @@ export class Config implements McpContext, AgentLoopContext { this.embeddingModel = params.embeddingModel ?? DEFAULT_GEMINI_EMBEDDING_MODEL; this.fileSystemService = new StandardFileSystemService(); - this.sandbox = params.sandbox; + this.sandbox = params.sandbox + ? { + enabled: params.sandbox.enabled ?? false, + allowedPaths: params.sandbox.allowedPaths ?? [], + networkAccess: params.sandbox.networkAccess ?? false, + command: params.sandbox.command, + image: params.sandbox.image, + } + : { + enabled: false, + allowedPaths: [], + networkAccess: false, + }; this.targetDir = path.resolve(params.targetDir); this.folderTrust = params.folderTrust ?? false; this.workspaceContext = new WorkspaceContext(this.targetDir, []); @@ -938,7 +964,42 @@ export class Config implements McpContext, AgentLoopContext { this.disabledSkills = params.disabledSkills ?? []; this.adminSkillsEnabled = params.adminSkillsEnabled ?? true; this.modelAvailabilityService = new ModelAvailabilityService(); + this.dynamicModelConfiguration = params.dynamicModelConfiguration ?? false; + + // HACK: The settings loading logic doesn't currently merge the default + // generation config with the user's settings. This means if a user provides + // any `generation` settings (e.g., just `overrides`), the default `aliases` + // are lost. This hack manually merges the default aliases back in if they + // are missing from the user's config. + // TODO(12593): Fix the settings loading logic to properly merge defaults and + // remove this hack. + let modelConfigServiceConfig = params.modelConfigServiceConfig; + if (modelConfigServiceConfig) { + // Ensure user-defined model definitions augment, not replace, the defaults. 
+ const mergedModelDefinitions = { + ...DEFAULT_MODEL_CONFIGS.modelDefinitions, + ...modelConfigServiceConfig.modelDefinitions, + }; + + modelConfigServiceConfig = { + // Preserve other user settings like customAliases + ...modelConfigServiceConfig, + // Apply defaults for aliases and overrides if they are not provided + aliases: + modelConfigServiceConfig.aliases ?? DEFAULT_MODEL_CONFIGS.aliases, + overrides: + modelConfigServiceConfig.overrides ?? DEFAULT_MODEL_CONFIGS.overrides, + // Use the merged model definitions + modelDefinitions: mergedModelDefinitions, + }; + } + + this.modelConfigService = new ModelConfigService( + modelConfigServiceConfig ?? DEFAULT_MODEL_CONFIGS, + ); + this.experimentalJitContext = params.experimentalJitContext ?? false; + this.topicUpdateNarration = params.topicUpdateNarration ?? false; this.modelSteering = params.modelSteering ?? false; this.userHintService = new UserHintService(() => this.isModelSteeringEnabled(), @@ -988,11 +1049,12 @@ export class Config implements McpContext, AgentLoopContext { showColor: params.shellExecutionConfig?.showColor ?? false, pager: params.shellExecutionConfig?.pager ?? 'cat', sanitizationConfig: this.sanitizationConfig, + sandboxManager: this.sandboxManager, }; this.truncateToolOutputThreshold = params.truncateToolOutputThreshold ?? DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD; - this.useWriteTodos = isPreviewModel(this.model) + this.useWriteTodos = isPreviewModel(this.model, this) ? false : (params.useWriteTodos ?? true); this.workspacePoliciesDir = params.workspacePoliciesDir; @@ -1029,11 +1091,13 @@ export class Config implements McpContext, AgentLoopContext { this.policyUpdateConfirmationRequest = params.policyUpdateConfirmationRequest; + this.disableAlwaysAllow = params.disableAlwaysAllow ?? false; this.policyEngine = new PolicyEngine( { ...params.policyEngineConfig, approvalMode: params.approvalMode ?? 
params.policyEngineConfig?.approvalMode, + disableAlwaysAllow: this.disableAlwaysAllow, }, checkerRunner, ); @@ -1105,34 +1169,9 @@ export class Config implements McpContext, AgentLoopContext { } } this._geminiClient = new GeminiClient(this); + this._sandboxManager = createSandboxManager(params.toolSandboxing ?? false); + this.shellExecutionConfig.sandboxManager = this._sandboxManager; this.modelRouterService = new ModelRouterService(this); - - // HACK: The settings loading logic doesn't currently merge the default - // generation config with the user's settings. This means if a user provides - // any `generation` settings (e.g., just `overrides`), the default `aliases` - // are lost. This hack manually merges the default aliases back in if they - // are missing from the user's config. - // TODO(12593): Fix the settings loading logic to properly merge defaults and - // remove this hack. - let modelConfigServiceConfig = params.modelConfigServiceConfig; - if (modelConfigServiceConfig) { - if (!modelConfigServiceConfig.aliases) { - modelConfigServiceConfig = { - ...modelConfigServiceConfig, - aliases: DEFAULT_MODEL_CONFIGS.aliases, - }; - } - if (!modelConfigServiceConfig.overrides) { - modelConfigServiceConfig = { - ...modelConfigServiceConfig, - overrides: DEFAULT_MODEL_CONFIGS.overrides, - }; - } - } - - this.modelConfigService = new ModelConfigService( - modelConfigServiceConfig ?? DEFAULT_MODEL_CONFIGS, - ); } get config(): Config { @@ -1334,7 +1373,10 @@ export class Config implements McpContext, AgentLoopContext { // Only reset when we have explicit "no access" (hasAccessToPreviewModel === false). // When null (quota not fetched) or true, we preserve the saved model. 
- if (isPreviewModel(this.model) && this.hasAccessToPreviewModel === false) { + if ( + isPreviewModel(this.model, this) && + this.hasAccessToPreviewModel === false + ) { this.setModel(DEFAULT_GEMINI_MODEL_AUTO); } @@ -1430,6 +1472,10 @@ export class Config implements McpContext, AgentLoopContext { return this._geminiClient; } + get sandboxManager(): SandboxManager { + return this._sandboxManager; + } + getSessionId(): string { return this.promptId; } @@ -1602,7 +1648,7 @@ export class Config implements McpContext, AgentLoopContext { const isPreview = model === PREVIEW_GEMINI_MODEL_AUTO || - isPreviewModel(this.getActiveModel()); + isPreviewModel(this.getActiveModel(), this); const proModel = isPreview ? PREVIEW_GEMINI_MODEL : DEFAULT_GEMINI_MODEL; const flashModel = isPreview ? PREVIEW_GEMINI_FLASH_MODEL @@ -1800,8 +1846,9 @@ export class Config implements McpContext, AgentLoopContext { } const hasAccess = - quota.buckets?.some((b) => b.modelId && isPreviewModel(b.modelId)) ?? - false; + quota.buckets?.some( + (b) => b.modelId && isPreviewModel(b.modelId, this), + ) ?? 
false; this.setHasAccessToPreviewModel(hasAccess); return quota; } catch (e) { @@ -2027,6 +2074,10 @@ export class Config implements McpContext, AgentLoopContext { return this.experimentalJitContext; } + isTopicUpdateNarrationEnabled(): boolean { + return this.topicUpdateNarration; + } + isModelSteeringEnabled(): boolean { return this.modelSteering; } @@ -2189,6 +2240,10 @@ export class Config implements McpContext, AgentLoopContext { return this.disableYoloMode || !this.isTrustedFolder(); } + getDisableAlwaysAllow(): boolean { + return this.disableAlwaysAllow; + } + getRawOutput(): boolean { return this.rawOutput; } @@ -2197,6 +2252,10 @@ export class Config implements McpContext, AgentLoopContext { return this.acceptRawOutputRisk; } + getExperimentalDynamicModelConfiguration(): boolean { + return this.dynamicModelConfiguration; + } + getPendingIncludeDirectories(): string[] { return this.pendingIncludeDirectories; } @@ -2821,6 +2880,8 @@ export class Config implements McpContext, AgentLoopContext { sanitizationConfig: config.sanitizationConfig ?? this.shellExecutionConfig.sanitizationConfig, + sandboxManager: + config.sandboxManager ?? this.shellExecutionConfig.sandboxManager, }; } getScreenReader(): boolean { @@ -2915,6 +2976,7 @@ export class Config implements McpContext, AgentLoopContext { headless: customConfig.headless ?? 
false, profilePath: customConfig.profilePath, visualModel: customConfig.visualModel, + allowedDomains: customConfig.allowedDomains, disableUserInput: customConfig.disableUserInput, }, }; diff --git a/packages/core/src/config/defaultModelConfigs.ts b/packages/core/src/config/defaultModelConfigs.ts index 5344aa4421..c0e8b6c6ba 100644 --- a/packages/core/src/config/defaultModelConfigs.ts +++ b/packages/core/src/config/defaultModelConfigs.ts @@ -249,4 +249,94 @@ export const DEFAULT_MODEL_CONFIGS: ModelConfigServiceConfig = { }, }, ], + modelDefinitions: { + // Concrete Models + 'gemini-3.1-pro-preview': { + tier: 'pro', + family: 'gemini-3', + isPreview: true, + dialogLocation: 'manual', + features: { thinking: true, multimodalToolUse: true }, + }, + 'gemini-3.1-pro-preview-customtools': { + tier: 'pro', + family: 'gemini-3', + isPreview: true, + features: { thinking: true, multimodalToolUse: true }, + }, + 'gemini-3-pro-preview': { + tier: 'pro', + family: 'gemini-3', + isPreview: true, + dialogLocation: 'manual', + features: { thinking: true, multimodalToolUse: true }, + }, + 'gemini-3-flash-preview': { + tier: 'flash', + family: 'gemini-3', + isPreview: true, + dialogLocation: 'manual', + features: { thinking: false, multimodalToolUse: true }, + }, + 'gemini-2.5-pro': { + tier: 'pro', + family: 'gemini-2.5', + isPreview: false, + dialogLocation: 'manual', + features: { thinking: false, multimodalToolUse: false }, + }, + 'gemini-2.5-flash': { + tier: 'flash', + family: 'gemini-2.5', + isPreview: false, + dialogLocation: 'manual', + features: { thinking: false, multimodalToolUse: false }, + }, + 'gemini-2.5-flash-lite': { + tier: 'flash-lite', + family: 'gemini-2.5', + isPreview: false, + dialogLocation: 'manual', + features: { thinking: false, multimodalToolUse: false }, + }, + // Aliases + auto: { + tier: 'auto', + isPreview: true, + features: { thinking: true, multimodalToolUse: false }, + }, + pro: { + tier: 'pro', + isPreview: false, + features: { thinking: 
true, multimodalToolUse: false }, + }, + flash: { + tier: 'flash', + isPreview: false, + features: { thinking: false, multimodalToolUse: false }, + }, + 'flash-lite': { + tier: 'flash-lite', + isPreview: false, + features: { thinking: false, multimodalToolUse: false }, + }, + 'auto-gemini-3': { + displayName: 'Auto (Gemini 3)', + tier: 'auto', + isPreview: true, + dialogLocation: 'main', + dialogDescription: + 'Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash', + features: { thinking: true, multimodalToolUse: false }, + }, + 'auto-gemini-2.5': { + displayName: 'Auto (Gemini 2.5)', + tier: 'auto', + isPreview: false, + dialogLocation: 'main', + dialogDescription: + 'Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash', + features: { thinking: false, multimodalToolUse: false }, + }, + }, }; diff --git a/packages/core/src/config/models.test.ts b/packages/core/src/config/models.test.ts index b3f5db9430..26da6ca1cb 100644 --- a/packages/core/src/config/models.test.ts +++ b/packages/core/src/config/models.test.ts @@ -22,7 +22,6 @@ import { GEMINI_MODEL_ALIAS_PRO, GEMINI_MODEL_ALIAS_FLASH, GEMINI_MODEL_ALIAS_AUTO, - GEMINI_MODEL_ALIAS_FLASH_LITE, PREVIEW_GEMINI_FLASH_MODEL, PREVIEW_GEMINI_MODEL_AUTO, DEFAULT_GEMINI_MODEL_AUTO, @@ -31,11 +30,97 @@ import { PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL, isPreviewModel, isProModel, - isValidModelOrAlias, - getValidModelsAndAliases, - VALID_GEMINI_MODELS, - VALID_ALIASES, } from './models.js'; +import type { Config } from './config.js'; +import { ModelConfigService } from '../services/modelConfigService.js'; +import { DEFAULT_MODEL_CONFIGS } from './defaultModelConfigs.js'; + +const modelConfigService = new ModelConfigService(DEFAULT_MODEL_CONFIGS); + +const dynamicConfig = { + getExperimentalDynamicModelConfiguration: () => true, + modelConfigService, +} as unknown as Config; + +const legacyConfig = { + getExperimentalDynamicModelConfiguration: () => false, + 
modelConfigService, +} as unknown as Config; + +describe('Dynamic Configuration Parity', () => { + const modelsToTest = [ + GEMINI_MODEL_ALIAS_AUTO, + GEMINI_MODEL_ALIAS_PRO, + GEMINI_MODEL_ALIAS_FLASH, + PREVIEW_GEMINI_MODEL_AUTO, + DEFAULT_GEMINI_MODEL_AUTO, + PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_MODEL, + 'custom-model', + ]; + + it('getDisplayString should match legacy behavior', () => { + for (const model of modelsToTest) { + const legacy = getDisplayString(model, legacyConfig); + const dynamic = getDisplayString(model, dynamicConfig); + expect(dynamic).toBe(legacy); + } + }); + + it('isPreviewModel should match legacy behavior', () => { + const allModels = [ + ...modelsToTest, + PREVIEW_GEMINI_3_1_MODEL, + PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL, + PREVIEW_GEMINI_FLASH_MODEL, + ]; + for (const model of allModels) { + const legacy = isPreviewModel(model, legacyConfig); + const dynamic = isPreviewModel(model, dynamicConfig); + expect(dynamic).toBe(legacy); + } + }); + + it('isProModel should match legacy behavior', () => { + for (const model of modelsToTest) { + const legacy = isProModel(model, legacyConfig); + const dynamic = isProModel(model, dynamicConfig); + expect(dynamic).toBe(legacy); + } + }); + + it('isGemini3Model should match legacy behavior', () => { + for (const model of modelsToTest) { + const legacy = isGemini3Model(model, legacyConfig); + const dynamic = isGemini3Model(model, dynamicConfig); + expect(dynamic).toBe(legacy); + } + }); + + it('isCustomModel should match legacy behavior', () => { + for (const model of modelsToTest) { + const legacy = isCustomModel(model, legacyConfig); + const dynamic = isCustomModel(model, dynamicConfig); + expect(dynamic).toBe(legacy); + } + }); + + it('supportsModernFeatures should match legacy behavior', () => { + for (const model of modelsToTest) { + const legacy = supportsModernFeatures(model); + const dynamic = supportsModernFeatures(model); + expect(dynamic).toBe(legacy); + } + }); + + 
it('supportsMultimodalFunctionResponse should match legacy behavior', () => { + for (const model of modelsToTest) { + const legacy = supportsMultimodalFunctionResponse(model, legacyConfig); + const dynamic = supportsMultimodalFunctionResponse(model, dynamicConfig); + expect(dynamic).toBe(legacy); + } + }); +}); describe('isPreviewModel', () => { it('should return true for preview models', () => { @@ -394,62 +479,3 @@ describe('isActiveModel', () => { ).toBe(false); }); }); - -describe('isValidModelOrAlias', () => { - it('should return true for valid model names', () => { - expect(isValidModelOrAlias(DEFAULT_GEMINI_MODEL)).toBe(true); - expect(isValidModelOrAlias(PREVIEW_GEMINI_MODEL)).toBe(true); - expect(isValidModelOrAlias(DEFAULT_GEMINI_FLASH_MODEL)).toBe(true); - expect(isValidModelOrAlias(DEFAULT_GEMINI_FLASH_LITE_MODEL)).toBe(true); - expect(isValidModelOrAlias(PREVIEW_GEMINI_FLASH_MODEL)).toBe(true); - expect(isValidModelOrAlias(PREVIEW_GEMINI_3_1_MODEL)).toBe(true); - expect(isValidModelOrAlias(PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL)).toBe( - true, - ); - }); - - it('should return true for valid aliases', () => { - expect(isValidModelOrAlias(GEMINI_MODEL_ALIAS_AUTO)).toBe(true); - expect(isValidModelOrAlias(GEMINI_MODEL_ALIAS_PRO)).toBe(true); - expect(isValidModelOrAlias(GEMINI_MODEL_ALIAS_FLASH)).toBe(true); - expect(isValidModelOrAlias(GEMINI_MODEL_ALIAS_FLASH_LITE)).toBe(true); - expect(isValidModelOrAlias(PREVIEW_GEMINI_MODEL_AUTO)).toBe(true); - expect(isValidModelOrAlias(DEFAULT_GEMINI_MODEL_AUTO)).toBe(true); - }); - - it('should return true for custom (non-gemini) models', () => { - expect(isValidModelOrAlias('gpt-4')).toBe(true); - expect(isValidModelOrAlias('claude-3')).toBe(true); - expect(isValidModelOrAlias('my-custom-model')).toBe(true); - }); - - it('should return false for invalid gemini model names', () => { - expect(isValidModelOrAlias('gemini-4-pro')).toBe(false); - expect(isValidModelOrAlias('gemini-99-flash')).toBe(false); - 
expect(isValidModelOrAlias('gemini-invalid')).toBe(false); - }); -}); - -describe('getValidModelsAndAliases', () => { - it('should return a sorted array', () => { - const result = getValidModelsAndAliases(); - const sorted = [...result].sort(); - expect(result).toEqual(sorted); - }); - - it('should include all valid models and aliases', () => { - const result = getValidModelsAndAliases(); - for (const model of VALID_GEMINI_MODELS) { - expect(result).toContain(model); - } - for (const alias of VALID_ALIASES) { - expect(result).toContain(alias); - } - }); - - it('should not contain duplicates', () => { - const result = getValidModelsAndAliases(); - const unique = [...new Set(result)]; - expect(result).toEqual(unique); - }); -}); diff --git a/packages/core/src/config/models.ts b/packages/core/src/config/models.ts index 59e7e4b457..73eab4633c 100644 --- a/packages/core/src/config/models.ts +++ b/packages/core/src/config/models.ts @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 */ +/** + * Interface for the ModelConfigService to break circular dependencies. + */ +export interface IModelConfigService { + getModelDefinition(modelId: string): + | { + tier?: string; + family?: string; + isPreview?: boolean; + displayName?: string; + features?: { + thinking?: boolean; + multimodalToolUse?: boolean; + }; + } + | undefined; +} + +/** + * Interface defining the minimal configuration required for model capability checks. + * This helps break circular dependencies between Config and models.ts. 
+ */ +export interface ModelCapabilityContext { + readonly modelConfigService: IModelConfigService; + getExperimentalDynamicModelConfiguration(): boolean; +} + export const PREVIEW_GEMINI_MODEL = 'gemini-3-pro-preview'; export const PREVIEW_GEMINI_3_1_MODEL = 'gemini-3.1-pro-preview'; export const PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL = @@ -32,15 +59,6 @@ export const GEMINI_MODEL_ALIAS_PRO = 'pro'; export const GEMINI_MODEL_ALIAS_FLASH = 'flash'; export const GEMINI_MODEL_ALIAS_FLASH_LITE = 'flash-lite'; -export const VALID_ALIASES = new Set([ - GEMINI_MODEL_ALIAS_AUTO, - GEMINI_MODEL_ALIAS_PRO, - GEMINI_MODEL_ALIAS_FLASH, - GEMINI_MODEL_ALIAS_FLASH_LITE, - PREVIEW_GEMINI_MODEL_AUTO, - DEFAULT_GEMINI_MODEL_AUTO, -]); - export const DEFAULT_GEMINI_EMBEDDING_MODEL = 'gemini-embedding-001'; // Cap the thinking at 8192 to prevent run-away thinking loops. @@ -148,7 +166,17 @@ export function resolveClassifierModel( } return resolveModel(requestedModel, useGemini3_1, useCustomToolModel); } -export function getDisplayString(model: string) { +export function getDisplayString( + model: string, + config?: ModelCapabilityContext, +) { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + const definition = config.modelConfigService.getModelDefinition(model); + if (definition?.displayName) { + return definition.displayName; + } + } + switch (model) { case PREVIEW_GEMINI_MODEL_AUTO: return 'Auto (Gemini 3)'; @@ -169,9 +197,19 @@ export function getDisplayString(model: string) { * Checks if the model is a preview model. * * @param model The model name to check. + * @param config Optional config object for dynamic model configuration. * @returns True if the model is a preview model. 
*/ -export function isPreviewModel(model: string): boolean { +export function isPreviewModel( + model: string, + config?: ModelCapabilityContext, +): boolean { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + return ( + config.modelConfigService.getModelDefinition(model)?.isPreview === true + ); + } + return ( model === PREVIEW_GEMINI_MODEL || model === PREVIEW_GEMINI_3_1_MODEL || @@ -186,9 +224,16 @@ export function isPreviewModel(model: string): boolean { * Checks if the model is a Pro model. * * @param model The model name to check. + * @param config Optional config object for dynamic model configuration. * @returns True if the model is a Pro model. */ -export function isProModel(model: string): boolean { +export function isProModel( + model: string, + config?: ModelCapabilityContext, +): boolean { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + return config.modelConfigService.getModelDefinition(model)?.tier === 'pro'; + } return model.toLowerCase().includes('pro'); } @@ -196,9 +241,22 @@ export function isProModel(model: string): boolean { * Checks if the model is a Gemini 3 model. * * @param model The model name to check. + * @param config Optional config object for dynamic model configuration. * @returns True if the model is a Gemini 3 model. */ -export function isGemini3Model(model: string): boolean { +export function isGemini3Model( + model: string, + config?: ModelCapabilityContext, +): boolean { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + // Legacy behavior resolves the model first. + const resolved = resolveModel(model); + return ( + config.modelConfigService.getModelDefinition(resolved)?.family === + 'gemini-3' + ); + } + const resolved = resolveModel(model); return /^gemini-3(\.|-|$)/.test(resolved); } @@ -210,6 +268,8 @@ export function isGemini3Model(model: string): boolean { * @returns True if the model is a Gemini-2.x model. 
*/ export function isGemini2Model(model: string): boolean { + // This is legacy behavior, will remove this when gemini 2 models are no + // longer needed. return /^gemini-2(\.|$)/.test(model); } @@ -217,9 +277,20 @@ export function isGemini2Model(model: string): boolean { * Checks if the model is a "custom" model (not Gemini branded). * * @param model The model name to check. + * @param config Optional config object for dynamic model configuration. * @returns True if the model is not a Gemini branded model. */ -export function isCustomModel(model: string): boolean { +export function isCustomModel( + model: string, + config?: ModelCapabilityContext, +): boolean { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + const resolved = resolveModel(model); + return ( + config.modelConfigService.getModelDefinition(resolved)?.tier === + 'custom' || !resolved.startsWith('gemini-') + ); + } const resolved = resolveModel(model); return !resolved.startsWith('gemini-'); } @@ -240,9 +311,16 @@ export function supportsModernFeatures(model: string): boolean { * Checks if the model is an auto model. * * @param model The model name to check. + * @param config Optional config object for dynamic model configuration. * @returns True if the model is an auto model. */ -export function isAutoModel(model: string): boolean { +export function isAutoModel( + model: string, + config?: ModelCapabilityContext, +): boolean { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + return config.modelConfigService.getModelDefinition(model)?.tier === 'auto'; + } return ( model === GEMINI_MODEL_ALIAS_AUTO || model === PREVIEW_GEMINI_MODEL_AUTO || @@ -257,7 +335,16 @@ export function isAutoModel(model: string): boolean { * @param model The model name to check. * @returns True if the model supports multimodal function responses. 
*/ -export function supportsMultimodalFunctionResponse(model: string): boolean { +export function supportsMultimodalFunctionResponse( + model: string, + config?: ModelCapabilityContext, +): boolean { + if (config?.getExperimentalDynamicModelConfiguration?.() === true) { + return ( + config.modelConfigService.getModelDefinition(model)?.features + ?.multimodalToolUse === true + ); + } return model.startsWith('gemini-3-'); } @@ -292,37 +379,3 @@ export function isActiveModel( ); } } - -/** - * Checks if the model name is valid (either a valid model or a valid alias). - * - * @param model The model name to check. - * @returns True if the model is valid. - */ -export function isValidModelOrAlias(model: string): boolean { - // Check if it's a valid alias - if (VALID_ALIASES.has(model)) { - return true; - } - - // Check if it's a valid model name - if (VALID_GEMINI_MODELS.has(model)) { - return true; - } - - // Allow custom models (non-gemini models) - if (!model.startsWith('gemini-')) { - return true; - } - - return false; -} - -/** - * Gets a list of all valid model names and aliases for error messages. - * - * @returns Array of valid model names and aliases. 
- */ -export function getValidModelsAndAliases(): string[] { - return [...new Set([...VALID_ALIASES, ...VALID_GEMINI_MODELS])].sort(); -} diff --git a/packages/core/src/config/sandbox-integration.test.ts b/packages/core/src/config/sandbox-integration.test.ts new file mode 100644 index 0000000000..305b9e2638 --- /dev/null +++ b/packages/core/src/config/sandbox-integration.test.ts @@ -0,0 +1,65 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { Config } from './config.js'; +import { NoopSandboxManager } from '../services/sandboxManager.js'; + +// Minimal mocks for Config dependencies to allow instantiation +vi.mock('../core/client.js'); +vi.mock('../core/contentGenerator.js'); +vi.mock('../telemetry/index.js'); +vi.mock('../core/tokenLimits.js'); +vi.mock('../services/fileDiscoveryService.js'); +vi.mock('../services/gitService.js'); +vi.mock('../services/trackerService.js'); +vi.mock('../confirmation-bus/message-bus.js', () => ({ + MessageBus: vi.fn(), +})); +vi.mock('../policy/policy-engine.js', () => ({ + PolicyEngine: vi.fn().mockImplementation(() => ({ + getExcludedTools: vi.fn().mockReturnValue(new Set()), + })), +})); +vi.mock('../skills/skillManager.js', () => ({ + SkillManager: vi.fn().mockImplementation(() => ({ + setAdminSettings: vi.fn(), + })), +})); +vi.mock('../agents/registry.js', () => ({ + AgentRegistry: vi.fn().mockImplementation(() => ({ + initialize: vi.fn(), + })), +})); +vi.mock('../agents/acknowledgedAgents.js', () => ({ + AcknowledgedAgentsService: vi.fn(), +})); +vi.mock('../services/modelConfigService.js', () => ({ + ModelConfigService: vi.fn(), +})); +vi.mock('./models.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + isPreviewModel: vi.fn().mockReturnValue(false), + resolveModel: vi.fn().mockReturnValue('test-model'), + }; +}); + +describe('Sandbox Integration', () => { + it('should 
have a NoopSandboxManager by default in Config', () => { + const config = new Config({ + sessionId: 'test-session', + targetDir: '.', + model: 'test-model', + cwd: '.', + debugMode: false, + }); + + expect(config.sandboxManager).toBeDefined(); + expect(config.sandboxManager).toBeInstanceOf(NoopSandboxManager); + }); +}); diff --git a/packages/core/src/config/storage.test.ts b/packages/core/src/config/storage.test.ts index 6b1cd39d88..ea8fce6da3 100644 --- a/packages/core/src/config/storage.test.ts +++ b/packages/core/src/config/storage.test.ts @@ -180,6 +180,25 @@ describe('Storage – additional helpers', () => { expect(storageWithSession.getProjectTempPlansDir()).toBe(expected); }); + it('getProjectTempTrackerDir returns ~/.gemini/tmp//tracker when no sessionId is provided', async () => { + await storage.initialize(); + const tempDir = storage.getProjectTempDir(); + const expected = path.join(tempDir, 'tracker'); + expect(storage.getProjectTempTrackerDir()).toBe(expected); + }); + + it('getProjectTempTrackerDir returns ~/.gemini/tmp///tracker when sessionId is provided', async () => { + const sessionId = 'test-session-id'; + const storageWithSession = new Storage(projectRoot, sessionId); + ProjectRegistry.prototype.getShortId = vi + .fn() + .mockReturnValue(PROJECT_SLUG); + await storageWithSession.initialize(); + const tempDir = storageWithSession.getProjectTempDir(); + const expected = path.join(tempDir, sessionId, 'tracker'); + expect(storageWithSession.getProjectTempTrackerDir()).toBe(expected); + }); + describe('Session and JSON Loading', () => { beforeEach(async () => { await storage.initialize(); diff --git a/packages/core/src/config/storage.ts b/packages/core/src/config/storage.ts index f0e9c0220b..38654346fa 100644 --- a/packages/core/src/config/storage.ts +++ b/packages/core/src/config/storage.ts @@ -302,6 +302,9 @@ export class Storage { } getProjectTempTrackerDir(): string { + if (this.sessionId) { + return path.join(this.getProjectTempDir(), 
this.sessionId, 'tracker'); + } return path.join(this.getProjectTempDir(), 'tracker'); } diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 3c8362cb85..cdda26d32c 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -49,9 +49,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -147,7 +147,7 @@ Use the \`exit_plan_mode\` tool to present the plan and formally request approva - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. 
Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -220,9 +220,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -324,7 +324,7 @@ An approved plan is available for this task at \`/tmp/plans/feature-x.md\`. - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. 
@@ -510,9 +510,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. 
You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -608,7 +608,7 @@ Use the \`exit_plan_mode\` tool to present the plan and formally request approva - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -681,9 +681,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. 
You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. 
Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -762,7 +762,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -852,9 +852,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Non-Interactive Environment:** You are running in a headless/CI environment and cannot interact with the user. 
Do not ask the user questions or request additional information, as the session will terminate. Use your best judgment to complete the task. If a tool fails because it requires user interaction, do not retry it indefinitely; instead, explain the limitation and suggest how the user can provide the required data (e.g., via environment variables). # Hook Context @@ -902,7 +902,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -975,9 +975,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. 
You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. 
Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Non-Interactive Environment:** You are running in a headless/CI environment and cannot interact with the user. Do not ask the user questions or request additional information, as the session will terminate. Use your best judgment to complete the task. If a tool fails because it requires user interaction, do not retry it indefinitely; instead, explain the limitation and suggest how the user can provide the required data (e.g., via environment variables). # Hook Context @@ -1025,7 +1025,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. 
@@ -1571,10 +1571,10 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
- **Skill Guidance:** Once a skill is activated via \`activate_skill\`, its instructions and resources are returned wrapped in \`<activated_skill>\` tags. You MUST treat the content within \`<instructions>\` as expert procedural guidance, prioritizing these specialized rules and workflows over your general defaults for the duration of the task. You may utilize any listed \`<available_resources>\` as needed. Follow this expert guidance strictly while continuing to uphold your core safety and security standards. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -1665,7 +1665,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries.
For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -1738,9 +1738,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -1819,7 +1819,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. 
Do not add explanatory comments within tool calls. @@ -1896,9 +1896,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
-- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -1977,7 +1977,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -2054,9 +2054,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. 
You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. 
Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. # Available Sub-Agents @@ -2135,7 +2135,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -2208,9 +2208,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -2289,7 +2289,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -2362,9 +2362,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -2435,7 +2435,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -2508,9 +2508,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -2588,7 +2588,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -2661,9 +2661,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -2742,7 +2742,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -2815,9 +2815,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -2907,7 +2907,7 @@ You are operating with a persistent file-based task tracking system located at \ - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -3221,9 +3221,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -3302,7 +3302,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -3375,9 +3375,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -3456,7 +3456,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -3641,9 +3641,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -3722,7 +3722,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -3795,9 +3795,9 @@ Use the following guidelines to optimize your search and read patterns. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
# Available Sub-Agents @@ -3876,7 +3876,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the 'Explain Before Acting' mandate. - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. 
diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index acd091a27b..3a9d0e2e92 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -34,6 +34,7 @@ import { GeminiCliOperation, } from '../index.js'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; +import { NoopSandboxManager } from '../services/sandboxManager.js'; import { MockModifiableTool, MockTool, @@ -274,6 +275,7 @@ function createMockConfig(overrides: Partial = {}): Config { allowedEnvironmentVariables: [], blockedEnvironmentVariables: [], }, + sandboxManager: new NoopSandboxManager(), }), storage: { getProjectTempDir: () => '/tmp', @@ -1211,6 +1213,7 @@ describe('CoreToolScheduler request queueing', () => { allowedEnvironmentVariables: [], blockedEnvironmentVariables: [], }, + sandboxManager: new NoopSandboxManager(), }), isInteractive: () => false, }); @@ -1320,6 +1323,7 @@ describe('CoreToolScheduler request queueing', () => { allowedEnvironmentVariables: [], blockedEnvironmentVariables: [], }, + sandboxManager: new NoopSandboxManager(), }), getToolRegistry: () => toolRegistry, getHookSystem: () => undefined, diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 5004e63f25..1ecae4ef33 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -133,7 +133,7 @@ export class CoreToolScheduler { this.onAllToolCallsComplete = options.onAllToolCallsComplete; this.onToolCallsUpdate = options.onToolCallsUpdate; this.getPreferredEditor = options.getPreferredEditor; - this.toolExecutor = new ToolExecutor(this.context.config); + this.toolExecutor = new ToolExecutor(this.context); this.toolModifier = new ToolModificationHandler(); // Subscribe to message bus for ASK_USER policy decisions diff --git a/packages/core/src/core/prompts.test.ts 
b/packages/core/src/core/prompts.test.ts index f60ff99a54..02b3068718 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -95,6 +95,7 @@ describe('Core System Prompt (prompts.ts)', () => { }, isInteractive: vi.fn().mockReturnValue(true), isInteractiveShellEnabled: vi.fn().mockReturnValue(true), + isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false), isAgentsEnabled: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(true), getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO), @@ -408,6 +409,7 @@ describe('Core System Prompt (prompts.ts)', () => { }, isInteractive: vi.fn().mockReturnValue(false), isInteractiveShellEnabled: vi.fn().mockReturnValue(false), + isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false), isAgentsEnabled: vi.fn().mockReturnValue(false), getModel: vi.fn().mockReturnValue('auto'), getActiveModel: vi.fn().mockReturnValue(PREVIEW_GEMINI_MODEL), diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index b846e2f2e9..b395daf2f9 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -146,6 +146,7 @@ export * from './ide/types.js'; // Export Shell Execution Service export * from './services/shellExecutionService.js'; +export * from './services/sandboxManager.js'; // Export base tool definitions export * from './tools/tools.js'; diff --git a/packages/core/src/mcp/oauth-token-storage.test.ts b/packages/core/src/mcp/oauth-token-storage.test.ts index d882109ca3..2ccce0e7e2 100644 --- a/packages/core/src/mcp/oauth-token-storage.test.ts +++ b/packages/core/src/mcp/oauth-token-storage.test.ts @@ -23,10 +23,14 @@ vi.mock('node:fs', () => ({ }, })); -vi.mock('node:path', () => ({ - dirname: vi.fn(), - join: vi.fn(), -})); +vi.mock('node:path', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + dirname: vi.fn(), + join: vi.fn(), + }; +}); vi.mock('../config/storage.js', () => ({ Storage: 
{ @@ -40,14 +44,14 @@ vi.mock('../utils/events.js', () => ({ }, })); -const mockHybridTokenStorage = { +const mockHybridTokenStorage = vi.hoisted(() => ({ listServers: vi.fn(), setCredentials: vi.fn(), getCredentials: vi.fn(), deleteCredentials: vi.fn(), clearAll: vi.fn(), getAllCredentials: vi.fn(), -}; +})); vi.mock('./token-storage/hybrid-token-storage.js', () => ({ HybridTokenStorage: vi.fn(() => mockHybridTokenStorage), })); diff --git a/packages/core/src/mcp/oauth-utils.test.ts b/packages/core/src/mcp/oauth-utils.test.ts index f27ee7727b..6dab62a338 100644 --- a/packages/core/src/mcp/oauth-utils.test.ts +++ b/packages/core/src/mcp/oauth-utils.test.ts @@ -272,6 +272,34 @@ describe('OAuthUtils', () => { OAuthUtils.discoverOAuthConfig('https://example.com/mcp'), ).rejects.toThrow(/does not match expected/); }); + + it('should accept equivalent root resources with and without trailing slash', async () => { + mockFetch + // fetchProtectedResourceMetadata + .mockResolvedValueOnce({ + ok: true, + json: () => + Promise.resolve({ + resource: 'https://example.com', + authorization_servers: ['https://auth.example.com'], + bearer_methods_supported: ['header'], + }), + }) + // discoverAuthorizationServerMetadata + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(mockAuthServerMetadata), + }); + + await expect( + OAuthUtils.discoverOAuthConfig('https://example.com'), + ).resolves.toEqual({ + authorizationUrl: 'https://auth.example.com/authorize', + issuer: 'https://auth.example.com', + tokenUrl: 'https://auth.example.com/token', + scopes: ['read', 'write'], + }); + }); }); describe('metadataToOAuthConfig', () => { @@ -336,6 +364,45 @@ describe('OAuthUtils', () => { }); }); + describe('discoverOAuthFromWWWAuthenticate', () => { + const mockAuthServerMetadata: OAuthAuthorizationServerMetadata = { + issuer: 'https://auth.example.com', + authorization_endpoint: 'https://auth.example.com/authorize', + token_endpoint: 'https://auth.example.com/token', + 
scopes_supported: ['read', 'write'], + }; + + it('should accept equivalent root resources with and without trailing slash', async () => { + mockFetch + // fetchProtectedResourceMetadata(resource_metadata URL) + .mockResolvedValueOnce({ + ok: true, + json: () => + Promise.resolve({ + resource: 'https://example.com', + authorization_servers: ['https://auth.example.com'], + }), + }) + // discoverAuthorizationServerMetadata(auth server well-known URL) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(mockAuthServerMetadata), + }); + + const result = await OAuthUtils.discoverOAuthFromWWWAuthenticate( + 'Bearer realm="example", resource_metadata="https://example.com/.well-known/oauth-protected-resource"', + 'https://example.com/', + ); + + expect(result).toEqual({ + authorizationUrl: 'https://auth.example.com/authorize', + issuer: 'https://auth.example.com', + tokenUrl: 'https://auth.example.com/token', + scopes: ['read', 'write'], + }); + }); + }); + describe('extractBaseUrl', () => { it('should extract base URL from MCP server URL', () => { const result = OAuthUtils.extractBaseUrl('https://example.com/mcp/v1'); diff --git a/packages/core/src/mcp/oauth-utils.ts b/packages/core/src/mcp/oauth-utils.ts index 320c3b9685..12ab2bd9ff 100644 --- a/packages/core/src/mcp/oauth-utils.ts +++ b/packages/core/src/mcp/oauth-utils.ts @@ -257,7 +257,12 @@ export class OAuthUtils { // it is using as the prefix for the metadata request exactly matches the value // of the resource metadata parameter in the protected resource metadata document. 
const expectedResource = this.buildResourceParameter(serverUrl); - if (resourceMetadata.resource !== expectedResource) { + if ( + !this.isEquivalentResourceIdentifier( + resourceMetadata.resource, + expectedResource, + ) + ) { throw new ResourceMismatchError( `Protected resource ${resourceMetadata.resource} does not match expected ${expectedResource}`, ); @@ -348,7 +353,12 @@ export class OAuthUtils { if (resourceMetadata && mcpServerUrl) { // Validate resource parameter per RFC 9728 Section 7.3 const expectedResource = this.buildResourceParameter(mcpServerUrl); - if (resourceMetadata.resource !== expectedResource) { + if ( + !this.isEquivalentResourceIdentifier( + resourceMetadata.resource, + expectedResource, + ) + ) { throw new ResourceMismatchError( `Protected resource ${resourceMetadata.resource} does not match expected ${expectedResource}`, ); @@ -402,6 +412,21 @@ export class OAuthUtils { return `${url.protocol}//${url.host}${url.pathname}`; } + private static isEquivalentResourceIdentifier( + discoveredResource: string, + expectedResource: string, + ): boolean { + const normalize = (resource: string): string => { + try { + return this.buildResourceParameter(resource); + } catch { + return resource; + } + }; + + return normalize(discoveredResource) === normalize(expectedResource); + } + /** * Parses a JWT string to extract its expiry time. * @param idToken The JWT ID token. 
diff --git a/packages/core/src/mcp/token-storage/file-token-storage.test.ts b/packages/core/src/mcp/token-storage/file-token-storage.test.ts deleted file mode 100644 index a2f080a652..0000000000 --- a/packages/core/src/mcp/token-storage/file-token-storage.test.ts +++ /dev/null @@ -1,360 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import { promises as fs } from 'node:fs'; -import * as path from 'node:path'; -import { FileTokenStorage } from './file-token-storage.js'; -import type { OAuthCredentials } from './types.js'; -import { GEMINI_DIR } from '../../utils/paths.js'; - -vi.mock('node:fs', () => ({ - promises: { - readFile: vi.fn(), - writeFile: vi.fn(), - unlink: vi.fn(), - mkdir: vi.fn(), - rename: vi.fn(), - }, -})); - -vi.mock('node:os', () => ({ - default: { - homedir: vi.fn(() => '/home/test'), - hostname: vi.fn(() => 'test-host'), - userInfo: vi.fn(() => ({ username: 'test-user' })), - }, - homedir: vi.fn(() => '/home/test'), - hostname: vi.fn(() => 'test-host'), - userInfo: vi.fn(() => ({ username: 'test-user' })), -})); - -describe('FileTokenStorage', () => { - let storage: FileTokenStorage; - const mockFs = fs as unknown as { - readFile: ReturnType; - writeFile: ReturnType; - unlink: ReturnType; - mkdir: ReturnType; - rename: ReturnType; - }; - const existingCredentials: OAuthCredentials = { - serverName: 'existing-server', - token: { - accessToken: 'existing-token', - tokenType: 'Bearer', - }, - updatedAt: Date.now() - 10000, - }; - - beforeEach(() => { - vi.clearAllMocks(); - storage = new FileTokenStorage('test-storage'); - }); - - afterEach(() => { - vi.clearAllMocks(); - }); - - describe('getCredentials', () => { - it('should return null when file does not exist', async () => { - mockFs.readFile.mockRejectedValue({ code: 'ENOENT' }); - - const result = await storage.getCredentials('test-server'); - 
expect(result).toBeNull(); - }); - - it('should return null for expired tokens', async () => { - const credentials: OAuthCredentials = { - serverName: 'test-server', - token: { - accessToken: 'access-token', - tokenType: 'Bearer', - expiresAt: Date.now() - 3600000, - }, - updatedAt: Date.now(), - }; - - const encryptedData = storage['encrypt']( - JSON.stringify({ 'test-server': credentials }), - ); - mockFs.readFile.mockResolvedValue(encryptedData); - - const result = await storage.getCredentials('test-server'); - expect(result).toBeNull(); - }); - - it('should return credentials for valid tokens', async () => { - const credentials: OAuthCredentials = { - serverName: 'test-server', - token: { - accessToken: 'access-token', - tokenType: 'Bearer', - expiresAt: Date.now() + 3600000, - }, - updatedAt: Date.now(), - }; - - const encryptedData = storage['encrypt']( - JSON.stringify({ 'test-server': credentials }), - ); - mockFs.readFile.mockResolvedValue(encryptedData); - - const result = await storage.getCredentials('test-server'); - expect(result).toEqual(credentials); - }); - - it('should throw error with file path when file is corrupted', async () => { - mockFs.readFile.mockResolvedValue('corrupted-data'); - - try { - await storage.getCredentials('test-server'); - expect.fail('Expected error to be thrown'); - } catch (error) { - expect(error).toBeInstanceOf(Error); - const err = error as Error; - expect(err.message).toContain('Corrupted token file detected at:'); - expect(err.message).toContain('mcp-oauth-tokens-v2.json'); - expect(err.message).toContain('delete or rename'); - } - }); - }); - - describe('auth type switching', () => { - it('should throw error when trying to save credentials with corrupted file', async () => { - // Simulate corrupted file on first read - mockFs.readFile.mockResolvedValue('corrupted-data'); - - // Try to save new credentials (simulating switch from OAuth to API key) - const newCredentials: OAuthCredentials = { - serverName: 
'new-auth-server', - token: { - accessToken: 'new-api-key', - tokenType: 'ApiKey', - }, - updatedAt: Date.now(), - }; - - // Should throw error with file path - try { - await storage.setCredentials(newCredentials); - expect.fail('Expected error to be thrown'); - } catch (error) { - expect(error).toBeInstanceOf(Error); - const err = error as Error; - expect(err.message).toContain('Corrupted token file detected at:'); - expect(err.message).toContain('mcp-oauth-tokens-v2.json'); - expect(err.message).toContain('delete or rename'); - } - }); - }); - - describe('setCredentials', () => { - it('should save credentials with encryption', async () => { - const encryptedData = storage['encrypt']( - JSON.stringify({ 'existing-server': existingCredentials }), - ); - mockFs.readFile.mockResolvedValue(encryptedData); - mockFs.mkdir.mockResolvedValue(undefined); - mockFs.writeFile.mockResolvedValue(undefined); - - const credentials: OAuthCredentials = { - serverName: 'test-server', - token: { - accessToken: 'access-token', - tokenType: 'Bearer', - }, - updatedAt: Date.now(), - }; - - await storage.setCredentials(credentials); - - expect(mockFs.mkdir).toHaveBeenCalledWith( - path.join('/home/test', GEMINI_DIR), - { recursive: true, mode: 0o700 }, - ); - expect(mockFs.writeFile).toHaveBeenCalled(); - - const writeCall = mockFs.writeFile.mock.calls[0]; - expect(writeCall[1]).toMatch(/^[0-9a-f]+:[0-9a-f]+:[0-9a-f]+$/); - expect(writeCall[2]).toEqual({ mode: 0o600 }); - }); - - it('should update existing credentials', async () => { - const encryptedData = storage['encrypt']( - JSON.stringify({ 'existing-server': existingCredentials }), - ); - mockFs.readFile.mockResolvedValue(encryptedData); - mockFs.writeFile.mockResolvedValue(undefined); - - const newCredentials: OAuthCredentials = { - serverName: 'test-server', - token: { - accessToken: 'new-token', - tokenType: 'Bearer', - }, - updatedAt: Date.now(), - }; - - await storage.setCredentials(newCredentials); - - 
expect(mockFs.writeFile).toHaveBeenCalled(); - const writeCall = mockFs.writeFile.mock.calls[0]; - const decrypted = storage['decrypt'](writeCall[1]); - const saved = JSON.parse(decrypted); - - expect(saved['existing-server']).toEqual(existingCredentials); - expect(saved['test-server'].token.accessToken).toBe('new-token'); - }); - }); - - describe('deleteCredentials', () => { - it('should throw when credentials do not exist', async () => { - mockFs.readFile.mockRejectedValue({ code: 'ENOENT' }); - - await expect(storage.deleteCredentials('test-server')).rejects.toThrow( - 'No credentials found for test-server', - ); - }); - - it('should delete file when last credential is removed', async () => { - const credentials: OAuthCredentials = { - serverName: 'test-server', - token: { - accessToken: 'access-token', - tokenType: 'Bearer', - }, - updatedAt: Date.now(), - }; - - const encryptedData = storage['encrypt']( - JSON.stringify({ 'test-server': credentials }), - ); - mockFs.readFile.mockResolvedValue(encryptedData); - mockFs.unlink.mockResolvedValue(undefined); - - await storage.deleteCredentials('test-server'); - - expect(mockFs.unlink).toHaveBeenCalledWith( - path.join('/home/test', GEMINI_DIR, 'mcp-oauth-tokens-v2.json'), - ); - }); - - it('should update file when other credentials remain', async () => { - const credentials1: OAuthCredentials = { - serverName: 'server1', - token: { - accessToken: 'token1', - tokenType: 'Bearer', - }, - updatedAt: Date.now(), - }; - - const credentials2: OAuthCredentials = { - serverName: 'server2', - token: { - accessToken: 'token2', - tokenType: 'Bearer', - }, - updatedAt: Date.now(), - }; - - const encryptedData = storage['encrypt']( - JSON.stringify({ server1: credentials1, server2: credentials2 }), - ); - mockFs.readFile.mockResolvedValue(encryptedData); - mockFs.writeFile.mockResolvedValue(undefined); - - await storage.deleteCredentials('server1'); - - expect(mockFs.writeFile).toHaveBeenCalled(); - 
expect(mockFs.unlink).not.toHaveBeenCalled(); - - const writeCall = mockFs.writeFile.mock.calls[0]; - const decrypted = storage['decrypt'](writeCall[1]); - const saved = JSON.parse(decrypted); - - expect(saved['server1']).toBeUndefined(); - expect(saved['server2']).toEqual(credentials2); - }); - }); - - describe('listServers', () => { - it('should return empty list when file does not exist', async () => { - mockFs.readFile.mockRejectedValue({ code: 'ENOENT' }); - - const result = await storage.listServers(); - expect(result).toEqual([]); - }); - - it('should return list of server names', async () => { - const credentials: Record = { - server1: { - serverName: 'server1', - token: { accessToken: 'token1', tokenType: 'Bearer' }, - updatedAt: Date.now(), - }, - server2: { - serverName: 'server2', - token: { accessToken: 'token2', tokenType: 'Bearer' }, - updatedAt: Date.now(), - }, - }; - - const encryptedData = storage['encrypt'](JSON.stringify(credentials)); - mockFs.readFile.mockResolvedValue(encryptedData); - - const result = await storage.listServers(); - expect(result).toEqual(['server1', 'server2']); - }); - }); - - describe('clearAll', () => { - it('should delete the token file', async () => { - mockFs.unlink.mockResolvedValue(undefined); - - await storage.clearAll(); - - expect(mockFs.unlink).toHaveBeenCalledWith( - path.join('/home/test', GEMINI_DIR, 'mcp-oauth-tokens-v2.json'), - ); - }); - - it('should not throw when file does not exist', async () => { - mockFs.unlink.mockRejectedValue({ code: 'ENOENT' }); - - await expect(storage.clearAll()).resolves.not.toThrow(); - }); - }); - - describe('encryption', () => { - it('should encrypt and decrypt data correctly', () => { - const original = 'test-data-123'; - const encrypted = storage['encrypt'](original); - const decrypted = storage['decrypt'](encrypted); - - expect(decrypted).toBe(original); - expect(encrypted).not.toBe(original); - expect(encrypted).toMatch(/^[0-9a-f]+:[0-9a-f]+:[0-9a-f]+$/); - }); - - 
it('should produce different encrypted output each time', () => { - const original = 'test-data'; - const encrypted1 = storage['encrypt'](original); - const encrypted2 = storage['encrypt'](original); - - expect(encrypted1).not.toBe(encrypted2); - expect(storage['decrypt'](encrypted1)).toBe(original); - expect(storage['decrypt'](encrypted2)).toBe(original); - }); - - it('should throw on invalid encrypted data format', () => { - expect(() => storage['decrypt']('invalid-data')).toThrow( - 'Invalid encrypted data format', - ); - }); - }); -}); diff --git a/packages/core/src/mcp/token-storage/file-token-storage.ts b/packages/core/src/mcp/token-storage/file-token-storage.ts deleted file mode 100644 index 97eae56194..0000000000 --- a/packages/core/src/mcp/token-storage/file-token-storage.ts +++ /dev/null @@ -1,194 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { promises as fs } from 'node:fs'; -import * as path from 'node:path'; -import * as os from 'node:os'; -import * as crypto from 'node:crypto'; -import { BaseTokenStorage } from './base-token-storage.js'; -import type { OAuthCredentials } from './types.js'; -import { GEMINI_DIR, homedir } from '../../utils/paths.js'; - -export class FileTokenStorage extends BaseTokenStorage { - private readonly tokenFilePath: string; - private readonly encryptionKey: Buffer; - - constructor(serviceName: string) { - super(serviceName); - const configDir = path.join(homedir(), GEMINI_DIR); - this.tokenFilePath = path.join(configDir, 'mcp-oauth-tokens-v2.json'); - this.encryptionKey = this.deriveEncryptionKey(); - } - - private deriveEncryptionKey(): Buffer { - const salt = `${os.hostname()}-${os.userInfo().username}-gemini-cli`; - return crypto.scryptSync('gemini-cli-oauth', salt, 32); - } - - private encrypt(text: string): string { - const iv = crypto.randomBytes(16); - const cipher = crypto.createCipheriv('aes-256-gcm', this.encryptionKey, iv); - - let encrypted = 
cipher.update(text, 'utf8', 'hex'); - encrypted += cipher.final('hex'); - - const authTag = cipher.getAuthTag(); - - return iv.toString('hex') + ':' + authTag.toString('hex') + ':' + encrypted; - } - - private decrypt(encryptedData: string): string { - const parts = encryptedData.split(':'); - if (parts.length !== 3) { - throw new Error('Invalid encrypted data format'); - } - - const iv = Buffer.from(parts[0], 'hex'); - const authTag = Buffer.from(parts[1], 'hex'); - const encrypted = parts[2]; - - const decipher = crypto.createDecipheriv( - 'aes-256-gcm', - this.encryptionKey, - iv, - ); - decipher.setAuthTag(authTag); - - let decrypted = decipher.update(encrypted, 'hex', 'utf8'); - decrypted += decipher.final('utf8'); - - return decrypted; - } - - private async ensureDirectoryExists(): Promise { - const dir = path.dirname(this.tokenFilePath); - await fs.mkdir(dir, { recursive: true, mode: 0o700 }); - } - - private async loadTokens(): Promise> { - try { - const data = await fs.readFile(this.tokenFilePath, 'utf-8'); - const decrypted = this.decrypt(data); - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const tokens = JSON.parse(decrypted) as Record; - return new Map(Object.entries(tokens)); - } catch (error: unknown) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const err = error as NodeJS.ErrnoException & { message?: string }; - if (err.code === 'ENOENT') { - return new Map(); - } - if ( - err.message?.includes('Invalid encrypted data format') || - err.message?.includes( - 'Unsupported state or unable to authenticate data', - ) - ) { - // Decryption failed - this can happen when switching between auth types - // or if the file is genuinely corrupted. 
- throw new Error( - `Corrupted token file detected at: ${this.tokenFilePath}\n` + - `Please delete or rename this file to resolve the issue.`, - ); - } - throw error; - } - } - - private async saveTokens( - tokens: Map, - ): Promise { - await this.ensureDirectoryExists(); - - const data = Object.fromEntries(tokens); - const json = JSON.stringify(data, null, 2); - const encrypted = this.encrypt(json); - - await fs.writeFile(this.tokenFilePath, encrypted, { mode: 0o600 }); - } - - async getCredentials(serverName: string): Promise { - const tokens = await this.loadTokens(); - const credentials = tokens.get(serverName); - - if (!credentials) { - return null; - } - - if (this.isTokenExpired(credentials)) { - return null; - } - - return credentials; - } - - async setCredentials(credentials: OAuthCredentials): Promise { - this.validateCredentials(credentials); - - const tokens = await this.loadTokens(); - const updatedCredentials: OAuthCredentials = { - ...credentials, - updatedAt: Date.now(), - }; - - tokens.set(credentials.serverName, updatedCredentials); - await this.saveTokens(tokens); - } - - async deleteCredentials(serverName: string): Promise { - const tokens = await this.loadTokens(); - - if (!tokens.has(serverName)) { - throw new Error(`No credentials found for ${serverName}`); - } - - tokens.delete(serverName); - - if (tokens.size === 0) { - try { - await fs.unlink(this.tokenFilePath); - } catch (error: unknown) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const err = error as NodeJS.ErrnoException; - if (err.code !== 'ENOENT') { - throw error; - } - } - } else { - await this.saveTokens(tokens); - } - } - - async listServers(): Promise { - const tokens = await this.loadTokens(); - return Array.from(tokens.keys()); - } - - async getAllCredentials(): Promise> { - const tokens = await this.loadTokens(); - const result = new Map(); - - for (const [serverName, credentials] of tokens) { - if (!this.isTokenExpired(credentials)) { - 
result.set(serverName, credentials); - } - } - - return result; - } - - async clearAll(): Promise { - try { - await fs.unlink(this.tokenFilePath); - } catch (error: unknown) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const err = error as NodeJS.ErrnoException; - if (err.code !== 'ENOENT') { - throw error; - } - } - } -} diff --git a/packages/core/src/mcp/token-storage/hybrid-token-storage.test.ts b/packages/core/src/mcp/token-storage/hybrid-token-storage.test.ts index 88d7d5c6ee..ecbe96adba 100644 --- a/packages/core/src/mcp/token-storage/hybrid-token-storage.test.ts +++ b/packages/core/src/mcp/token-storage/hybrid-token-storage.test.ts @@ -7,12 +7,12 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import { HybridTokenStorage } from './hybrid-token-storage.js'; import { KeychainTokenStorage } from './keychain-token-storage.js'; -import { FileTokenStorage } from './file-token-storage.js'; import { type OAuthCredentials, TokenStorageType } from './types.js'; vi.mock('./keychain-token-storage.js', () => ({ KeychainTokenStorage: vi.fn().mockImplementation(() => ({ isAvailable: vi.fn(), + isUsingFileFallback: vi.fn(), getCredentials: vi.fn(), setCredentials: vi.fn(), deleteCredentials: vi.fn(), @@ -36,19 +36,9 @@ vi.mock('../../core/apiKeyCredentialStorage.js', () => ({ clearApiKey: vi.fn(), })); -vi.mock('./file-token-storage.js', () => ({ - FileTokenStorage: vi.fn().mockImplementation(() => ({ - getCredentials: vi.fn(), - setCredentials: vi.fn(), - deleteCredentials: vi.fn(), - listServers: vi.fn(), - getAllCredentials: vi.fn(), - clearAll: vi.fn(), - })), -})); - interface MockStorage { isAvailable?: ReturnType; + isUsingFileFallback: ReturnType; getCredentials: ReturnType; setCredentials: ReturnType; deleteCredentials: ReturnType; @@ -60,7 +50,6 @@ interface MockStorage { describe('HybridTokenStorage', () => { let storage: HybridTokenStorage; let mockKeychainStorage: MockStorage; - let mockFileStorage: 
MockStorage; const originalEnv = process.env; beforeEach(() => { @@ -70,15 +59,7 @@ describe('HybridTokenStorage', () => { // Create mock instances before creating HybridTokenStorage mockKeychainStorage = { isAvailable: vi.fn(), - getCredentials: vi.fn(), - setCredentials: vi.fn(), - deleteCredentials: vi.fn(), - listServers: vi.fn(), - getAllCredentials: vi.fn(), - clearAll: vi.fn(), - }; - - mockFileStorage = { + isUsingFileFallback: vi.fn(), getCredentials: vi.fn(), setCredentials: vi.fn(), deleteCredentials: vi.fn(), @@ -90,9 +71,6 @@ describe('HybridTokenStorage', () => { ( KeychainTokenStorage as unknown as ReturnType ).mockImplementation(() => mockKeychainStorage); - ( - FileTokenStorage as unknown as ReturnType - ).mockImplementation(() => mockFileStorage); storage = new HybridTokenStorage('test-service'); }); @@ -102,74 +80,31 @@ describe('HybridTokenStorage', () => { }); describe('storage selection', () => { - it('should use keychain when available', async () => { - mockKeychainStorage.isAvailable!.mockResolvedValue(true); + it('should use keychain normally', async () => { + mockKeychainStorage.isUsingFileFallback.mockResolvedValue(false); mockKeychainStorage.getCredentials.mockResolvedValue(null); await storage.getCredentials('test-server'); - expect(mockKeychainStorage.isAvailable).toHaveBeenCalled(); expect(mockKeychainStorage.getCredentials).toHaveBeenCalledWith( 'test-server', ); expect(await storage.getStorageType()).toBe(TokenStorageType.KEYCHAIN); }); - it('should use file storage when GEMINI_FORCE_FILE_STORAGE is set', async () => { - process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true'; - mockFileStorage.getCredentials.mockResolvedValue(null); - - await storage.getCredentials('test-server'); - - expect(mockKeychainStorage.isAvailable).not.toHaveBeenCalled(); - expect(mockFileStorage.getCredentials).toHaveBeenCalledWith( - 'test-server', - ); - expect(await storage.getStorageType()).toBe( - TokenStorageType.ENCRYPTED_FILE, - ); - }); - - it('should 
fall back to file storage when keychain is unavailable', async () => { - mockKeychainStorage.isAvailable!.mockResolvedValue(false); - mockFileStorage.getCredentials.mockResolvedValue(null); - - await storage.getCredentials('test-server'); - - expect(mockKeychainStorage.isAvailable).toHaveBeenCalled(); - expect(mockFileStorage.getCredentials).toHaveBeenCalledWith( - 'test-server', - ); - expect(await storage.getStorageType()).toBe( - TokenStorageType.ENCRYPTED_FILE, - ); - }); - - it('should fall back to file storage when keychain throws error', async () => { - mockKeychainStorage.isAvailable!.mockRejectedValue( - new Error('Keychain error'), - ); - mockFileStorage.getCredentials.mockResolvedValue(null); - - await storage.getCredentials('test-server'); - - expect(mockKeychainStorage.isAvailable).toHaveBeenCalled(); - expect(mockFileStorage.getCredentials).toHaveBeenCalledWith( - 'test-server', - ); - expect(await storage.getStorageType()).toBe( - TokenStorageType.ENCRYPTED_FILE, - ); - }); - - it('should cache storage selection', async () => { - mockKeychainStorage.isAvailable!.mockResolvedValue(true); + it('should use file storage when isUsingFileFallback is true', async () => { + mockKeychainStorage.isUsingFileFallback.mockResolvedValue(true); mockKeychainStorage.getCredentials.mockResolvedValue(null); - await storage.getCredentials('test-server'); - await storage.getCredentials('another-server'); + const forceStorage = new HybridTokenStorage('test-service-forced'); + await forceStorage.getCredentials('test-server'); - expect(mockKeychainStorage.isAvailable).toHaveBeenCalledTimes(1); + expect(mockKeychainStorage.getCredentials).toHaveBeenCalledWith( + 'test-server', + ); + expect(await forceStorage.getStorageType()).toBe( + TokenStorageType.ENCRYPTED_FILE, + ); }); }); @@ -184,7 +119,6 @@ describe('HybridTokenStorage', () => { updatedAt: Date.now(), }; - mockKeychainStorage.isAvailable!.mockResolvedValue(true); 
mockKeychainStorage.getCredentials.mockResolvedValue(credentials); const result = await storage.getCredentials('test-server'); @@ -207,7 +141,6 @@ describe('HybridTokenStorage', () => { updatedAt: Date.now(), }; - mockKeychainStorage.isAvailable!.mockResolvedValue(true); mockKeychainStorage.setCredentials.mockResolvedValue(undefined); await storage.setCredentials(credentials); @@ -220,7 +153,6 @@ describe('HybridTokenStorage', () => { describe('deleteCredentials', () => { it('should delegate to selected storage', async () => { - mockKeychainStorage.isAvailable!.mockResolvedValue(true); mockKeychainStorage.deleteCredentials.mockResolvedValue(undefined); await storage.deleteCredentials('test-server'); @@ -234,7 +166,6 @@ describe('HybridTokenStorage', () => { describe('listServers', () => { it('should delegate to selected storage', async () => { const servers = ['server1', 'server2']; - mockKeychainStorage.isAvailable!.mockResolvedValue(true); mockKeychainStorage.listServers.mockResolvedValue(servers); const result = await storage.listServers(); @@ -265,7 +196,6 @@ describe('HybridTokenStorage', () => { ], ]); - mockKeychainStorage.isAvailable!.mockResolvedValue(true); mockKeychainStorage.getAllCredentials.mockResolvedValue(credentialsMap); const result = await storage.getAllCredentials(); @@ -277,7 +207,6 @@ describe('HybridTokenStorage', () => { describe('clearAll', () => { it('should delegate to selected storage', async () => { - mockKeychainStorage.isAvailable!.mockResolvedValue(true); mockKeychainStorage.clearAll.mockResolvedValue(undefined); await storage.clearAll(); diff --git a/packages/core/src/mcp/token-storage/hybrid-token-storage.ts b/packages/core/src/mcp/token-storage/hybrid-token-storage.ts index 20560ba30e..a495b8d9d7 100644 --- a/packages/core/src/mcp/token-storage/hybrid-token-storage.ts +++ b/packages/core/src/mcp/token-storage/hybrid-token-storage.ts @@ -5,7 +5,7 @@ */ import { BaseTokenStorage } from './base-token-storage.js'; -import { 
FileTokenStorage } from './file-token-storage.js'; +import { KeychainTokenStorage } from './keychain-token-storage.js'; import { TokenStorageType, type TokenStorage, @@ -13,8 +13,7 @@ import { } from './types.js'; import { coreEvents } from '../../utils/events.js'; import { TokenStorageInitializationEvent } from '../../telemetry/types.js'; - -const FORCE_FILE_STORAGE_ENV_VAR = 'GEMINI_FORCE_FILE_STORAGE'; +import { FORCE_FILE_STORAGE_ENV_VAR } from '../../services/keychainService.js'; export class HybridTokenStorage extends BaseTokenStorage { private storage: TokenStorage | null = null; @@ -28,34 +27,20 @@ export class HybridTokenStorage extends BaseTokenStorage { private async initializeStorage(): Promise { const forceFileStorage = process.env[FORCE_FILE_STORAGE_ENV_VAR] === 'true'; - if (!forceFileStorage) { - try { - const { KeychainTokenStorage } = await import( - './keychain-token-storage.js' - ); - const keychainStorage = new KeychainTokenStorage(this.serviceName); + const keychainStorage = new KeychainTokenStorage(this.serviceName); + this.storage = keychainStorage; - const isAvailable = await keychainStorage.isAvailable(); - if (isAvailable) { - this.storage = keychainStorage; - this.storageType = TokenStorageType.KEYCHAIN; + const isUsingFileFallback = await keychainStorage.isUsingFileFallback(); - coreEvents.emitTelemetryTokenStorageType( - new TokenStorageInitializationEvent('keychain', forceFileStorage), - ); - - return this.storage; - } - } catch (_e) { - // Fallback to file storage if keychain fails to initialize - } - } - - this.storage = new FileTokenStorage(this.serviceName); - this.storageType = TokenStorageType.ENCRYPTED_FILE; + this.storageType = isUsingFileFallback + ? TokenStorageType.ENCRYPTED_FILE + : TokenStorageType.KEYCHAIN; coreEvents.emitTelemetryTokenStorageType( - new TokenStorageInitializationEvent('encrypted_file', forceFileStorage), + new TokenStorageInitializationEvent( + isUsingFileFallback ? 
'encrypted_file' : 'keychain', + forceFileStorage, + ), ); return this.storage; diff --git a/packages/core/src/mcp/token-storage/index.ts b/packages/core/src/mcp/token-storage/index.ts index 0b48a933a9..b1e75e9859 100644 --- a/packages/core/src/mcp/token-storage/index.ts +++ b/packages/core/src/mcp/token-storage/index.ts @@ -6,8 +6,8 @@ export * from './types.js'; export * from './base-token-storage.js'; -export * from './file-token-storage.js'; export * from './hybrid-token-storage.js'; +export * from './keychain-token-storage.js'; export const DEFAULT_SERVICE_NAME = 'gemini-cli-oauth'; export const FORCE_ENCRYPTED_FILE_ENV_VAR = diff --git a/packages/core/src/mcp/token-storage/keychain-token-storage.ts b/packages/core/src/mcp/token-storage/keychain-token-storage.ts index d0b4990279..f649b0f1c0 100644 --- a/packages/core/src/mcp/token-storage/keychain-token-storage.ts +++ b/packages/core/src/mcp/token-storage/keychain-token-storage.ts @@ -159,6 +159,10 @@ export class KeychainTokenStorage return this.keychainService.isAvailable(); } + async isUsingFileFallback(): Promise { + return this.keychainService.isUsingFileFallback(); + } + async setSecret(key: string, value: string): Promise { await this.keychainService.setPassword(`${SECRET_PREFIX}${key}`, value); } diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 4c976bc160..392ab15c0c 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -16,6 +16,7 @@ import { type PolicyRule, type PolicySettings, type SafetyCheckerRule, + ALWAYS_ALLOW_PRIORITY_OFFSET, } from './types.js'; import type { PolicyEngine } from './policy-engine.js'; import { loadPoliciesFromToml, type PolicyFileError } from './toml-loader.js'; @@ -66,19 +67,6 @@ export const WORKSPACE_POLICY_TIER = 3; export const USER_POLICY_TIER = 4; export const ADMIN_POLICY_TIER = 5; -/** - * The fractional priority of "Always allow" rules (e.g., 950/1000). 
- * Higher fraction within a tier wins. - */ -export const ALWAYS_ALLOW_PRIORITY_FRACTION = 950; - -/** - * The fractional priority offset for "Always allow" rules (e.g., 0.95). - * This ensures consistency between in-memory rules and persisted rules. - */ -export const ALWAYS_ALLOW_PRIORITY_OFFSET = - ALWAYS_ALLOW_PRIORITY_FRACTION / 1000; - // Specific priority offsets and derived priorities for dynamic/settings rules. export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9; @@ -535,6 +523,7 @@ export async function createPolicyEngineConfig( checkers, defaultDecision: PolicyDecision.ASK_USER, approvalMode, + disableAlwaysAllow: settings.disableAlwaysAllow, }; } diff --git a/packages/core/src/policy/policies/tracker.toml b/packages/core/src/policy/policies/tracker.toml new file mode 100644 index 0000000000..e17c4fc387 --- /dev/null +++ b/packages/core/src/policy/policies/tracker.toml @@ -0,0 +1,34 @@ +# Priority system for policy rules: +# - Higher priority numbers win over lower priority numbers +# - When multiple rules match, the highest priority rule is applied +# - Rules are evaluated in order of priority (highest first) +# +# Priority bands (tiers): +# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100) +# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100) +# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100) +# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100) +# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100) +# +# Settings-based and dynamic rules (all in user tier 4.x): +# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI +# 4.9: MCP servers excluded list (security: persistent server blocks) +# 4.4: Command line flag --exclude-tools (explicit temporary blocks) +# 4.3: Command line flag --allowed-tools (explicit temporary allows) +# 4.2: MCP servers with trust=true (persistent trusted servers) +# 4.1: MCP 
servers allowed list (persistent general server allows) + +# Allow tracker tools to execute without asking the user. +# These tools are only registered when the tracker feature is enabled, +# so this rule is a no-op when the feature is disabled. +[[rule]] +toolName = [ + "tracker_create_task", + "tracker_update_task", + "tracker_get_task", + "tracker_list_tasks", + "tracker_add_dependency", + "tracker_visualize" +] +decision = "allow" +priority = 50 diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index a54da32376..376e465604 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -14,6 +14,7 @@ import { InProcessCheckerType, ApprovalMode, PRIORITY_SUBAGENT_TOOL, + ALWAYS_ALLOW_PRIORITY_FRACTION, } from './types.js'; import type { FunctionCall } from '@google/genai'; import { SafetyCheckDecision } from '../safety/protocol.js'; @@ -3229,4 +3230,116 @@ describe('PolicyEngine', () => { expect(hookCheckers[1].priority).toBe(5); }); }); + + describe('disableAlwaysAllow', () => { + it('should ignore "Always Allow" rules when disableAlwaysAllow is true', async () => { + const alwaysAllowRule: PolicyRule = { + toolName: 'test-tool', + decision: PolicyDecision.ALLOW, + priority: 3 + ALWAYS_ALLOW_PRIORITY_FRACTION / 1000, // 3.95 + source: 'Dynamic (Confirmed)', + }; + + const engine = new PolicyEngine({ + rules: [alwaysAllowRule], + disableAlwaysAllow: true, + defaultDecision: PolicyDecision.ASK_USER, + }); + + const result = await engine.check( + { name: 'test-tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should respect "Always Allow" rules when disableAlwaysAllow is false', async () => { + const alwaysAllowRule: PolicyRule = { + toolName: 'test-tool', + decision: PolicyDecision.ALLOW, + priority: 3 + ALWAYS_ALLOW_PRIORITY_FRACTION / 1000, // 3.95 + source: 'Dynamic (Confirmed)', + }; + + const 
engine = new PolicyEngine({ + rules: [alwaysAllowRule], + disableAlwaysAllow: false, + defaultDecision: PolicyDecision.ASK_USER, + }); + + const result = await engine.check( + { name: 'test-tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should NOT ignore other rules when disableAlwaysAllow is true', async () => { + const normalRule: PolicyRule = { + toolName: 'test-tool', + decision: PolicyDecision.ALLOW, + priority: 1.5, // Not a .950 fraction + source: 'Normal Rule', + }; + + const engine = new PolicyEngine({ + rules: [normalRule], + disableAlwaysAllow: true, + defaultDecision: PolicyDecision.ASK_USER, + }); + + const result = await engine.check( + { name: 'test-tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + }); + + describe('getExcludedTools with disableAlwaysAllow', () => { + it('should exclude tool if an Always Allow rule says ALLOW but disableAlwaysAllow is true (falling back to DENY)', async () => { + // To prove the ALWAYS_ALLOW rule is ignored, we set the default decision to DENY. + // If the rule was honored, the decision would be ALLOW (tool not excluded). + // Since it's ignored, it falls back to the default DENY (tool is excluded). + // In the real app, it usually falls back to ASK_USER, but ASK_USER also doesn't + // exclude the tool, so we use DENY here purely to make the test observable. 
+ const alwaysAllowRule: PolicyRule = { + toolName: 'test-tool', + decision: PolicyDecision.ALLOW, + priority: 3 + ALWAYS_ALLOW_PRIORITY_FRACTION / 1000, + }; + + const engine = new PolicyEngine({ + rules: [alwaysAllowRule], + disableAlwaysAllow: true, + defaultDecision: PolicyDecision.DENY, + }); + + const excluded = engine.getExcludedTools( + undefined, + new Set(['test-tool']), + ); + expect(excluded.has('test-tool')).toBe(true); + }); + + it('should NOT exclude tool if ALWAYS_ALLOW is enabled and rule says ALLOW', async () => { + const alwaysAllowRule: PolicyRule = { + toolName: 'test-tool', + decision: PolicyDecision.ALLOW, + priority: 3 + ALWAYS_ALLOW_PRIORITY_FRACTION / 1000, + }; + + const engine = new PolicyEngine({ + rules: [alwaysAllowRule], + disableAlwaysAllow: false, + defaultDecision: PolicyDecision.DENY, + }); + + const excluded = engine.getExcludedTools( + undefined, + new Set(['test-tool']), + ); + expect(excluded.has('test-tool')).toBe(false); + }); + }); }); diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index b626666370..ec84eb23aa 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -13,6 +13,7 @@ import { type HookCheckerRule, ApprovalMode, type CheckResult, + ALWAYS_ALLOW_PRIORITY_FRACTION, } from './types.js'; import { stableStringify } from './stable-stringify.js'; import { debugLogger } from '../utils/debugLogger.js'; @@ -154,6 +155,7 @@ export class PolicyEngine { private hookCheckers: HookCheckerRule[]; private readonly defaultDecision: PolicyDecision; private readonly nonInteractive: boolean; + private readonly disableAlwaysAllow: boolean; private readonly checkerRunner?: CheckerRunner; private approvalMode: ApprovalMode; @@ -169,6 +171,7 @@ export class PolicyEngine { ); this.defaultDecision = config.defaultDecision ?? PolicyDecision.ASK_USER; this.nonInteractive = config.nonInteractive ?? 
false; + this.disableAlwaysAllow = config.disableAlwaysAllow ?? false; this.checkerRunner = checkerRunner; this.approvalMode = config.approvalMode ?? ApprovalMode.DEFAULT; } @@ -187,6 +190,13 @@ export class PolicyEngine { return this.approvalMode; } + private isAlwaysAllowRule(rule: PolicyRule): boolean { + return ( + rule.priority !== undefined && + Math.round((rule.priority % 1) * 1000) === ALWAYS_ALLOW_PRIORITY_FRACTION + ); + } + private shouldDowngradeForRedirection( command: string, allowRedirection?: boolean, @@ -422,6 +432,10 @@ export class PolicyEngine { } for (const rule of this.rules) { + if (this.disableAlwaysAllow && this.isAlwaysAllowRule(rule)) { + continue; + } + const match = toolCallsToTry.some((tc) => ruleMatches( rule, @@ -684,6 +698,10 @@ export class PolicyEngine { // Evaluate rules in priority order (they are already sorted in constructor) for (const rule of this.rules) { + if (this.disableAlwaysAllow && this.isAlwaysAllowRule(rule)) { + continue; + } + // Create a copy of the rule without argsPattern to see if it targets the tool // regardless of the runtime arguments it might receive. const ruleWithoutArgs: PolicyRule = { ...rule, argsPattern: undefined }; diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 6fa45630d9..6e14e1fac9 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -285,6 +285,11 @@ export interface PolicyEngineConfig { */ nonInteractive?: boolean; + /** + * Whether to ignore "Always Allow" rules. + */ + disableAlwaysAllow?: boolean; + /** * Whether to allow hooks to execute. * When false, all hooks are denied. 
@@ -314,6 +319,7 @@ export interface PolicySettings { // Admin provided policies that will supplement the ADMIN level policies adminPolicyPaths?: string[]; workspacePoliciesDir?: string; + disableAlwaysAllow?: boolean; } export interface CheckResult { @@ -326,3 +332,16 @@ export interface CheckResult { * Effective priority matching Tier 1 (Default) read-only tools. */ export const PRIORITY_SUBAGENT_TOOL = 1.05; + +/** + * The fractional priority of "Always allow" rules (e.g., 950/1000). + * Higher fraction within a tier wins. + */ +export const ALWAYS_ALLOW_PRIORITY_FRACTION = 950; + +/** + * The fractional priority offset for "Always allow" rules (e.g., 0.95). + * This ensures consistency between in-memory rules and persisted rules. + */ +export const ALWAYS_ALLOW_PRIORITY_OFFSET = + ALWAYS_ALLOW_PRIORITY_FRACTION / 1000; diff --git a/packages/core/src/prompts/promptProvider.test.ts b/packages/core/src/prompts/promptProvider.test.ts index a740705e35..c2253a9b57 100644 --- a/packages/core/src/prompts/promptProvider.test.ts +++ b/packages/core/src/prompts/promptProvider.test.ts @@ -60,6 +60,7 @@ describe('PromptProvider', () => { }, isInteractive: vi.fn().mockReturnValue(true), isInteractiveShellEnabled: vi.fn().mockReturnValue(true), + isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false), getSkillManager: vi.fn().mockReturnValue({ getSkills: vi.fn().mockReturnValue([]), }), diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index b9975d79c4..ed71b035dc 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -124,6 +124,7 @@ export class PromptProvider { hasSkills: skills.length > 0, hasHierarchicalMemory, contextFilenames, + topicUpdateNarration: context.config.isTopicUpdateNarrationEnabled(), })), subAgents: this.withSection('agentContexts', () => context.config @@ -162,6 +163,8 @@ export class PromptProvider { ? 
{ path: approvedPlanPath } : undefined, taskTracker: context.config.isTrackerEnabled(), + topicUpdateNarration: + context.config.isTopicUpdateNarrationEnabled(), }), !isPlanMode, ), @@ -183,6 +186,8 @@ export class PromptProvider { enableShellEfficiency: context.config.getEnableShellOutputEfficiency(), interactiveShellEnabled: context.config.isInteractiveShellEnabled(), + topicUpdateNarration: + context.config.isTopicUpdateNarrationEnabled(), }), ), sandbox: this.withSection('sandbox', () => getSandboxMode()), diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 93dd635396..11b559d116 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -60,6 +60,7 @@ export interface CoreMandatesOptions { hasSkills: boolean; hasHierarchicalMemory: boolean; contextFilenames?: string[]; + topicUpdateNarration: boolean; } export interface PrimaryWorkflowsOptions { @@ -71,11 +72,13 @@ export interface PrimaryWorkflowsOptions { enableGlob: boolean; approvedPlan?: { path: string }; taskTracker?: boolean; + topicUpdateNarration: boolean; } export interface OperationalGuidelinesOptions { interactive: boolean; interactiveShellEnabled: boolean; + topicUpdateNarration: boolean; } export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside'; @@ -223,10 +226,12 @@ Use the following guidelines to optimize your search and read patterns. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. 
Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes.${mandateConflictResolution(options.hasHierarchicalMemory)} - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. -- ${mandateConfirm(options.interactive)} -- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. -- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)} -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy.${mandateContinueWork(options.interactive)} +- ${mandateConfirm(options.interactive)}${ + options.topicUpdateNarration + ? 
mandateTopicUpdateModel() + : mandateExplainBeforeActing() + } +- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${mandateContinueWork(options.interactive)} `.trim(); } @@ -341,10 +346,18 @@ export function renderOperationalGuidelines( ## Tone and Style - **Role:** A senior software engineer and collaborative peer programmer. -- **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and mechanical tool-use narration (e.g., "I will now call..."). +- **High-Signal Output:** Focus exclusively on **intent** and **technical rationale**. Avoid conversational filler, apologies, and ${ + options.topicUpdateNarration + ? 'per-tool explanations.' + : 'mechanical tool-use narration (e.g., "I will now call...").' + } - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. -- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they serve to explain intent as required by the 'Explain Before Acting' mandate. +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are ${ + options.topicUpdateNarration + ? 'part of the **Topic Model**.' + : "part of the 'Explain Before Acting' mandate." + } - **No Repetition:** Once you have provided a final synthesis of your work, do not repeat yourself or provide additional summaries. For simple or direct requests, prioritize extreme brevity. - **Formatting:** Use GitHub-flavored Markdown. 
Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls. @@ -560,6 +573,56 @@ function mandateConfirm(interactive: boolean): string { : '**Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically.'; } +function mandateTopicUpdateModel(): string { + return ` +- **Protocol: Topic Model** + You are an agentic system. You must maintain a visible state log that tracks broad logical phases using a specific header format. + +- **1. Topic Initialization & Persistence:** + - **The Trigger:** You MUST issue a \`Topic: : \` header ONLY when beginning a task or when the broad logical nature of the task changes (e.g., transitioning from research to implementation). + - **The Format:** Use exactly \`Topic: : \` (e.g., \`Topic: : Researching Agent Skills in the repo\`). + - **Persistence:** Once a Topic is declared, do NOT repeat it for subsequent tool calls or in subsequent messages within that same phase. + - **Start of Task:** Your very first tool execution must be preceded by a Topic header. + +- **2. Tool Execution Protocol (Zero-Noise):** + - **No Per-Tool Headers:** It is a violation of protocol to print "Topic:" before every tool call. + - **Silent Mode:** No conversational filler, no "I will now...", and no summaries between tools. + - Only the Topic header at the start of a broad phase is permitted to break the silence. Everything in between must be silent. + +- **3. Thinking Protocol:** + - Use internal thought blocks to keep track of what tools you have called, plan your next steps, and reason about the task. + - Without reasoning and tracking in thought blocks, you may lose context. + - Always use the required syntax for thought blocks to ensure they remain hidden from the user interface. 
+ +- **4. Completion:** + - Only when the entire task is finalized do you provide a **Final Summary**. + +**IMPORTANT: Topic Headers vs. Thoughts** +The \`Topic: : \` header must **NOT** be placed inside a thought block. It must be standard text output so that it is properly rendered and displayed in the UI. + +**Correct State Log Example:** +\`\`\` +Topic: : Researching Agent Skills in the repo + + + + +Topic: : Implementing the skill-creator logic + + + +The task is complete. [Final Summary] +\`\`\` + +- **Constraint Enforcement:** If you repeat a "Topic:" line without a fundamental shift in work, or if you provide a Topic for every tool call, you have failed the system integrity protocol.`; +} + +function mandateExplainBeforeActing(): string { + return ` +- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.`; +} + function mandateSkillGuidance(hasSkills: boolean): string { if (!hasSkills) return ''; return ` diff --git a/packages/core/src/routing/strategies/approvalModeStrategy.ts b/packages/core/src/routing/strategies/approvalModeStrategy.ts index 403a4c3176..b7565f6dc3 100644 --- a/packages/core/src/routing/strategies/approvalModeStrategy.ts +++ b/packages/core/src/routing/strategies/approvalModeStrategy.ts @@ -36,7 +36,7 @@ export class ApprovalModeStrategy implements RoutingStrategy { const model = context.requestedModel ?? config.getModel(); // This strategy only applies to "auto" models. 
- if (!isAutoModel(model)) { + if (!isAutoModel(model, config)) { return null; } diff --git a/packages/core/src/routing/strategies/classifierStrategy.ts b/packages/core/src/routing/strategies/classifierStrategy.ts index 2040e7eccd..3532e34c63 100644 --- a/packages/core/src/routing/strategies/classifierStrategy.ts +++ b/packages/core/src/routing/strategies/classifierStrategy.ts @@ -139,7 +139,7 @@ export class ClassifierStrategy implements RoutingStrategy { const model = context.requestedModel ?? config.getModel(); if ( (await config.getNumericalRoutingEnabled()) && - isGemini3Model(model) + isGemini3Model(model, config) ) { return null; } diff --git a/packages/core/src/routing/strategies/numericalClassifierStrategy.ts b/packages/core/src/routing/strategies/numericalClassifierStrategy.ts index c86576d6ce..a97180c8eb 100644 --- a/packages/core/src/routing/strategies/numericalClassifierStrategy.ts +++ b/packages/core/src/routing/strategies/numericalClassifierStrategy.ts @@ -109,7 +109,7 @@ export class NumericalClassifierStrategy implements RoutingStrategy { return null; } - if (!isGemini3Model(model)) { + if (!isGemini3Model(model, config)) { return null; } diff --git a/packages/core/src/routing/strategies/overrideStrategy.ts b/packages/core/src/routing/strategies/overrideStrategy.ts index 9a89d2af70..37e23e188b 100644 --- a/packages/core/src/routing/strategies/overrideStrategy.ts +++ b/packages/core/src/routing/strategies/overrideStrategy.ts @@ -29,7 +29,7 @@ export class OverrideStrategy implements RoutingStrategy { const overrideModel = context.requestedModel ?? config.getModel(); // If the model is 'auto' we should pass to the next strategy. 
- if (isAutoModel(overrideModel)) { + if (isAutoModel(overrideModel, config)) { return null; } diff --git a/packages/core/src/scheduler/policy.test.ts b/packages/core/src/scheduler/policy.test.ts index e802a4b220..32a92309e0 100644 --- a/packages/core/src/scheduler/policy.test.ts +++ b/packages/core/src/scheduler/policy.test.ts @@ -102,6 +102,32 @@ describe('policy.ts', () => { ); }); + it('should respect disableAlwaysAllow from config', async () => { + const mockPolicyEngine = { + check: vi.fn().mockResolvedValue({ decision: PolicyDecision.ALLOW }), + } as unknown as Mocked; + + const mockConfig = { + getPolicyEngine: vi.fn().mockReturnValue(mockPolicyEngine), + getDisableAlwaysAllow: vi.fn().mockReturnValue(true), + } as unknown as Mocked; + + (mockConfig as unknown as { config: Config }).config = + mockConfig as Config; + + const toolCall = { + request: { name: 'test-tool', args: {} }, + tool: { name: 'test-tool' }, + } as ValidatingToolCall; + + // Note: checkPolicy calls config.getPolicyEngine().check() + // The PolicyEngine itself is already configured with disableAlwaysAllow + // when created in Config. Here we are just verifying that checkPolicy + // doesn't somehow bypass it. 
+ await checkPolicy(toolCall, mockConfig); + expect(mockPolicyEngine.check).toHaveBeenCalled(); + }); + it('should throw if ASK_USER is returned in non-interactive mode', async () => { const mockPolicyEngine = { check: vi.fn().mockResolvedValue({ decision: PolicyDecision.ASK_USER }), diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index 4c7ef2ee04..83d77c5a0b 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -296,6 +296,7 @@ export class ToolExecutor { call.request.callId, output, this.config.getActiveModel(), + this.config, ); // Inject the cancellation error into the response object @@ -352,6 +353,7 @@ export class ToolExecutor { callId, content, this.config.getActiveModel(), + this.config, ); const successResponse: ToolCallResponseInfo = { diff --git a/packages/core/src/services/chatRecordingService.test.ts b/packages/core/src/services/chatRecordingService.test.ts index 3b18d04389..6b395b92e0 100644 --- a/packages/core/src/services/chatRecordingService.test.ts +++ b/packages/core/src/services/chatRecordingService.test.ts @@ -439,6 +439,7 @@ describe('ChatRecordingService', () => { describe('deleteSession', () => { it('should delete the session file, tool outputs, session directory, and logs if they exist', () => { const sessionId = 'test-session-id'; + const shortId = '12345678'; const chatsDir = path.join(testTempDir, 'chats'); const logsDir = path.join(testTempDir, 'logs'); const toolOutputsDir = path.join(testTempDir, 'tool-outputs'); @@ -449,8 +450,12 @@ describe('ChatRecordingService', () => { fs.mkdirSync(toolOutputsDir, { recursive: true }); fs.mkdirSync(sessionDir, { recursive: true }); - const sessionFile = path.join(chatsDir, `${sessionId}.json`); - fs.writeFileSync(sessionFile, '{}'); + // Create main session file with timestamp + const sessionFile = path.join( + chatsDir, + `session-2023-01-01T00-00-${shortId}.json`, + ); + 
fs.writeFileSync(sessionFile, JSON.stringify({ sessionId })); const logFile = path.join(logsDir, `session-${sessionId}.jsonl`); fs.writeFileSync(logFile, '{}'); @@ -458,7 +463,8 @@ describe('ChatRecordingService', () => { const toolOutputDir = path.join(toolOutputsDir, `session-${sessionId}`); fs.mkdirSync(toolOutputDir, { recursive: true }); - chatRecordingService.deleteSession(sessionId); + // Call with shortId + chatRecordingService.deleteSession(shortId); expect(fs.existsSync(sessionFile)).toBe(false); expect(fs.existsSync(logFile)).toBe(false); @@ -466,6 +472,93 @@ describe('ChatRecordingService', () => { expect(fs.existsSync(sessionDir)).toBe(false); }); + it('should delete subagent files and their logs when parent is deleted', () => { + const parentSessionId = '12345678-session-id'; + const shortId = '12345678'; + const subagentSessionId = 'subagent-session-id'; + const chatsDir = path.join(testTempDir, 'chats'); + const logsDir = path.join(testTempDir, 'logs'); + const toolOutputsDir = path.join(testTempDir, 'tool-outputs'); + + fs.mkdirSync(chatsDir, { recursive: true }); + fs.mkdirSync(logsDir, { recursive: true }); + fs.mkdirSync(toolOutputsDir, { recursive: true }); + + // Create parent session file + const parentFile = path.join( + chatsDir, + `session-2023-01-01T00-00-${shortId}.json`, + ); + fs.writeFileSync( + parentFile, + JSON.stringify({ sessionId: parentSessionId }), + ); + + // Create subagent session file + const subagentFile = path.join( + chatsDir, + `session-2023-01-01T00-01-${shortId}.json`, + ); + fs.writeFileSync( + subagentFile, + JSON.stringify({ sessionId: subagentSessionId, kind: 'subagent' }), + ); + + // Create logs for both + const parentLog = path.join(logsDir, `session-${parentSessionId}.jsonl`); + fs.writeFileSync(parentLog, '{}'); + const subagentLog = path.join( + logsDir, + `session-${subagentSessionId}.jsonl`, + ); + fs.writeFileSync(subagentLog, '{}'); + + // Create tool outputs for both + const parentToolOutputDir = 
path.join( + toolOutputsDir, + `session-${parentSessionId}`, + ); + fs.mkdirSync(parentToolOutputDir, { recursive: true }); + const subagentToolOutputDir = path.join( + toolOutputsDir, + `session-${subagentSessionId}`, + ); + fs.mkdirSync(subagentToolOutputDir, { recursive: true }); + + // Call with parent sessionId + chatRecordingService.deleteSession(parentSessionId); + + expect(fs.existsSync(parentFile)).toBe(false); + expect(fs.existsSync(subagentFile)).toBe(false); + expect(fs.existsSync(parentLog)).toBe(false); + expect(fs.existsSync(subagentLog)).toBe(false); + expect(fs.existsSync(parentToolOutputDir)).toBe(false); + expect(fs.existsSync(subagentToolOutputDir)).toBe(false); + }); + + it('should delete by basename', () => { + const sessionId = 'test-session-id'; + const shortId = '12345678'; + const chatsDir = path.join(testTempDir, 'chats'); + const logsDir = path.join(testTempDir, 'logs'); + + fs.mkdirSync(chatsDir, { recursive: true }); + fs.mkdirSync(logsDir, { recursive: true }); + + const basename = `session-2023-01-01T00-00-${shortId}`; + const sessionFile = path.join(chatsDir, `${basename}.json`); + fs.writeFileSync(sessionFile, JSON.stringify({ sessionId })); + + const logFile = path.join(logsDir, `session-${sessionId}.jsonl`); + fs.writeFileSync(logFile, '{}'); + + // Call with basename + chatRecordingService.deleteSession(basename); + + expect(fs.existsSync(sessionFile)).toBe(false); + expect(fs.existsSync(logFile)).toBe(false); + }); + it('should not throw if session file does not exist', () => { expect(() => chatRecordingService.deleteSession('non-existent'), diff --git a/packages/core/src/services/chatRecordingService.ts b/packages/core/src/services/chatRecordingService.ts index 606a7334db..2591d90bb4 100644 --- a/packages/core/src/services/chatRecordingService.ts +++ b/packages/core/src/services/chatRecordingService.ts @@ -590,46 +590,27 @@ export class ChatRecordingService { } /** - * Deletes a session file by session ID. 
+ * Deletes a session file by sessionId, filename, or basename. + * Derives an 8-character shortId to find and delete all associated files + * (parent and subagents). + * + * @throws {Error} If shortId validation fails. */ - deleteSession(sessionId: string): void { + deleteSession(sessionIdOrBasename: string): void { try { const tempDir = this.context.config.storage.getProjectTempDir(); const chatsDir = path.join(tempDir, 'chats'); - const sessionPath = path.join(chatsDir, `${sessionId}.json`); - if (fs.existsSync(sessionPath)) { - fs.unlinkSync(sessionPath); + + const shortId = this.deriveShortId(sessionIdOrBasename); + + if (!fs.existsSync(chatsDir)) { + return; // Nothing to delete } - // Cleanup Activity logs in the project logs directory - const logsDir = path.join(tempDir, 'logs'); - const logPath = path.join(logsDir, `session-${sessionId}.jsonl`); - if (fs.existsSync(logPath)) { - fs.unlinkSync(logPath); - } + const matchingFiles = this.getMatchingSessionFiles(chatsDir, shortId); - // Cleanup tool outputs for this session - const safeSessionId = sanitizeFilenamePart(sessionId); - const toolOutputDir = path.join( - tempDir, - 'tool-outputs', - `session-${safeSessionId}`, - ); - - // Robustness: Ensure the path is strictly within the tool-outputs base - const toolOutputsBase = path.join(tempDir, 'tool-outputs'); - if ( - fs.existsSync(toolOutputDir) && - toolOutputDir.startsWith(toolOutputsBase) - ) { - fs.rmSync(toolOutputDir, { recursive: true, force: true }); - } - - // ALSO cleanup the session-specific directory (contains plans, tasks, etc.) 
- const sessionDir = path.join(tempDir, safeSessionId); - // Robustness: Ensure the path is strictly within the temp root - if (fs.existsSync(sessionDir) && sessionDir.startsWith(tempDir)) { - fs.rmSync(sessionDir, { recursive: true, force: true }); + for (const file of matchingFiles) { + this.deleteSessionAndArtifacts(chatsDir, file, tempDir); } } catch (error) { debugLogger.error('Error deleting session file.', error); @@ -637,6 +618,115 @@ export class ChatRecordingService { } } + /** + * Derives an 8-character shortId from a sessionId, filename, or basename. + */ + private deriveShortId(sessionIdOrBasename: string): string { + let shortId = sessionIdOrBasename; + if (sessionIdOrBasename.startsWith(SESSION_FILE_PREFIX)) { + const withoutExt = sessionIdOrBasename.replace('.json', ''); + const parts = withoutExt.split('-'); + shortId = parts[parts.length - 1]; + } else if (sessionIdOrBasename.length >= 8) { + shortId = sessionIdOrBasename.slice(0, 8); + } else { + throw new Error('Invalid sessionId or basename provided for deletion'); + } + + if (shortId.length !== 8) { + throw new Error('Derived shortId must be exactly 8 characters'); + } + + return shortId; + } + + /** + * Finds all session files matching the pattern session-*-.json + */ + private getMatchingSessionFiles(chatsDir: string, shortId: string): string[] { + const files = fs.readdirSync(chatsDir); + return files.filter( + (f) => + f.startsWith(SESSION_FILE_PREFIX) && f.endsWith(`-${shortId}.json`), + ); + } + + /** + * Deletes a single session file and its associated logs, tool-outputs, and directory. 
+ */ + private deleteSessionAndArtifacts( + chatsDir: string, + file: string, + tempDir: string, + ): void { + const filePath = path.join(chatsDir, file); + try { + const fileContent = fs.readFileSync(filePath, 'utf8'); + const content = JSON.parse(fileContent) as unknown; + + let fullSessionId: string | undefined; + if (content && typeof content === 'object' && 'sessionId' in content) { + const id = (content as Record)['sessionId']; + if (typeof id === 'string') { + fullSessionId = id; + } + } + + // Delete the session file + fs.unlinkSync(filePath); + + if (fullSessionId) { + this.deleteSessionLogs(fullSessionId, tempDir); + this.deleteSessionToolOutputs(fullSessionId, tempDir); + this.deleteSessionDirectory(fullSessionId, tempDir); + } + } catch (error) { + debugLogger.error(`Error deleting associated file ${file}:`, error); + } + } + + /** + * Cleans up activity logs for a session. + */ + private deleteSessionLogs(sessionId: string, tempDir: string): void { + const logsDir = path.join(tempDir, 'logs'); + const safeSessionId = sanitizeFilenamePart(sessionId); + const logPath = path.join(logsDir, `session-${safeSessionId}.jsonl`); + if (fs.existsSync(logPath) && logPath.startsWith(logsDir)) { + fs.unlinkSync(logPath); + } + } + + /** + * Cleans up tool outputs for a session. + */ + private deleteSessionToolOutputs(sessionId: string, tempDir: string): void { + const safeSessionId = sanitizeFilenamePart(sessionId); + const toolOutputDir = path.join( + tempDir, + 'tool-outputs', + `session-${safeSessionId}`, + ); + const toolOutputsBase = path.join(tempDir, 'tool-outputs'); + if ( + fs.existsSync(toolOutputDir) && + toolOutputDir.startsWith(toolOutputsBase) + ) { + fs.rmSync(toolOutputDir, { recursive: true, force: true }); + } + } + + /** + * Cleans up the session-specific directory. 
+ */ + private deleteSessionDirectory(sessionId: string, tempDir: string): void { + const safeSessionId = sanitizeFilenamePart(sessionId); + const sessionDir = path.join(tempDir, safeSessionId); + if (fs.existsSync(sessionDir) && sessionDir.startsWith(tempDir)) { + fs.rmSync(sessionDir, { recursive: true, force: true }); + } + } + /** * Rewinds the conversation to the state just before the specified message ID. * All messages from (and including) the specified ID onwards are removed. diff --git a/packages/core/src/services/environmentSanitization.ts b/packages/core/src/services/environmentSanitization.ts index 9d35249a8e..ee7c824e9c 100644 --- a/packages/core/src/services/environmentSanitization.ts +++ b/packages/core/src/services/environmentSanitization.ts @@ -125,7 +125,7 @@ export const NEVER_ALLOWED_VALUE_PATTERNS = [ /-----BEGIN (RSA|OPENSSH|EC|PGP) PRIVATE KEY-----/i, /-----BEGIN CERTIFICATE-----/i, // Credentials in URL - /(https?|ftp|smtp):\/\/[^:]+:[^@]+@/i, + /(https?|ftp|smtp):\/\/[^:\s]{1,1024}:[^@\s]{1,1024}@/i, // GitHub tokens (classic, fine-grained, OAuth, etc.) /(ghp|gho|ghu|ghs|ghr|github_pat)_[a-zA-Z0-9_]{36,}/i, // Google API keys @@ -133,7 +133,7 @@ export const NEVER_ALLOWED_VALUE_PATTERNS = [ // Amazon AWS Access Key ID /AKIA[A-Z0-9]{16}/i, // Generic OAuth/JWT tokens - /eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*/i, + /eyJ[a-zA-Z0-9_-]{0,10240}\.[a-zA-Z0-9_-]{0,10240}\.[a-zA-Z0-9_-]{0,10240}/i, // Stripe API keys /(s|r)k_(live|test)_[0-9a-zA-Z]{24}/i, // Slack tokens (bot, user, etc.) 
diff --git a/packages/core/src/services/fileKeychain.ts b/packages/core/src/services/fileKeychain.ts new file mode 100644 index 0000000000..57341a59f2 --- /dev/null +++ b/packages/core/src/services/fileKeychain.ts @@ -0,0 +1,160 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { promises as fs } from 'node:fs'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import * as crypto from 'node:crypto'; +import type { Keychain } from './keychainTypes.js'; +import { GEMINI_DIR, homedir } from '../utils/paths.js'; + +export class FileKeychain implements Keychain { + private readonly tokenFilePath: string; + private readonly encryptionKey: Buffer; + + constructor() { + const configDir = path.join(homedir(), GEMINI_DIR); + this.tokenFilePath = path.join(configDir, 'gemini-credentials.json'); + this.encryptionKey = this.deriveEncryptionKey(); + } + + private deriveEncryptionKey(): Buffer { + const salt = `${os.hostname()}-${os.userInfo().username}-gemini-cli`; + return crypto.scryptSync('gemini-cli-oauth', salt, 32); + } + + private encrypt(text: string): string { + const iv = crypto.randomBytes(16); + const cipher = crypto.createCipheriv('aes-256-gcm', this.encryptionKey, iv); + + let encrypted = cipher.update(text, 'utf8', 'hex'); + encrypted += cipher.final('hex'); + + const authTag = cipher.getAuthTag(); + + return iv.toString('hex') + ':' + authTag.toString('hex') + ':' + encrypted; + } + + private decrypt(encryptedData: string): string { + const parts = encryptedData.split(':'); + if (parts.length !== 3) { + throw new Error('Invalid encrypted data format'); + } + + const iv = Buffer.from(parts[0], 'hex'); + const authTag = Buffer.from(parts[1], 'hex'); + const encrypted = parts[2]; + + const decipher = crypto.createDecipheriv( + 'aes-256-gcm', + this.encryptionKey, + iv, + ); + decipher.setAuthTag(authTag); + + let decrypted = decipher.update(encrypted, 'hex', 'utf8'); + decrypted += 
decipher.final('utf8'); + + return decrypted; + } + + private async ensureDirectoryExists(): Promise { + const dir = path.dirname(this.tokenFilePath); + await fs.mkdir(dir, { recursive: true, mode: 0o700 }); + } + + private async loadData(): Promise>> { + try { + const data = await fs.readFile(this.tokenFilePath, 'utf-8'); + const decrypted = this.decrypt(data); + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + return JSON.parse(decrypted) as Record>; + } catch (error: unknown) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const err = error as NodeJS.ErrnoException & { message?: string }; + if (err.code === 'ENOENT') { + return {}; + } + if ( + err.message?.includes('Invalid encrypted data format') || + err.message?.includes( + 'Unsupported state or unable to authenticate data', + ) + ) { + throw new Error( + `Corrupted credentials file detected at: ${this.tokenFilePath}\n` + + `Please delete or rename this file to resolve the issue.`, + ); + } + throw error; + } + } + + private async saveData( + data: Record>, + ): Promise { + await this.ensureDirectoryExists(); + const json = JSON.stringify(data, null, 2); + const encrypted = this.encrypt(json); + await fs.writeFile(this.tokenFilePath, encrypted, { mode: 0o600 }); + } + + async getPassword(service: string, account: string): Promise { + const data = await this.loadData(); + return data[service]?.[account] ?? 
null; + } + + async setPassword( + service: string, + account: string, + password: string, + ): Promise { + const data = await this.loadData(); + if (!data[service]) { + data[service] = {}; + } + data[service][account] = password; + await this.saveData(data); + } + + async deletePassword(service: string, account: string): Promise { + const data = await this.loadData(); + if (data[service] && account in data[service]) { + delete data[service][account]; + + if (Object.keys(data[service]).length === 0) { + delete data[service]; + } + + if (Object.keys(data).length === 0) { + try { + await fs.unlink(this.tokenFilePath); + } catch (error: unknown) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const err = error as NodeJS.ErrnoException; + if (err.code !== 'ENOENT') { + throw error; + } + } + } else { + await this.saveData(data); + } + return true; + } + return false; + } + + async findCredentials( + service: string, + ): Promise> { + const data = await this.loadData(); + const serviceData = data[service] || {}; + return Object.entries(serviceData).map(([account, password]) => ({ + account, + password, + })); + } +} diff --git a/packages/core/src/services/keychainService.test.ts b/packages/core/src/services/keychainService.test.ts index 4ab59a5369..5423ff3545 100644 --- a/packages/core/src/services/keychainService.test.ts +++ b/packages/core/src/services/keychainService.test.ts @@ -4,10 +4,19 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest'; +import { + describe, + it, + expect, + vi, + beforeEach, + afterEach, + type Mock, +} from 'vitest'; import { KeychainService } from './keychainService.js'; import { coreEvents } from '../utils/events.js'; import { debugLogger } from '../utils/debugLogger.js'; +import { FileKeychain } from './fileKeychain.js'; type MockKeychain = { getPassword: Mock | undefined; @@ -23,8 +32,19 @@ const mockKeytar: MockKeychain = { findCredentials: 
vi.fn(), }; +const mockFileKeychain: MockKeychain = { + getPassword: vi.fn(), + setPassword: vi.fn(), + deletePassword: vi.fn(), + findCredentials: vi.fn(), +}; + vi.mock('keytar', () => ({ default: mockKeytar })); +vi.mock('./fileKeychain.js', () => ({ + FileKeychain: vi.fn(() => mockFileKeychain), +})); + vi.mock('../utils/events.js', () => ({ coreEvents: { emitTelemetryKeychainAvailability: vi.fn() }, })); @@ -37,13 +57,15 @@ describe('KeychainService', () => { let service: KeychainService; const SERVICE_NAME = 'test-service'; let passwords: Record = {}; + const originalEnv = process.env; beforeEach(() => { vi.clearAllMocks(); + process.env = { ...originalEnv }; service = new KeychainService(SERVICE_NAME); passwords = {}; - // Stateful mock implementation to verify behavioral correctness + // Stateful mock implementation for native keychain mockKeytar.setPassword?.mockImplementation((_svc, acc, val) => { passwords[acc] = val; return Promise.resolve(); @@ -64,10 +86,36 @@ describe('KeychainService', () => { })), ), ); + + // Stateful mock implementation for fallback file keychain + mockFileKeychain.setPassword?.mockImplementation((_svc, acc, val) => { + passwords[acc] = val; + return Promise.resolve(); + }); + mockFileKeychain.getPassword?.mockImplementation((_svc, acc) => + Promise.resolve(passwords[acc] ?? 
null), + ); + mockFileKeychain.deletePassword?.mockImplementation((_svc, acc) => { + const exists = !!passwords[acc]; + delete passwords[acc]; + return Promise.resolve(exists); + }); + mockFileKeychain.findCredentials?.mockImplementation(() => + Promise.resolve( + Object.entries(passwords).map(([account, password]) => ({ + account, + password, + })), + ), + ); + }); + + afterEach(() => { + process.env = originalEnv; }); describe('isAvailable', () => { - it('should return true and emit telemetry on successful functional test', async () => { + it('should return true and emit telemetry on successful functional test with native keychain', async () => { const available = await service.isAvailable(); expect(available).toBe(true); @@ -77,12 +125,13 @@ describe('KeychainService', () => { ); }); - it('should return false, log error, and emit telemetry on failed functional test', async () => { + it('should return true (via fallback), log error, and emit telemetry indicating native is unavailable on failed functional test', async () => { mockKeytar.setPassword?.mockRejectedValue(new Error('locked')); const available = await service.isAvailable(); - expect(available).toBe(false); + // Because it falls back to FileKeychain, it is always available. 
+ expect(available).toBe(true); expect(debugLogger.log).toHaveBeenCalledWith( expect.stringContaining('encountered an error'), 'locked', @@ -90,15 +139,19 @@ describe('KeychainService', () => { expect(coreEvents.emitTelemetryKeychainAvailability).toHaveBeenCalledWith( expect.objectContaining({ available: false }), ); + expect(debugLogger.log).toHaveBeenCalledWith( + expect.stringContaining('Using FileKeychain fallback'), + ); + expect(FileKeychain).toHaveBeenCalled(); }); - it('should return false, log validation error, and emit telemetry on module load failure', async () => { + it('should return true (via fallback), log validation error, and emit telemetry on module load failure', async () => { const originalMock = mockKeytar.getPassword; mockKeytar.getPassword = undefined; // Break schema const available = await service.isAvailable(); - expect(available).toBe(false); + expect(available).toBe(true); expect(debugLogger.log).toHaveBeenCalledWith( expect.stringContaining('failed structural validation'), expect.objectContaining({ getPassword: expect.any(Array) }), @@ -106,19 +159,31 @@ describe('KeychainService', () => { expect(coreEvents.emitTelemetryKeychainAvailability).toHaveBeenCalledWith( expect.objectContaining({ available: false }), ); + expect(FileKeychain).toHaveBeenCalled(); mockKeytar.getPassword = originalMock; }); - it('should log failure if functional test cycle returns false', async () => { + it('should log failure if functional test cycle returns false, then fallback', async () => { mockKeytar.getPassword?.mockResolvedValue('wrong-password'); const available = await service.isAvailable(); - expect(available).toBe(false); + expect(available).toBe(true); expect(debugLogger.log).toHaveBeenCalledWith( expect.stringContaining('functional verification failed'), ); + expect(FileKeychain).toHaveBeenCalled(); + }); + + it('should fallback to FileKeychain when GEMINI_FORCE_FILE_STORAGE is true', async () => { + process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true'; 
+ const available = await service.isAvailable(); + expect(available).toBe(true); + expect(FileKeychain).toHaveBeenCalled(); + expect(coreEvents.emitTelemetryKeychainAvailability).toHaveBeenCalledWith( + expect.objectContaining({ available: false }), + ); }); it('should cache the result and handle concurrent initialization attempts once', async () => { @@ -159,25 +224,5 @@ describe('KeychainService', () => { }); }); - describe('When Unavailable', () => { - beforeEach(() => { - mockKeytar.setPassword?.mockRejectedValue(new Error('Unavailable')); - }); - - it.each([ - { method: 'getPassword', args: ['acc'] }, - { method: 'setPassword', args: ['acc', 'val'] }, - { method: 'deletePassword', args: ['acc'] }, - { method: 'findCredentials', args: [] }, - ])('$method should throw a consistent error', async ({ method, args }) => { - await expect( - ( - service as unknown as Record< - string, - (...args: unknown[]) => Promise - > - )[method](...args), - ).rejects.toThrow('Keychain is not available'); - }); - }); + // Removing 'When Unavailable' tests since the service is always available via fallback }); diff --git a/packages/core/src/services/keychainService.ts b/packages/core/src/services/keychainService.ts index a43890f89b..48a13c3dda 100644 --- a/packages/core/src/services/keychainService.ts +++ b/packages/core/src/services/keychainService.ts @@ -14,6 +14,9 @@ import { KEYCHAIN_TEST_PREFIX, } from './keychainTypes.js'; import { isRecord } from '../utils/markdownUtils.js'; +import { FileKeychain } from './fileKeychain.js'; + +export const FORCE_FILE_STORAGE_ENV_VAR = 'GEMINI_FORCE_FILE_STORAGE'; /** * Service for interacting with OS-level secure storage (e.g. keytar). @@ -31,6 +34,14 @@ export class KeychainService { return (await this.getKeychain()) !== null; } + /** + * Returns true if the service is using the encrypted file fallback backend. 
+ */ + async isUsingFileFallback(): Promise { + const keychain = await this.getKeychain(); + return keychain instanceof FileKeychain; + } + /** * Retrieves a secret for the given account. * @throws Error if the keychain is unavailable. @@ -85,26 +96,40 @@ export class KeychainService { // High-level orchestration of the loading and testing cycle. private async initializeKeychain(): Promise { let resultKeychain: Keychain | null = null; + const forceFileStorage = process.env[FORCE_FILE_STORAGE_ENV_VAR] === 'true'; - try { - const keychainModule = await this.loadKeychainModule(); - if (keychainModule) { - if (await this.isKeychainFunctional(keychainModule)) { - resultKeychain = keychainModule; - } else { - debugLogger.log('Keychain functional verification failed'); + if (!forceFileStorage) { + try { + const keychainModule = await this.loadKeychainModule(); + if (keychainModule) { + if (await this.isKeychainFunctional(keychainModule)) { + resultKeychain = keychainModule; + } else { + debugLogger.log('Keychain functional verification failed'); + } } + } catch (error) { + // Avoid logging full error objects to prevent PII exposure. + const message = error instanceof Error ? error.message : String(error); + debugLogger.log( + 'Keychain initialization encountered an error:', + message, + ); } - } catch (error) { - // Avoid logging full error objects to prevent PII exposure. - const message = error instanceof Error ? 
error.message : String(error); - debugLogger.log('Keychain initialization encountered an error:', message); } coreEvents.emitTelemetryKeychainAvailability( - new KeychainAvailabilityEvent(resultKeychain !== null), + new KeychainAvailabilityEvent( + resultKeychain !== null && !forceFileStorage, + ), ); + // Fallback to FileKeychain if native keychain is unavailable or file storage is forced + if (!resultKeychain) { + resultKeychain = new FileKeychain(); + debugLogger.log('Using FileKeychain fallback for secure storage.'); + } + return resultKeychain; } diff --git a/packages/core/src/services/modelConfigService.ts b/packages/core/src/services/modelConfigService.ts index 5142411be7..2999129116 100644 --- a/packages/core/src/services/modelConfigService.ts +++ b/packages/core/src/services/modelConfigService.ts @@ -51,11 +51,34 @@ export interface ModelConfigAlias { modelConfig: ModelConfig; } +// A model definition is a mapping from a model name to a list of features +// that the model supports. Model names can be either direct model IDs +// (gemini-2.5-pro) or aliases (auto). +export interface ModelDefinition { + displayName?: string; + tier?: string; // 'pro' | 'flash' | 'flash-lite' | 'custom' | 'auto' + family?: string; // The gemini family, e.g. 'gemini-3' | 'gemini-2' + isPreview?: boolean; + // Specifies which view the model should appear in. If unset, the model will + // not appear in the dialog. + dialogLocation?: 'main' | 'manual'; + /** A short description of the model for the dialog. */ + dialogDescription?: string; + features?: { + // Whether the model supports thinking. + thinking?: boolean; + // Whether the model supports multimodal function responses. This is + // supported in Gemini 3. 
+ multimodalToolUse?: boolean; + }; +} + export interface ModelConfigServiceConfig { aliases?: Record; customAliases?: Record; overrides?: ModelConfigOverride[]; customOverrides?: ModelConfigOverride[]; + modelDefinitions?: Record; } const MAX_ALIAS_CHAIN_DEPTH = 100; @@ -76,6 +99,28 @@ export class ModelConfigService { // TODO(12597): Process config to build a typed alias hierarchy. constructor(private readonly config: ModelConfigServiceConfig) {} + getModelDefinition(modelId: string): ModelDefinition | undefined { + const definition = this.config.modelDefinitions?.[modelId]; + if (definition) { + return definition; + } + + // For unknown models, return an implicit custom definition to match legacy behavior. + if (!modelId.startsWith('gemini-')) { + return { + tier: 'custom', + family: 'custom', + features: {}, + }; + } + + return undefined; + } + + getModelDefinitions(): Record { + return this.config.modelDefinitions ?? {}; + } + registerRuntimeModelConfig(aliasName: string, alias: ModelConfigAlias): void { this.runtimeAliases[aliasName] = alias; } diff --git a/packages/core/src/services/sandboxManager.test.ts b/packages/core/src/services/sandboxManager.test.ts index bac8a8a55c..963dbf8ccf 100644 --- a/packages/core/src/services/sandboxManager.test.ts +++ b/packages/core/src/services/sandboxManager.test.ts @@ -45,7 +45,7 @@ describe('NoopSandboxManager', () => { expect(result.env['MY_SECRET']).toBeUndefined(); }); - it('should force environment variable redaction even if not requested in config', async () => { + it('should allow disabling environment variable redaction if requested in config', async () => { const req = { command: 'echo', args: ['hello'], @@ -62,7 +62,7 @@ describe('NoopSandboxManager', () => { const result = await sandboxManager.prepareCommand(req); - expect(result.env['API_KEY']).toBeUndefined(); + expect(result.env['API_KEY']).toBe('sensitive-key'); }); it('should respect allowedEnvironmentVariables in config', async () => { diff --git 
a/packages/core/src/services/sandboxManager.ts b/packages/core/src/services/sandboxManager.ts index 458e15260e..f2435fa56b 100644 --- a/packages/core/src/services/sandboxManager.ts +++ b/packages/core/src/services/sandboxManager.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ @@ -37,6 +37,8 @@ export interface SandboxedCommand { args: string[]; /** Sanitized environment variables. */ env: NodeJS.ProcessEnv; + /** The working directory. */ + cwd?: string; } /** @@ -64,7 +66,9 @@ export class NoopSandboxManager implements SandboxManager { req.config?.sanitizationConfig?.allowedEnvironmentVariables ?? [], blockedEnvironmentVariables: req.config?.sanitizationConfig?.blockedEnvironmentVariables ?? [], - enableEnvironmentVariableRedaction: true, // Forced for safety + enableEnvironmentVariableRedaction: + req.config?.sanitizationConfig?.enableEnvironmentVariableRedaction ?? + true, }; const sanitizedEnv = sanitizeEnvironment(req.env, sanitizationConfig); @@ -76,3 +80,24 @@ export class NoopSandboxManager implements SandboxManager { }; } } + +/** + * SandboxManager that implements actual sandboxing. + */ +export class LocalSandboxManager implements SandboxManager { + async prepareCommand(_req: SandboxRequest): Promise { + throw new Error('Tool sandboxing is not yet implemented.'); + } +} + +/** + * Creates a sandbox manager based on the provided settings. 
+ */ +export function createSandboxManager( + sandboxingEnabled: boolean, +): SandboxManager { + if (sandboxingEnabled) { + return new LocalSandboxManager(); + } + return new NoopSandboxManager(); +} diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index 0eab28017a..a828771c25 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -22,6 +22,7 @@ import { type ShellOutputEvent, type ShellExecutionConfig, } from './shellExecutionService.js'; +import { NoopSandboxManager } from './sandboxManager.js'; import { ExecutionLifecycleService } from './executionLifecycleService.js'; import type { AnsiOutput, AnsiToken } from '../utils/terminalSerializer.js'; @@ -137,6 +138,7 @@ const shellExecutionConfig: ShellExecutionConfig = { allowedEnvironmentVariables: [], blockedEnvironmentVariables: [], }, + sandboxManager: new NoopSandboxManager(), }; const createMockSerializeTerminalToObjectReturnValue = ( @@ -625,6 +627,7 @@ describe('ShellExecutionService', () => { new AbortController().signal, true, { + ...shellExecutionConfig, sanitizationConfig: { enableEnvironmentVariableRedaction: true, allowedEnvironmentVariables: [], @@ -1396,7 +1399,7 @@ describe('ShellExecutionService child_process fallback', () => { expect(mockCpSpawn).toHaveBeenCalledWith( expectedCommand, ['/pid', String(mockChildProcess.pid), '/f', '/t'], - undefined, + expect.anything(), ); } }); @@ -1417,6 +1420,7 @@ describe('ShellExecutionService child_process fallback', () => { abortController.signal, true, { + ...shellExecutionConfig, sanitizationConfig: { enableEnvironmentVariableRedaction: true, allowedEnvironmentVariables: [], @@ -1631,6 +1635,7 @@ describe('ShellExecutionService execution method selection', () => { abortController.signal, false, // shouldUseNodePty { + ...shellExecutionConfig, sanitizationConfig: { enableEnvironmentVariableRedaction: 
true, allowedEnvironmentVariables: [], @@ -1778,6 +1783,7 @@ describe('ShellExecutionService environment variables', () => { new AbortController().signal, true, { + ...shellExecutionConfig, sanitizationConfig: { enableEnvironmentVariableRedaction: false, allowedEnvironmentVariables: [], @@ -1837,6 +1843,7 @@ describe('ShellExecutionService environment variables', () => { new AbortController().signal, true, { + ...shellExecutionConfig, sanitizationConfig: { enableEnvironmentVariableRedaction: false, allowedEnvironmentVariables: [], @@ -1904,6 +1911,58 @@ describe('ShellExecutionService environment variables', () => { await new Promise(process.nextTick); }); + it('should call prepareCommand on sandboxManager when provided', async () => { + const mockSandboxManager = { + prepareCommand: vi.fn().mockResolvedValue({ + program: 'sandboxed-bash', + args: ['-c', 'ls'], + env: { SANDBOXED: 'true' }, + }), + }; + + const configWithSandbox: ShellExecutionConfig = { + ...shellExecutionConfig, + sandboxManager: mockSandboxManager, + }; + + mockResolveExecutable.mockResolvedValue('/bin/bash/resolved'); + const mockChild = new EventEmitter() as unknown as ChildProcess; + mockChild.stdout = new EventEmitter() as unknown as Readable; + mockChild.stderr = new EventEmitter() as unknown as Readable; + Object.assign(mockChild, { pid: 123 }); + mockCpSpawn.mockReturnValue(mockChild); + + const handle = await ShellExecutionService.execute( + 'ls', + '/test/cwd', + () => {}, + new AbortController().signal, + false, // child_process path + configWithSandbox, + ); + + expect(mockResolveExecutable).toHaveBeenCalledWith(expect.any(String)); + expect(mockSandboxManager.prepareCommand).toHaveBeenCalledWith( + expect.objectContaining({ + command: '/bin/bash/resolved', + args: expect.arrayContaining([expect.stringContaining('ls')]), + cwd: '/test/cwd', + }), + ); + expect(mockCpSpawn).toHaveBeenCalledWith( + 'sandboxed-bash', + ['-c', 'ls'], + expect.objectContaining({ + env: 
expect.objectContaining({ SANDBOXED: 'true' }), + }), + ); + + // Clean up + mockChild.emit('exit', 0, null); + mockChild.emit('close', 0, null); + await handle.result; + }); + it('should include headless git and gh environment variables in non-interactive mode and append git config safely', async () => { vi.resetModules(); vi.stubEnv('GIT_CONFIG_COUNT', '2'); diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index f8d2e728d2..47601172ac 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ @@ -27,11 +27,8 @@ import { serializeTerminalToObject, type AnsiOutput, } from '../utils/terminalSerializer.js'; -import { - sanitizeEnvironment, - type EnvironmentSanitizationConfig, -} from './environmentSanitization.js'; -import { NoopSandboxManager } from './sandboxManager.js'; +import { type EnvironmentSanitizationConfig } from './environmentSanitization.js'; +import { type SandboxManager } from './sandboxManager.js'; import { killProcessGroup } from '../utils/process-utils.js'; import { ExecutionLifecycleService, @@ -90,6 +87,7 @@ export interface ShellExecutionConfig { defaultFg?: string; defaultBg?: string; sanitizationConfig: EnvironmentSanitizationConfig; + sandboxManager: SandboxManager; // Used for testing disableDynamicLineTrimming?: boolean; scrollback?: number; @@ -274,15 +272,6 @@ export class ShellExecutionService { shouldUseNodePty: boolean, shellExecutionConfig: ShellExecutionConfig, ): Promise { - const sandboxManager = new NoopSandboxManager(); - const { env: sanitizedEnv } = await sandboxManager.prepareCommand({ - command: commandToExecute, - args: [], - env: process.env, - cwd, - config: shellExecutionConfig, - }); - if (shouldUseNodePty) { const ptyInfo = await getPty(); if (ptyInfo) { @@ 
-294,7 +283,6 @@ export class ShellExecutionService { abortSignal, shellExecutionConfig, ptyInfo, - sanitizedEnv, ); } catch (_e) { // Fallback to child_process @@ -307,7 +295,7 @@ export class ShellExecutionService { cwd, onOutputEvent, abortSignal, - shellExecutionConfig.sanitizationConfig, + shellExecutionConfig, shouldUseNodePty, ); } @@ -342,14 +330,49 @@ export class ShellExecutionService { return { newBuffer: truncatedBuffer + chunk, truncated: true }; } - private static childProcessFallback( + private static async prepareExecution( + executable: string, + args: string[], + cwd: string, + env: NodeJS.ProcessEnv, + shellExecutionConfig: ShellExecutionConfig, + sanitizationConfigOverride?: EnvironmentSanitizationConfig, + ): Promise<{ + program: string; + args: string[]; + env: NodeJS.ProcessEnv; + cwd: string; + }> { + const resolvedExecutable = + (await resolveExecutable(executable)) ?? executable; + + const prepared = await shellExecutionConfig.sandboxManager.prepareCommand({ + command: resolvedExecutable, + args, + cwd, + env, + config: { + sanitizationConfig: + sanitizationConfigOverride ?? shellExecutionConfig.sanitizationConfig, + }, + }); + + return { + program: prepared.program, + args: prepared.args, + env: prepared.env, + cwd: prepared.cwd ?? cwd, + }; + } + + private static async childProcessFallback( commandToExecute: string, cwd: string, onOutputEvent: (event: ShellOutputEvent) => void, abortSignal: AbortSignal, - sanitizationConfig: EnvironmentSanitizationConfig, + shellExecutionConfig: ShellExecutionConfig, isInteractive: boolean, - ): ShellExecutionHandle { + ): Promise { try { const isWindows = os.platform() === 'win32'; const { executable, argsPrefix, shell } = getShellConfiguration(); @@ -361,16 +384,17 @@ export class ShellExecutionService { const gitConfigKeys = !isInteractive ? 
Object.keys(process.env).filter((k) => k.startsWith('GIT_CONFIG_')) : []; - const sanitizedEnv = sanitizeEnvironment(process.env, { - ...sanitizationConfig, + const localSanitizationConfig = { + ...shellExecutionConfig.sanitizationConfig, allowedEnvironmentVariables: [ - ...(sanitizationConfig.allowedEnvironmentVariables || []), + ...(shellExecutionConfig.sanitizationConfig + .allowedEnvironmentVariables || []), ...gitConfigKeys, ], - }); + }; - const env: NodeJS.ProcessEnv = { - ...sanitizedEnv, + const env = { + ...process.env, [GEMINI_CLI_IDENTIFICATION_ENV_VAR]: GEMINI_CLI_IDENTIFICATION_ENV_VAR_VALUE, TERM: 'xterm-256color', @@ -378,12 +402,28 @@ export class ShellExecutionService { GIT_PAGER: 'cat', }; + const { + program: finalExecutable, + args: finalArgs, + env: sanitizedEnv, + cwd: finalCwd, + } = await this.prepareExecution( + executable, + spawnArgs, + cwd, + env, + shellExecutionConfig, + localSanitizationConfig, + ); + + const finalEnv = { ...sanitizedEnv }; + if (!isInteractive) { const gitConfigCount = parseInt( - sanitizedEnv['GIT_CONFIG_COUNT'] || '0', + finalEnv['GIT_CONFIG_COUNT'] || '0', 10, ); - Object.assign(env, { + Object.assign(finalEnv, { // Disable interactive prompts and session-linked credential helpers // in non-interactive mode to prevent hangs in detached process groups. GIT_TERMINAL_PROMPT: '0', @@ -399,13 +439,13 @@ export class ShellExecutionService { }); } - const child = cpSpawn(executable, spawnArgs, { - cwd, + const child = cpSpawn(finalExecutable, finalArgs, { + cwd: finalCwd, stdio: ['ignore', 'pipe', 'pipe'], windowsVerbatimArguments: isWindows ? false : undefined, shell: false, detached: !isWindows, - env, + env: finalEnv, }); const state = { @@ -682,7 +722,6 @@ export class ShellExecutionService { abortSignal: AbortSignal, shellExecutionConfig: ShellExecutionConfig, ptyInfo: PtyImplementation, - sanitizedEnv: Record, ): Promise { if (!ptyInfo) { // This should not happen, but as a safeguard... 
@@ -695,29 +734,52 @@ export class ShellExecutionService { const rows = shellExecutionConfig.terminalHeight ?? 30; const { executable, argsPrefix, shell } = getShellConfiguration(); - const resolvedExecutable = await resolveExecutable(executable); - if (!resolvedExecutable) { - throw new Error( - `Shell executable "${executable}" not found in PATH or at absolute location. Please ensure the shell is installed and available in your environment.`, - ); - } - const guardedCommand = ensurePromptvarsDisabled(commandToExecute, shell); const args = [...argsPrefix, guardedCommand]; - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const ptyProcess = ptyInfo.module.spawn(executable, args, { + const env = { + ...process.env, + GEMINI_CLI: '1', + TERM: 'xterm-256color', + PAGER: shellExecutionConfig.pager ?? 'cat', + GIT_PAGER: shellExecutionConfig.pager ?? 'cat', + }; + + // Specifically allow GIT_CONFIG_* variables to pass through sanitization + // so we can safely append our overrides if needed. + const gitConfigKeys = Object.keys(process.env).filter((k) => + k.startsWith('GIT_CONFIG_'), + ); + const localSanitizationConfig = { + ...shellExecutionConfig.sanitizationConfig, + allowedEnvironmentVariables: [ + ...(shellExecutionConfig.sanitizationConfig + ?.allowedEnvironmentVariables ?? []), + ...gitConfigKeys, + ], + }; + + const { + program: finalExecutable, + args: finalArgs, + env: finalEnv, + cwd: finalCwd, + } = await this.prepareExecution( + executable, + args, cwd, + env, + shellExecutionConfig, + localSanitizationConfig, + ); + + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const ptyProcess = ptyInfo.module.spawn(finalExecutable, finalArgs, { + cwd: finalCwd, name: 'xterm-256color', cols, rows, - env: { - ...sanitizedEnv, - GEMINI_CLI: '1', - TERM: 'xterm-256color', - PAGER: shellExecutionConfig.pager ?? 'cat', - GIT_PAGER: shellExecutionConfig.pager ?? 
'cat', - }, + env: finalEnv, handleFlowControl: true, }); // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion diff --git a/packages/core/src/services/trackerService.ts b/packages/core/src/services/trackerService.ts index 06e890175f..3f3492c98e 100644 --- a/packages/core/src/services/trackerService.ts +++ b/packages/core/src/services/trackerService.ts @@ -51,8 +51,8 @@ export class TrackerService { }; if (task.parentId) { - const parentList = await this.listTasks(); - if (!parentList.find((t) => t.id === task.parentId)) { + const parent = await this.getTask(task.parentId); + if (!parent) { throw new Error(`Parent task with ID ${task.parentId} not found.`); } } @@ -143,14 +143,7 @@ export class TrackerService { const isClosing = updates.status === TaskStatus.CLOSED; const changingDependencies = updates.dependencies !== undefined; - let taskMap: Map | undefined; - - if (isClosing || changingDependencies) { - const allTasks = await this.listTasks(); - taskMap = new Map(allTasks.map((t) => [t.id, t])); - } - - const task = taskMap ? taskMap.get(id) : await this.getTask(id); + const task = await this.getTask(id); if (!task) { throw new Error(`Task with ID ${id} not found.`); @@ -159,9 +152,7 @@ export class TrackerService { const updatedTask = { ...task, ...updates, id: task.id }; if (updatedTask.parentId) { - const parentExists = taskMap - ? 
taskMap.has(updatedTask.parentId) - : !!(await this.getTask(updatedTask.parentId)); + const parentExists = !!(await this.getTask(updatedTask.parentId)); if (!parentExists) { throw new Error( `Parent task with ID ${updatedTask.parentId} not found.`, @@ -169,15 +160,12 @@ export class TrackerService { } } - if (taskMap) { - if (isClosing && task.status !== TaskStatus.CLOSED) { - this.validateCanClose(updatedTask, taskMap); - } + if (isClosing && task.status !== TaskStatus.CLOSED) { + await this.validateCanClose(updatedTask); + } - if (changingDependencies) { - taskMap.set(updatedTask.id, updatedTask); - this.validateNoCircularDependencies(updatedTask, taskMap); - } + if (changingDependencies) { + await this.validateNoCircularDependencies(updatedTask); } TrackerTaskSchema.parse(updatedTask); @@ -197,12 +185,9 @@ export class TrackerService { /** * Validates that a task can be closed (all dependencies must be closed). */ - private validateCanClose( - task: TrackerTask, - taskMap: Map, - ): void { + private async validateCanClose(task: TrackerTask): Promise { for (const depId of task.dependencies) { - const dep = taskMap.get(depId); + const dep = await this.getTask(depId); if (!dep) { throw new Error(`Dependency ${depId} not found for task ${task.id}.`); } @@ -217,14 +202,15 @@ export class TrackerService { /** * Validates that there are no circular dependencies. 
*/ - private validateNoCircularDependencies( + private async validateNoCircularDependencies( task: TrackerTask, - taskMap: Map, - ): void { + ): Promise { const visited = new Set(); const stack = new Set(); + const cache = new Map(); + cache.set(task.id, task); - const check = (currentId: string) => { + const check = async (currentId: string) => { if (stack.has(currentId)) { throw new Error( `Circular dependency detected involving task ${currentId}.`, @@ -237,17 +223,23 @@ export class TrackerService { visited.add(currentId); stack.add(currentId); - const currentTask = taskMap.get(currentId); + let currentTask = cache.get(currentId); if (!currentTask) { - throw new Error(`Dependency ${currentId} not found.`); + const fetched = await this.getTask(currentId); + if (!fetched) { + throw new Error(`Dependency ${currentId} not found.`); + } + currentTask = fetched; + cache.set(currentId, currentTask); } + for (const depId of currentTask.dependencies) { - check(depId); + await check(depId); } stack.delete(currentId); }; - check(task.id); + await check(task.id); } } diff --git a/packages/core/src/skills/skillLoader.ts b/packages/core/src/skills/skillLoader.ts index e746caa179..7f6d3c11d0 100644 --- a/packages/core/src/skills/skillLoader.ts +++ b/packages/core/src/skills/skillLoader.ts @@ -27,6 +27,8 @@ export interface SkillDefinition { disabled?: boolean; /** Whether the skill is a built-in skill. */ isBuiltin?: boolean; + /** The name of the extension that provided this skill, if any. 
*/ + extensionName?: string; } export const FRONTMATTER_REGEX = diff --git a/packages/core/src/telemetry/memory-monitor.test.ts b/packages/core/src/telemetry/memory-monitor.test.ts index fce8119753..8ad0d45595 100644 --- a/packages/core/src/telemetry/memory-monitor.test.ts +++ b/packages/core/src/telemetry/memory-monitor.test.ts @@ -89,6 +89,7 @@ const mockHeapStatistics = { total_global_handles_size: 8192, used_global_handles_size: 4096, external_memory: 2097152, + total_allocated_bytes: 31457280, }; const mockHeapSpaceStatistics = [ diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index f0d7aaa4aa..ea202c57de 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ @@ -301,15 +301,41 @@ class GrepToolInvocation extends BaseToolInvocation< * @param {string} command The command name (e.g., 'git', 'grep'). * @returns {Promise} True if the command is available, false otherwise. */ - private isCommandAvailable(command: string): Promise { - return new Promise((resolve) => { - const checkCommand = process.platform === 'win32' ? 'where' : 'command'; - const checkArgs = - process.platform === 'win32' ? [command] : ['-v', command]; - try { - const child = spawn(checkCommand, checkArgs, { + private async isCommandAvailable(command: string): Promise { + const checkCommand = process.platform === 'win32' ? 'where' : 'command'; + const checkArgs = + process.platform === 'win32' ? 
[command] : ['-v', command]; + try { + const sandboxManager = this.config.sandboxManager; + + let finalCommand = checkCommand; + let finalArgs = checkArgs; + let finalEnv = process.env; + + if (sandboxManager) { + try { + const prepared = await sandboxManager.prepareCommand({ + command: checkCommand, + args: checkArgs, + cwd: process.cwd(), + env: process.env, + }); + finalCommand = prepared.program; + finalArgs = prepared.args; + finalEnv = prepared.env; + } catch (err) { + debugLogger.debug( + `[GrepTool] Sandbox preparation failed for '${command}':`, + err, + ); + } + } + + return await new Promise((resolve) => { + const child = spawn(finalCommand, finalArgs, { stdio: 'ignore', shell: true, + env: finalEnv, }); child.on('close', (code) => resolve(code === 0)); child.on('error', (err) => { @@ -319,10 +345,10 @@ class GrepToolInvocation extends BaseToolInvocation< ); resolve(false); }); - } catch { - resolve(false); - } - }); + }); + } catch { + return false; + } } /** @@ -381,6 +407,7 @@ class GrepToolInvocation extends BaseToolInvocation< cwd: absolutePath, signal: options.signal, allowedExitCodes: [0, 1], + sandboxManager: this.config.sandboxManager, }); const results: GrepMatch[] = []; @@ -452,6 +479,7 @@ class GrepToolInvocation extends BaseToolInvocation< cwd: absolutePath, signal: options.signal, allowedExitCodes: [0, 1], + sandboxManager: this.config.sandboxManager, }); for await (const line of generator) { diff --git a/packages/core/src/tools/mcp-client-manager.test.ts b/packages/core/src/tools/mcp-client-manager.test.ts index 61b99915c0..4d91898077 100644 --- a/packages/core/src/tools/mcp-client-manager.test.ts +++ b/packages/core/src/tools/mcp-client-manager.test.ts @@ -89,7 +89,7 @@ describe('McpClientManager', () => { it('should discover tools from all configured', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); const manager = setupManager(new McpClientManager('0.0.1', 
mockConfig)); await manager.startConfiguredMcpServers(); @@ -100,9 +100,9 @@ describe('McpClientManager', () => { it('should batch context refresh when starting multiple servers', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'server-1': {}, - 'server-2': {}, - 'server-3': {}, + 'server-1': { command: 'node' }, + 'server-2': { command: 'node' }, + 'server-3': { command: 'node' }, }); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); await manager.startConfiguredMcpServers(); @@ -117,7 +117,7 @@ describe('McpClientManager', () => { it('should update global discovery state', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); expect(manager.getDiscoveryState()).toBe(MCPDiscoveryState.NOT_STARTED); @@ -129,7 +129,7 @@ describe('McpClientManager', () => { it('should mark discovery completed when all configured servers are user-disabled', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); mockConfig.getMcpEnablementCallbacks.mockReturnValue({ isSessionDisabled: vi.fn().mockReturnValue(false), @@ -149,7 +149,7 @@ describe('McpClientManager', () => { it('should mark discovery completed when all configured servers are blocked', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); mockConfig.getBlockedMcpServers.mockReturnValue(['test-server']); @@ -166,7 +166,7 @@ describe('McpClientManager', () => { it('should not discover tools if folder is not trusted', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); mockConfig.isTrustedFolder.mockReturnValue(false); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); @@ -177,7 +177,7 @@ describe('McpClientManager', () => { it('should not 
start blocked servers', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); mockConfig.getBlockedMcpServers.mockReturnValue(['test-server']); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); @@ -188,8 +188,8 @@ describe('McpClientManager', () => { it('should only start allowed servers if allow list is not empty', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, - 'another-server': {}, + 'test-server': { command: 'node' }, + 'another-server': { command: 'node' }, }); mockConfig.getAllowedMcpServers.mockReturnValue(['another-server']); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); @@ -203,7 +203,7 @@ describe('McpClientManager', () => { await manager.startExtension({ name: 'test-extension', mcpServers: { - 'test-server': {}, + 'test-server': { command: 'node' }, }, isActive: true, version: '1.0.0', @@ -220,7 +220,7 @@ describe('McpClientManager', () => { await manager.startExtension({ name: 'test-extension', mcpServers: { - 'test-server': {}, + 'test-server': { command: 'node' }, }, isActive: false, version: '1.0.0', @@ -234,7 +234,7 @@ describe('McpClientManager', () => { it('should add blocked servers to the blockedMcpServers list', async () => { mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); mockConfig.getBlockedMcpServers.mockReturnValue(['test-server']); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); @@ -244,12 +244,26 @@ describe('McpClientManager', () => { ]); }); + it('should skip discovery for servers without connection details', async () => { + mockConfig.getMcpServers.mockReturnValue({ + 'test-server': { excludeTools: ['dangerous_tool'] }, + }); + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + await manager.startConfiguredMcpServers(); + expect(mockedMcpClient.connect).not.toHaveBeenCalled(); + 
expect(mockedMcpClient.discoverInto).not.toHaveBeenCalled(); + + // But it should still be tracked in allServerConfigs + expect(manager.getMcpServers()).toHaveProperty('test-server'); + }); + describe('restart', () => { it('should restart all running servers', async () => { + const serverConfig = { command: 'node' }; mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': serverConfig, }); - mockedMcpClient.getServerConfig.mockReturnValue({}); + mockedMcpClient.getServerConfig.mockReturnValue(serverConfig); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); await manager.startConfiguredMcpServers(); @@ -265,10 +279,11 @@ describe('McpClientManager', () => { describe('restartServer', () => { it('should restart the specified server', async () => { + const serverConfig = { command: 'node' }; mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': serverConfig, }); - mockedMcpClient.getServerConfig.mockReturnValue({}); + mockedMcpClient.getServerConfig.mockReturnValue(serverConfig); const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); await manager.startConfiguredMcpServers(); @@ -320,7 +335,7 @@ describe('McpClientManager', () => { // A NEW McpClient should have been constructed with the updated config expect(constructorCalls).toHaveLength(2); - expect(constructorCalls[1][1]).toBe(updatedConfig); + expect(constructorCalls[1][1]).toMatchObject(updatedConfig); }); }); @@ -346,8 +361,8 @@ describe('McpClientManager', () => { const manager = new McpClientManager('0.0.1', mockConfig); mockConfig.getMcpServers.mockReturnValue({ - 'server-with-instructions': {}, - 'server-without-instructions': {}, + 'server-with-instructions': { command: 'node' }, + 'server-without-instructions': { command: 'node' }, }); await manager.startConfiguredMcpServers(); @@ -375,7 +390,7 @@ describe('McpClientManager', () => { }); mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 
'test-server': { command: 'node' }, }); const manager = new McpClientManager('0.0.1', mockConfig); @@ -391,10 +406,10 @@ describe('McpClientManager', () => { throw new Error('Disconnect failed unexpectedly'); } }); - mockedMcpClient.getServerConfig.mockReturnValue({}); + mockedMcpClient.getServerConfig.mockReturnValue({ command: 'node' }); mockConfig.getMcpServers.mockReturnValue({ - 'test-server': {}, + 'test-server': { command: 'node' }, }); const manager = new McpClientManager('0.0.1', mockConfig); @@ -428,7 +443,7 @@ describe('McpClientManager', () => { expect(manager.getMcpServers()).not.toHaveProperty('test-server'); }); - it('should ignore an extension attempting to register a server with an existing name', async () => { + it('should merge extension configuration with an existing user-configured server', async () => { const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); const userConfig = { command: 'node', args: ['user-server.js'] }; @@ -454,8 +469,187 @@ describe('McpClientManager', () => { await manager.startExtension(extension); - expect(mockedMcpClient.disconnect).not.toHaveBeenCalled(); - expect(mockedMcpClient.connect).toHaveBeenCalledTimes(1); + // It should disconnect the user-only version and reconnect with the merged version + expect(mockedMcpClient.disconnect).toHaveBeenCalledTimes(1); + expect(mockedMcpClient.connect).toHaveBeenCalledTimes(2); + + // Verify user settings (command/args) still win in the merged config + const lastCall = vi.mocked(McpClient).mock.calls[1]; + expect(lastCall[1].command).toBe('node'); + expect(lastCall[1].args).toEqual(['user-server.js']); + expect(lastCall[1].extension).toEqual(extension); + }); + + it('should securely merge tool lists and env variables regardless of load order', async () => { + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + + const userConfig = { + excludeTools: ['user-tool'], + includeTools: ['shared-inc', 'user-only-inc'], + env: { USER_VAR: 
'user-val', OVERRIDE_VAR: 'user-override' }, + }; + + const extension: GeminiCLIExtension = { + name: 'test-extension', + mcpServers: { + 'test-server': { + command: 'node', + args: ['ext.js'], + excludeTools: ['ext-tool'], + includeTools: ['shared-inc', 'ext-only-inc'], + env: { EXT_VAR: 'ext-val', OVERRIDE_VAR: 'ext-override' }, + }, + }, + isActive: true, + version: '1.0.0', + path: '/some-path', + contextFiles: [], + id: '123', + }; + + // Case 1: Extension loads first, then User config (e.g. from startConfiguredMcpServers) + await manager.startExtension(extension); + + mockedMcpClient.getServerConfig.mockReturnValue({ + ...extension.mcpServers!['test-server'], + extension, + }); + + await manager.maybeDiscoverMcpServer('test-server', userConfig); + + let lastCall = vi.mocked(McpClient).mock.calls[1]; // Second call due to re-discovery + let mergedConfig = lastCall[1]; + + // Exclude list should be unioned (most restrictive) + expect(mergedConfig.excludeTools).toContain('ext-tool'); + expect(mergedConfig.excludeTools).toContain('user-tool'); + + // Include list should be intersected (most restrictive) + expect(mergedConfig.includeTools).toContain('shared-inc'); + expect(mergedConfig.includeTools).not.toContain('user-only-inc'); + expect(mergedConfig.includeTools).not.toContain('ext-only-inc'); + + expect(mergedConfig.env!['EXT_VAR']).toBe('ext-val'); + expect(mergedConfig.env!['USER_VAR']).toBe('user-val'); + expect(mergedConfig.env!['OVERRIDE_VAR']).toBe('user-override'); + expect(mergedConfig.extension).toBe(extension); // Extension ID preserved! 
+ + // Reset for Case 2 + vi.mocked(McpClient).mockClear(); + const manager2 = setupManager(new McpClientManager('0.0.1', mockConfig)); + + // Case 2: User config loads first, then Extension loads + // This call will skip discovery because userConfig has no connection details + await manager2.maybeDiscoverMcpServer('test-server', userConfig); + + // In Case 2, the existing client is NOT created yet because discovery was skipped. + // So getServerConfig on mockedMcpClient won't be called yet. + // However, startExtension will call maybeDiscoverMcpServer which will merge. + + await manager2.startExtension(extension); + + lastCall = vi.mocked(McpClient).mock.calls[0]; + mergedConfig = lastCall[1]; + + expect(mergedConfig.excludeTools).toContain('ext-tool'); + expect(mergedConfig.excludeTools).toContain('user-tool'); + expect(mergedConfig.includeTools).toContain('shared-inc'); + expect(mergedConfig.includeTools).not.toContain('user-only-inc'); + expect(mergedConfig.includeTools).not.toContain('ext-only-inc'); + + expect(mergedConfig.env!['EXT_VAR']).toBe('ext-val'); + expect(mergedConfig.env!['USER_VAR']).toBe('user-val'); + expect(mergedConfig.env!['OVERRIDE_VAR']).toBe('user-override'); + expect(mergedConfig.extension).toBe(extension); // Extension ID preserved! 
+ }); + + it('should result in empty includeTools if intersection is empty', async () => { + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + const userConfig = { includeTools: ['user-tool'] }; + const extConfig = { + command: 'node', + args: ['ext.js'], + includeTools: ['ext-tool'], + }; + + await manager.maybeDiscoverMcpServer('test-server', userConfig); + await manager.maybeDiscoverMcpServer('test-server', extConfig); + + const lastCall = vi.mocked(McpClient).mock.calls[0]; + expect(lastCall[1].includeTools).toEqual([]); // Empty array = no tools allowed + }); + + it('should respect a single allowlist if only one is provided', async () => { + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + const userConfig = { includeTools: ['user-tool'] }; + const extConfig = { command: 'node', args: ['ext.js'] }; + + await manager.maybeDiscoverMcpServer('test-server', userConfig); + await manager.maybeDiscoverMcpServer('test-server', extConfig); + + const lastCall = vi.mocked(McpClient).mock.calls[0]; + expect(lastCall[1].includeTools).toEqual(['user-tool']); + }); + + it('should allow partial overrides of connection properties', async () => { + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + const extConfig = { command: 'node', args: ['ext.js'], timeout: 1000 }; + const userOverride = { args: ['overridden.js'] }; + + // Load extension first + await manager.maybeDiscoverMcpServer('test-server', extConfig); + mockedMcpClient.getServerConfig.mockReturnValue(extConfig); + + // Apply partial user override + await manager.maybeDiscoverMcpServer('test-server', userOverride); + + const lastCall = vi.mocked(McpClient).mock.calls[1]; + const finalConfig = lastCall[1]; + + expect(finalConfig.command).toBe('node'); // Preserved from base + expect(finalConfig.args).toEqual(['overridden.js']); // Overridden + expect(finalConfig.timeout).toBe(1000); // Preserved from base + }); + + it('should prevent one 
extension from hijacking another extension server name', async () => { + const manager = setupManager(new McpClientManager('0.0.1', mockConfig)); + + const extension1: GeminiCLIExtension = { + name: 'extension-1', + isActive: true, + id: 'ext-1', + version: '1.0.0', + path: '/path1', + contextFiles: [], + mcpServers: { + 'shared-name': { command: 'node', args: ['server1.js'] }, + }, + }; + + const extension2: GeminiCLIExtension = { + name: 'extension-2', + isActive: true, + id: 'ext-2', + version: '1.0.0', + path: '/path2', + contextFiles: [], + mcpServers: { + 'shared-name': { command: 'node', args: ['server2.js'] }, + }, + }; + + // Start extension 1 (discovery begins but is not yet complete) + const p1 = manager.startExtension(extension1); + + // Immediately attempt to start extension 2 with the same name + await manager.startExtension(extension2); + + await p1; + + // Only extension 1 should have been initialized + expect(vi.mocked(McpClient)).toHaveBeenCalledTimes(1); + const lastCall = vi.mocked(McpClient).mock.calls[0]; + expect(lastCall[1].extension).toBe(extension1); }); it('should remove servers from blockedMcpServers when stopExtension is called', async () => { diff --git a/packages/core/src/tools/mcp-client-manager.ts b/packages/core/src/tools/mcp-client-manager.ts index 7f2752561e..c20dc9b408 100644 --- a/packages/core/src/tools/mcp-client-manager.ts +++ b/packages/core/src/tools/mcp-client-manager.ts @@ -284,6 +284,51 @@ export class McpClientManager { return createHash('sha256').update(stableStringify(keyData)).digest('hex'); } + /** + * Merges two MCP configurations. The second configuration (override) + * takes precedence for scalar properties, but array properties are + * merged securely (exclude = union, include = intersection) and + * environment objects are merged. 
+ */ + private mergeMcpConfigs( + base: MCPServerConfig, + override: MCPServerConfig, + ): MCPServerConfig { + // For allowlists (includeTools), use intersection to ensure the most + // restrictive policy wins. A tool must be allowed by BOTH parties. + let includeTools: string[] | undefined; + if (base.includeTools && override.includeTools) { + includeTools = base.includeTools.filter((t) => + override.includeTools!.includes(t), + ); + // If the intersection is empty, we must keep an empty array to indicate + // that NO tools are allowed (undefined would allow everything). + } else { + // If only one provides an allowlist, use that. + includeTools = override.includeTools ?? base.includeTools; + } + + // For blocklists (excludeTools), use union so if ANY party blocks it, + // it stays blocked. + const excludeTools = [ + ...new Set([ + ...(base.excludeTools ?? []), + ...(override.excludeTools ?? []), + ]), + ]; + + const env = { ...(base.env ?? {}), ...(override.env ?? {}) }; + + return { + ...base, + ...override, + includeTools, + excludeTools: excludeTools.length > 0 ? excludeTools : undefined, + env: Object.keys(env).length > 0 ? env : undefined, + extension: override.extension ?? base.extension, + }; + } + async maybeDiscoverMcpServer( name: string, config: MCPServerConfig, @@ -293,8 +338,38 @@ export class McpClientManager { resourceRegistry: ResourceRegistry; }, ): Promise { - const clientKey = this.getClientKey(name, config); - const existing = this.clients.get(clientKey); + const existingConfig = this.allServerConfigs.get(name); + if ( + existingConfig?.extension?.id && + config.extension?.id && + existingConfig.extension.id !== config.extension.id + ) { + const extensionText = config.extension + ? 
` from extension "${config.extension.name}"` + : ''; + debugLogger.warn( + `Skipping MCP config for server with name "${name}"${extensionText} as it already exists.`, + ); + return; + } + + let finalConfig = config; + if (existingConfig) { + // If we're merging an extension config into a user config, + // the user config should be the override. + if (config.extension && !existingConfig.extension) { + finalConfig = this.mergeMcpConfigs(config, existingConfig); + } else { + // Otherwise (User over Extension, or User over User), + // the incoming config is the override. + finalConfig = this.mergeMcpConfigs(existingConfig, config); + } + } + + // Always track server config for UI display + this.allServerConfigs.set(name, finalConfig); + + const clientKey = this.getClientKey(name, finalConfig); // If no registries are provided (main agent) and a server with this name already exists // but with a different configuration, handle potential conflicts. @@ -303,43 +378,32 @@ export class McpClientManager { (c) => c.getServerName() === name, ); if (existingSameName) { - const existingConfig = existingSameName.getServerConfig(); - const existingKey = this.getClientKey(name, existingConfig); + const existingConfigFromClient = existingSameName.getServerConfig(); + const existingKey = this.getClientKey(name, existingConfigFromClient); if (existingKey !== clientKey) { - const bothMain = !config.extension && !existingConfig.extension; - const sameExtension = - config.extension && - existingConfig.extension && - config.extension.id === existingConfig.extension.id; - - if (bothMain || sameExtension) { - // This is a configuration update from the same source (hot-reload). - // We should stop the old client before starting the new one. - await this.disconnectClient(existingKey, true); - } else { - // This is a conflict (e.g. an extension trying to overwrite a main server). - const extensionText = config.extension - ? 
` from extension "${config.extension.name}"` - : ''; - debugLogger.warn( - `Skipping MCP config for server with name "${name}"${extensionText} as a server with that name already exists from a different source.`, - ); - return; - } + // This is a configuration update (hot-reload). + // We should stop the old client before starting the new one. + await this.disconnectClient(existingKey, true); } } } - // Always track server config for UI display - this.allServerConfigs.set(name, config); + const existing = this.clients.get(clientKey); + + // If no connection details are provided, we can't discover this server. + // This often happens when a user provides only overrides (like excludeTools) + // for a server that is actually provided by an extension. + if (!finalConfig.command && !finalConfig.url && !finalConfig.httpUrl) { + return; + } // Check if blocked by admin settings (allowlist/excludelist) if (this.isBlockedBySettings(name)) { if (!this.blockedMcpServers.find((s) => s.name === name)) { this.blockedMcpServers?.push({ name, - extensionName: config.extension?.name ?? '', + extensionName: finalConfig.extension?.name ?? 
'', }); } return; @@ -354,7 +418,7 @@ export class McpClientManager { if (!this.cliConfig.isTrustedFolder()) { return; } - if (config.extension && !config.extension.isActive) { + if (finalConfig.extension && !finalConfig.extension.isActive) { return; } @@ -365,7 +429,7 @@ export class McpClientManager { if (!client) { client = new McpClient( name, - config, + finalConfig, this.cliConfig.getWorkspaceContext(), this.cliConfig, this.cliConfig.getDebugMode(), diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 18a1b0c133..69f269143b 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -476,6 +476,7 @@ class GrepToolInvocation extends BaseToolInvocation< const generator = execStreaming(rgPath, rgArgs, { signal: options.signal, allowedExitCodes: [0, 1], + sandboxManager: this.config.sandboxManager, }); let matchesFound = 0; diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 5e17f29690..ace59cd7cf 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -45,6 +45,7 @@ import { initializeShellParsers } from '../utils/shell-utils.js'; import { ShellTool, OUTPUT_UPDATE_INTERVAL_MS } from './shell.js'; import { debugLogger } from '../index.js'; import { type Config } from '../config/config.js'; +import { NoopSandboxManager } from '../services/sandboxManager.js'; import { type ShellExecutionResult, type ShellOutputEvent, @@ -137,6 +138,7 @@ describe('ShellTool', () => { getEnableInteractiveShell: vi.fn().mockReturnValue(false), getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true), sanitizationConfig: {}, + sandboxManager: new NoopSandboxManager(), } as unknown as Config; const bus = createMockMessageBus(); @@ -281,7 +283,11 @@ describe('ShellTool', () => { expect.any(Function), expect.any(AbortSignal), false, - { pager: 'cat', sanitizationConfig: {} }, + expect.objectContaining({ + pager: 'cat', + 
sanitizationConfig: {}, + sandboxManager: expect.any(Object), + }), ); expect(result.llmContent).toContain('Background PIDs: 54322'); // The file should be deleted by the tool @@ -306,7 +312,11 @@ describe('ShellTool', () => { expect.any(Function), expect.any(AbortSignal), false, - { pager: 'cat', sanitizationConfig: {} }, + expect.objectContaining({ + pager: 'cat', + sanitizationConfig: {}, + sandboxManager: expect.any(Object), + }), ); }); @@ -327,7 +337,11 @@ describe('ShellTool', () => { expect.any(Function), expect.any(AbortSignal), false, - { pager: 'cat', sanitizationConfig: {} }, + expect.objectContaining({ + pager: 'cat', + sanitizationConfig: {}, + sandboxManager: expect.any(Object), + }), ); }); @@ -373,7 +387,11 @@ describe('ShellTool', () => { expect.any(Function), expect.any(AbortSignal), false, - { pager: 'cat', sanitizationConfig: {} }, + { + pager: 'cat', + sanitizationConfig: {}, + sandboxManager: new NoopSandboxManager(), + }, ); }, 20000, diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index d5af530d33..069bcd5981 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -278,6 +278,7 @@ export class ShellToolInvocation extends BaseToolInvocation< sanitizationConfig: shellExecutionConfig?.sanitizationConfig ?? 
this.context.config.sanitizationConfig, + sandboxManager: this.context.config.sandboxManager, }, ); diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index 157d90efa8..f8d01e5729 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -57,7 +57,28 @@ class DiscoveredToolInvocation extends BaseToolInvocation< _updateOutput?: (output: string) => void, ): Promise { const callCommand = this.config.getToolCallCommand()!; - const child = spawn(callCommand, [this.originalToolName]); + const args = [this.originalToolName]; + + let finalCommand = callCommand; + let finalArgs = args; + let finalEnv = process.env; + + const sandboxManager = this.config.sandboxManager; + if (sandboxManager) { + const prepared = await sandboxManager.prepareCommand({ + command: callCommand, + args, + cwd: process.cwd(), + env: process.env, + }); + finalCommand = prepared.program; + finalArgs = prepared.args; + finalEnv = prepared.env; + } + + const child = spawn(finalCommand, finalArgs, { + env: finalEnv, + }); child.stdin.write(JSON.stringify(this.params)); child.stdin.end(); @@ -328,8 +349,36 @@ export class ToolRegistry { 'Tool discovery command is empty or contains only whitespace.', ); } - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const proc = spawn(cmdParts[0] as string, cmdParts.slice(1) as string[]); + + const firstPart = cmdParts[0]; + if (typeof firstPart !== 'string') { + throw new Error( + 'Tool discovery command must start with a program name.', + ); + } + + let finalCommand: string = firstPart; + let finalArgs: string[] = cmdParts + .slice(1) + .filter((p): p is string => typeof p === 'string'); + let finalEnv = process.env; + + const sandboxManager = this.config.sandboxManager; + if (sandboxManager) { + const prepared = await sandboxManager.prepareCommand({ + command: finalCommand, + args: finalArgs, + cwd: process.cwd(), + env: process.env, + }); + 
finalCommand = prepared.program; + finalArgs = prepared.args; + finalEnv = prepared.env; + } + + const proc = spawn(finalCommand, finalArgs, { + env: finalEnv, + }); let stdout = ''; const stdoutDecoder = new StringDecoder('utf8'); let stderr = ''; diff --git a/packages/core/src/utils/environmentContext.test.ts b/packages/core/src/utils/environmentContext.test.ts index a43bb5fd56..42b2316955 100644 --- a/packages/core/src/utils/environmentContext.test.ts +++ b/packages/core/src/utils/environmentContext.test.ts @@ -165,6 +165,29 @@ describe('getEnvironmentContext', () => { expect(getFolderStructure).not.toHaveBeenCalled(); }); + it('should exclude environment memory when JIT context is enabled', async () => { + (mockConfig as Record)['isJitContextEnabled'] = vi + .fn() + .mockReturnValue(true); + + const parts = await getEnvironmentContext(mockConfig as Config); + + const context = parts[0].text; + expect(context).not.toContain('Mock Environment Memory'); + expect(mockConfig.getEnvironmentMemory).not.toHaveBeenCalled(); + }); + + it('should include environment memory when JIT context is disabled', async () => { + (mockConfig as Record)['isJitContextEnabled'] = vi + .fn() + .mockReturnValue(false); + + const parts = await getEnvironmentContext(mockConfig as Config); + + const context = parts[0].text; + expect(context).toContain('Mock Environment Memory'); + }); + it('should handle read_many_files returning no content', async () => { const mockReadManyFilesTool = { build: vi.fn().mockReturnValue({ diff --git a/packages/core/src/utils/environmentContext.ts b/packages/core/src/utils/environmentContext.ts index 88dd1aab68..d5bdd2d75b 100644 --- a/packages/core/src/utils/environmentContext.ts +++ b/packages/core/src/utils/environmentContext.ts @@ -57,7 +57,12 @@ export async function getEnvironmentContext(config: Config): Promise { ? 
await getDirectoryContextString(config) : ''; const tempDir = config.storage.getProjectTempDir(); - const environmentMemory = config.getEnvironmentMemory(); + // When JIT context is enabled, project memory is already included in the + // system instruction via renderUserMemory(). Skip it here to avoid sending + // the same GEMINI.md content twice. + const environmentMemory = config.isJitContextEnabled?.() + ? '' + : config.getEnvironmentMemory(); const context = ` diff --git a/packages/core/src/utils/generateContentResponseUtilities.ts b/packages/core/src/utils/generateContentResponseUtilities.ts index fdd5dff81a..3b27dd372f 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.ts @@ -13,6 +13,7 @@ import type { import { getResponseText } from './partUtils.js'; import { supportsMultimodalFunctionResponse } from '../config/models.js'; import { debugLogger } from './debugLogger.js'; +import type { Config } from '../config/config.js'; /** * Formats tool output for a Gemini FunctionResponse. 
@@ -48,6 +49,7 @@ export function convertToFunctionResponse( callId: string, llmContent: PartListUnion, model: string, + config?: Config, ): Part[] { if (typeof llmContent === 'string') { return [createFunctionResponsePart(callId, toolName, llmContent)]; @@ -96,7 +98,10 @@ export function convertToFunctionResponse( }, }; - const isMultimodalFRSupported = supportsMultimodalFunctionResponse(model); + const isMultimodalFRSupported = supportsMultimodalFunctionResponse( + model, + config, + ); const siblingParts: Part[] = [...fileDataParts]; if (inlineDataParts.length > 0) { diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts index 00b3533400..89f50a9ce7 100644 --- a/packages/core/src/utils/shell-utils.ts +++ b/packages/core/src/utils/shell-utils.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ @@ -17,6 +17,8 @@ import * as readline from 'node:readline'; import { Language, Parser, Query, type Node, type Tree } from 'web-tree-sitter'; import { loadWasmBinary } from './fileUtils.js'; import { debugLogger } from './debugLogger.js'; +import type { SandboxManager } from '../services/sandboxManager.js'; +import { NoopSandboxManager } from '../services/sandboxManager.js'; export const SHELL_TOOL_NAMES = ['run_shell_command', 'ShellTool']; @@ -737,13 +739,26 @@ export function stripShellWrapper(command: string): string { * @param config The application configuration. * @returns An object with 'allowed' boolean and optional 'reason' string if not allowed. 
*/ -export const spawnAsync = ( +export const spawnAsync = async ( command: string, args: string[], - options?: SpawnOptionsWithoutStdio, -): Promise<{ stdout: string; stderr: string }> => - new Promise((resolve, reject) => { - const child = spawn(command, args, options); + options?: SpawnOptionsWithoutStdio & { sandboxManager?: SandboxManager }, +): Promise<{ stdout: string; stderr: string }> => { + const sandboxManager = options?.sandboxManager ?? new NoopSandboxManager(); + const prepared = await sandboxManager.prepareCommand({ + command, + args, + cwd: options?.cwd?.toString() ?? process.cwd(), + env: options?.env ?? process.env, + }); + + const { program: finalCommand, args: finalArgs, env: finalEnv } = prepared; + + return new Promise((resolve, reject) => { + const child = spawn(finalCommand, finalArgs, { + ...options, + env: finalEnv, + }); let stdout = ''; let stderr = ''; @@ -767,6 +782,7 @@ export const spawnAsync = ( reject(err); }); }); +}; /** * Executes a command and yields lines of output as they appear. @@ -782,10 +798,22 @@ export async function* execStreaming( options?: SpawnOptionsWithoutStdio & { signal?: AbortSignal; allowedExitCodes?: number[]; + sandboxManager?: SandboxManager; }, ): AsyncGenerator { - const child = spawn(command, args, { + const sandboxManager = options?.sandboxManager ?? new NoopSandboxManager(); + const prepared = await sandboxManager.prepareCommand({ + command, + args, + cwd: options?.cwd?.toString() ?? process.cwd(), + env: options?.env ?? 
process.env, + }); + + const { program: finalCommand, args: finalArgs, env: finalEnv } = prepared; + + const child = spawn(finalCommand, finalArgs, { ...options, + env: finalEnv, // ensure we don't open a window on windows if possible/relevant windowsHide: true, }); diff --git a/packages/devtools/package.json b/packages/devtools/package.json index 6a6da979b4..7876c78ab0 100644 --- a/packages/devtools/package.json +++ b/packages/devtools/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-devtools", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "license": "Apache-2.0", "type": "module", "main": "dist/src/index.js", diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 110e7a7457..c39fb0c0fc 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-sdk", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "description": "Gemini CLI SDK", "license": "Apache-2.0", "repository": { diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index 454d050581..7b27f429da 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -1,6 +1,6 @@ { "name": "@google/gemini-cli-test-utils", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": "0.35.0-nightly.20260313.bb060d7a9", "private": true, "main": "src/index.ts", "license": "Apache-2.0", diff --git a/packages/vscode-ide-companion/package.json b/packages/vscode-ide-companion/package.json index ea095429c6..7ab36e57d4 100644 --- a/packages/vscode-ide-companion/package.json +++ b/packages/vscode-ide-companion/package.json @@ -2,7 +2,7 @@ "name": "gemini-cli-vscode-ide-companion", "displayName": "Gemini CLI Companion", "description": "Enable Gemini CLI with direct access to your IDE workspace.", - "version": "0.35.0-nightly.20260311.657f19c1f", + "version": 
"0.35.0-nightly.20260313.bb060d7a9", "publisher": "google", "icon": "assets/icon.png", "repository": { diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 4e3631fc2c..f482053d9f 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -629,7 +629,7 @@ "modelConfigs": { "title": "Model Configs", "description": "Model configurations.", - "markdownDescription": "Model configurations.\n\n- Category: `Model`\n- Requires restart: `no`\n- Default: `{\n \"aliases\": {\n \"base\": {\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 0,\n \"topP\": 1\n }\n }\n },\n \"chat-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"includeThoughts\": true\n },\n \"temperature\": 1,\n \"topP\": 0.95,\n \"topK\": 64\n }\n }\n },\n \"chat-base-2.5\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 8192\n }\n }\n }\n },\n \"chat-base-3\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingLevel\": \"HIGH\"\n }\n }\n }\n },\n \"gemini-3-pro-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"gemini-3-flash-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"gemini-2.5-pro\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"gemini-2.5-flash\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"gemini-2.5-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-3-flash-base\": {\n 
\"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"classifier\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 1024,\n \"thinkingConfig\": {\n \"thinkingBudget\": 512\n }\n }\n }\n },\n \"prompt-completion\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.3,\n \"maxOutputTokens\": 16000,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"fast-ack-helper\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.2,\n \"maxOutputTokens\": 120,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"edit-corrector\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"summarizer-default\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"summarizer-shell\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"web-search\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"googleSearch\": {}\n }\n ]\n }\n }\n },\n \"web-fetch\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"urlContext\": {}\n }\n ]\n }\n }\n },\n \"web-fetch-fallback\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection-double-check\": {\n 
\"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"llm-edit-fixer\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"next-speaker-checker\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"chat-compression-3-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"chat-compression-3-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"chat-compression-2.5-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"chat-compression-2.5-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"chat-compression-2.5-flash-lite\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"chat-compression-default\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n }\n },\n \"overrides\": [\n {\n \"match\": {\n \"model\": \"chat-base\",\n \"isRetry\": true\n },\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 1\n }\n }\n }\n ]\n}`", + "markdownDescription": "Model configurations.\n\n- Category: `Model`\n- Requires restart: `no`\n- Default: `{\n \"aliases\": {\n \"base\": {\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 0,\n \"topP\": 1\n }\n }\n },\n \"chat-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"includeThoughts\": true\n },\n \"temperature\": 1,\n \"topP\": 0.95,\n \"topK\": 64\n }\n }\n },\n \"chat-base-2.5\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 8192\n }\n }\n }\n },\n \"chat-base-3\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingLevel\": \"HIGH\"\n }\n }\n }\n },\n \"gemini-3-pro-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": 
\"gemini-3-pro-preview\"\n }\n },\n \"gemini-3-flash-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"gemini-2.5-pro\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"gemini-2.5-flash\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"gemini-2.5-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-3-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"classifier\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 1024,\n \"thinkingConfig\": {\n \"thinkingBudget\": 512\n }\n }\n }\n },\n \"prompt-completion\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.3,\n \"maxOutputTokens\": 16000,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"fast-ack-helper\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.2,\n \"maxOutputTokens\": 120,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"edit-corrector\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"summarizer-default\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"summarizer-shell\": {\n \"extends\": \"base\",\n 
\"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"web-search\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"googleSearch\": {}\n }\n ]\n }\n }\n },\n \"web-fetch\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"urlContext\": {}\n }\n ]\n }\n }\n },\n \"web-fetch-fallback\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection-double-check\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"llm-edit-fixer\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"next-speaker-checker\": {\n \"extends\": \"gemini-3-flash-base\",\n \"modelConfig\": {}\n },\n \"chat-compression-3-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"chat-compression-3-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"chat-compression-2.5-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"chat-compression-2.5-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"chat-compression-2.5-flash-lite\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"chat-compression-default\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n }\n },\n \"overrides\": [\n {\n \"match\": {\n \"model\": \"chat-base\",\n \"isRetry\": true\n },\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 1\n }\n }\n }\n ],\n \"modelDefinitions\": {\n \"gemini-3.1-pro-preview\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": true,\n 
\"multimodalToolUse\": true\n }\n },\n \"gemini-3.1-pro-preview-customtools\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": true\n }\n },\n \"gemini-3-pro-preview\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": true\n }\n },\n \"gemini-3-flash-preview\": {\n \"tier\": \"flash\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": true\n }\n },\n \"gemini-2.5-pro\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-2.5\",\n \"isPreview\": false,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"gemini-2.5-flash\": {\n \"tier\": \"flash\",\n \"family\": \"gemini-2.5\",\n \"isPreview\": false,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"tier\": \"flash-lite\",\n \"family\": \"gemini-2.5\",\n \"isPreview\": false,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"auto\": {\n \"tier\": \"auto\",\n \"isPreview\": true,\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": false\n }\n },\n \"pro\": {\n \"tier\": \"pro\",\n \"isPreview\": false,\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": false\n }\n },\n \"flash\": {\n \"tier\": \"flash\",\n \"isPreview\": false,\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"flash-lite\": {\n \"tier\": \"flash-lite\",\n \"isPreview\": false,\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"auto-gemini-3\": {\n \"displayName\": \"Auto (Gemini 3)\",\n \"tier\": \"auto\",\n 
\"isPreview\": true,\n \"dialogLocation\": \"main\",\n \"dialogDescription\": \"Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash\",\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": false\n }\n },\n \"auto-gemini-2.5\": {\n \"displayName\": \"Auto (Gemini 2.5)\",\n \"tier\": \"auto\",\n \"isPreview\": false,\n \"dialogLocation\": \"main\",\n \"dialogDescription\": \"Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n }\n }\n}`", "default": { "aliases": { "base": { @@ -871,7 +871,132 @@ } } } - ] + ], + "modelDefinitions": { + "gemini-3.1-pro-preview": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3.1-pro-preview-customtools": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3-pro-preview": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3-flash-preview": { + "tier": "flash", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": true + } + }, + "gemini-2.5-pro": { + "tier": "pro", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "gemini-2.5-flash": { + "tier": "flash", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "gemini-2.5-flash-lite": { + "tier": "flash-lite", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + 
"thinking": false, + "multimodalToolUse": false + } + }, + "auto": { + "tier": "auto", + "isPreview": true, + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "pro": { + "tier": "pro", + "isPreview": false, + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "flash": { + "tier": "flash", + "isPreview": false, + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "flash-lite": { + "tier": "flash-lite", + "isPreview": false, + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "auto-gemini-3": { + "displayName": "Auto (Gemini 3)", + "tier": "auto", + "isPreview": true, + "dialogLocation": "main", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash", + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "auto-gemini-2.5": { + "displayName": "Auto (Gemini 2.5)", + "tier": "auto", + "isPreview": false, + "dialogLocation": "main", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash", + "features": { + "thinking": false, + "multimodalToolUse": false + } + } + } }, "type": "object", "properties": { @@ -1133,6 +1258,140 @@ "default": [], "type": "array", "items": {} + }, + "modelDefinitions": { + "title": "Model Definitions", + "description": "Registry of model metadata, including tier, family, and features.", + "markdownDescription": "Registry of model metadata, including tier, family, and features.\n\n- Category: `Model`\n- Requires restart: `yes`\n- Default: `{\n \"gemini-3.1-pro-preview\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": true\n }\n },\n \"gemini-3.1-pro-preview-customtools\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"features\": {\n \"thinking\": true,\n 
\"multimodalToolUse\": true\n }\n },\n \"gemini-3-pro-preview\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": true\n }\n },\n \"gemini-3-flash-preview\": {\n \"tier\": \"flash\",\n \"family\": \"gemini-3\",\n \"isPreview\": true,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": true\n }\n },\n \"gemini-2.5-pro\": {\n \"tier\": \"pro\",\n \"family\": \"gemini-2.5\",\n \"isPreview\": false,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"gemini-2.5-flash\": {\n \"tier\": \"flash\",\n \"family\": \"gemini-2.5\",\n \"isPreview\": false,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"tier\": \"flash-lite\",\n \"family\": \"gemini-2.5\",\n \"isPreview\": false,\n \"dialogLocation\": \"manual\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"auto\": {\n \"tier\": \"auto\",\n \"isPreview\": true,\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": false\n }\n },\n \"pro\": {\n \"tier\": \"pro\",\n \"isPreview\": false,\n \"features\": {\n \"thinking\": true,\n \"multimodalToolUse\": false\n }\n },\n \"flash\": {\n \"tier\": \"flash\",\n \"isPreview\": false,\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"flash-lite\": {\n \"tier\": \"flash-lite\",\n \"isPreview\": false,\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n },\n \"auto-gemini-3\": {\n \"displayName\": \"Auto (Gemini 3)\",\n \"tier\": \"auto\",\n \"isPreview\": true,\n \"dialogLocation\": \"main\",\n \"dialogDescription\": \"Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash\",\n \"features\": {\n \"thinking\": true,\n 
\"multimodalToolUse\": false\n }\n },\n \"auto-gemini-2.5\": {\n \"displayName\": \"Auto (Gemini 2.5)\",\n \"tier\": \"auto\",\n \"isPreview\": false,\n \"dialogLocation\": \"main\",\n \"dialogDescription\": \"Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash\",\n \"features\": {\n \"thinking\": false,\n \"multimodalToolUse\": false\n }\n }\n}`", + "default": { + "gemini-3.1-pro-preview": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3.1-pro-preview-customtools": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3-pro-preview": { + "tier": "pro", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": true, + "multimodalToolUse": true + } + }, + "gemini-3-flash-preview": { + "tier": "flash", + "family": "gemini-3", + "isPreview": true, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": true + } + }, + "gemini-2.5-pro": { + "tier": "pro", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "gemini-2.5-flash": { + "tier": "flash", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "gemini-2.5-flash-lite": { + "tier": "flash-lite", + "family": "gemini-2.5", + "isPreview": false, + "dialogLocation": "manual", + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "auto": { + "tier": "auto", + "isPreview": true, + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "pro": { + "tier": "pro", + "isPreview": false, + "features": { + "thinking": true, + "multimodalToolUse": false + } 
+ }, + "flash": { + "tier": "flash", + "isPreview": false, + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "flash-lite": { + "tier": "flash-lite", + "isPreview": false, + "features": { + "thinking": false, + "multimodalToolUse": false + } + }, + "auto-gemini-3": { + "displayName": "Auto (Gemini 3)", + "tier": "auto", + "isPreview": true, + "dialogLocation": "main", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash", + "features": { + "thinking": true, + "multimodalToolUse": false + } + }, + "auto-gemini-2.5": { + "displayName": "Auto (Gemini 2.5)", + "tier": "auto", + "isPreview": false, + "dialogLocation": "main", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash", + "features": { + "thinking": false, + "multimodalToolUse": false + } + } + }, + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/ModelDefinition" + } } }, "additionalProperties": false @@ -1188,6 +1447,16 @@ "markdownDescription": "Model override for the visual agent.\n\n- Category: `Advanced`\n- Requires restart: `yes`", "type": "string" }, + "allowedDomains": { + "title": "Allowed Domains", + "description": "A list of allowed domains for the browser agent (e.g., [\"github.com\", \"*.google.com\"]).", + "markdownDescription": "A list of allowed domains for the browser agent (e.g., [\"github.com\", \"*.google.com\"]).\n\n- Category: `Advanced`\n- Requires restart: `yes`\n- Default: `[\n \"github.com\",\n \"*.google.com\",\n \"localhost\"\n]`", + "default": ["github.com", "*.google.com", "localhost"], + "type": "array", + "items": { + "type": "string" + } + }, "disableUserInput": { "title": "Disable User Input", "description": "Disable user input on browser window during automation.", @@ -1311,8 +1580,8 @@ "properties": { "sandbox": { "title": "Sandbox", - "description": "Sandbox execution environment. 
Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").", - "markdownDescription": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").\n\n- Category: `Tools`\n- Requires restart: `yes`", + "description": "Legacy full-process sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").", + "markdownDescription": "Legacy full-process sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").\n\n- Category: `Tools`\n- Requires restart: `yes`", "$ref": "#/$defs/BooleanOrStringOrObject" }, "shell": { @@ -1471,6 +1740,13 @@ "default": {}, "type": "object", "properties": { + "toolSandboxing": { + "title": "Tool Sandboxing", + "description": "Experimental tool-level sandboxing (implementation in progress).", + "markdownDescription": "Experimental tool-level sandboxing (implementation in progress).\n\n- Category: `Security`\n- Requires restart: `no`\n- Default: `false`", + "default": false, + "type": "boolean" + }, "disableYoloMode": { "title": "Disable YOLO Mode", "description": "Disable YOLO mode, even if enabled by a flag.", @@ -1478,6 +1754,13 @@ "default": false, "type": "boolean" }, + "disableAlwaysAllow": { + "title": "Disable Always Allow", + "description": "Disable \"Always allow\" options in tool confirmation dialogs.", + "markdownDescription": "Disable \"Always allow\" options in tool confirmation dialogs.\n\n- Category: `Security`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + 
"type": "boolean" + }, "enablePermanentToolApproval": { "title": "Allow Permanent Tool Approval", "description": "Enable the \"Allow for all future sessions\" option in tool confirmation dialogs.", @@ -1776,6 +2059,13 @@ "default": false, "type": "boolean" }, + "dynamicModelConfiguration": { + "title": "Dynamic Model Configuration", + "description": "Enable dynamic model configuration (definitions, resolutions, and chains) via settings.", + "markdownDescription": "Enable dynamic model configuration (definitions, resolutions, and chains) via settings.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + "type": "boolean" + }, "gemmaModelRouter": { "title": "Gemma Model Router", "description": "Enable Gemma model router (experimental).", @@ -1816,6 +2106,13 @@ } }, "additionalProperties": false + }, + "topicUpdateNarration": { + "title": "Topic & Update Narration", + "description": "Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting.", + "markdownDescription": "Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `false`", + "default": false, + "type": "boolean" } }, "additionalProperties": false @@ -2010,8 +2307,8 @@ "properties": { "secureModeEnabled": { "title": "Secure Mode Enabled", - "description": "If true, disallows yolo mode from being used.", - "markdownDescription": "If true, disallows yolo mode from being used.\n\n- Category: `Admin`\n- Requires restart: `no`\n- Default: `false`", + "description": "If true, disallows YOLO mode and \"Always allow\" options from being used.", + "markdownDescription": "If true, disallows YOLO mode and \"Always allow\" options from being used.\n\n- Category: `Admin`\n- Requires restart: `no`\n- Default: `false`", "default": false, "type": "boolean" }, @@ -2530,6 +2827,41 @@ } } } + }, 
+    "ModelDefinition": {
+      "type": "object",
+      "description": "Model metadata registry entry.",
+      "properties": {
+        "displayName": {
+          "type": "string"
+        },
+        "tier": {
+          "enum": ["pro", "flash", "flash-lite", "custom", "auto"]
+        },
+        "family": {
+          "type": "string"
+        },
+        "isPreview": {
+          "type": "boolean"
+        },
+        "dialogLocation": {
+          "enum": ["main", "manual"]
+        },
+        "dialogDescription": {
+          "type": "string"
+        },
+        "features": {
+          "type": "object",
+          "properties": {
+            "thinking": {
+              "type": "boolean"
+            },
+            "multimodalToolUse": {
+              "type": "boolean"
+            }
+          }
+        }
+      }
     }
   }
 }
diff --git a/scripts/build_package.js b/scripts/build_package.js
index c201333d2c..279e46fa94 100644
--- a/scripts/build_package.js
+++ b/scripts/build_package.js
@@ -31,6 +31,15 @@ const packageName = basename(process.cwd());
 // build typescript files
 execSync('tsc --build', { stdio: 'inherit' });
 
+// Core package only: when scripts/bundle-browser-mcp.mjs exists under the current working directory, run the bundle:browser-mcp npm script
+const bundleScript = join(process.cwd(), 'scripts', 'bundle-browser-mcp.mjs');
+if (packageName === 'core' && existsSync(bundleScript)) {
+  console.log('Running chrome devtools MCP bundling...');
+  execSync('npm run bundle:browser-mcp', {
+    stdio: 'inherit',
+  });
+}
+
 // copy .{md,json} files
 execSync('node ../../scripts/copy_files.js', { stdio: 'inherit' });
 
diff --git a/scripts/copy_bundle_assets.js b/scripts/copy_bundle_assets.js
index 7884bf428b..dea50101ef 100644
--- a/scripts/copy_bundle_assets.js
+++ b/scripts/copy_bundle_assets.js
@@ -95,4 +95,12 @@ if (existsSync(devtoolsDistSrc)) {
   console.log('Copied devtools package to bundle/node_modules/');
 }
 
+// 6. Copy bundled chrome-devtools-mcp from packages/core/dist/bundled into <bundleDir>/bundled; skipped when the source directory does not exist
+const bundleMcpSrc = join(root, 'packages/core/dist/bundled');
+const bundleMcpDest = join(bundleDir, 'bundled');
+if (existsSync(bundleMcpSrc)) {
+  cpSync(bundleMcpSrc, bundleMcpDest, { recursive: true, dereference: true });
+  console.log('Copied bundled chrome-devtools-mcp to bundle/bundled/');
+}
+
 console.log('Assets copied to bundle/');