diff --git a/docs/core/remote-agents.md b/docs/core/remote-agents.md index a01f015672..1c48df00a3 100644 --- a/docs/core/remote-agents.md +++ b/docs/core/remote-agents.md @@ -25,6 +25,20 @@ To use remote subagents, you must explicitly enable them in your } ``` +## Proxy support + +Gemini CLI routes traffic to remote agents through an HTTP/HTTPS proxy if one is +configured. It uses the `general.proxy` setting in your `settings.json` file or +standard environment variables (`HTTP_PROXY`, `HTTPS_PROXY`). + +```json +{ + "general": { + "proxy": "http://my-proxy:8080" + } +} +``` + ## Defining remote subagents Remote subagents are defined as Markdown files (`.md`) with YAML frontmatter. @@ -40,6 +54,7 @@ You can place them in: | `kind` | string | Yes | Must be `remote`. | | `name` | string | Yes | A unique name for the agent. Must be a valid slug (lowercase letters, numbers, hyphens, and underscores only). | | `agent_card_url` | string | Yes | The URL to the agent's A2A card endpoint. | +| `auth` | object | No | Authentication configuration. See [Authentication](#authentication). | ### Single-subagent example @@ -70,6 +85,273 @@ Markdown file. > **Note:** Mixed local and remote agents, or multiple local agents, are not > supported in a single file; the list format is currently remote-only. +## Authentication + +Many remote agents require authentication. Gemini CLI supports several +authentication methods aligned with the +[A2A security specification](https://a2a-protocol.org/latest/specification/#451-securityscheme). +Add an `auth` block to your agent's frontmatter to configure credentials. + +### Supported auth types + +Gemini CLI supports the following authentication types: + +| Type | Description | +| :------------------- | :--------------------------------------------------------------------------------------------- | +| `apiKey` | Send a static API key as an HTTP header. 
| +| `http` | HTTP authentication (Bearer token, Basic credentials, or any IANA-registered scheme). | +| `google-credentials` | Google Application Default Credentials (ADC). Automatically selects access or identity tokens. | +| `oauth2` | OAuth 2.0 Authorization Code flow with PKCE. Opens a browser for interactive sign-in. | + +### Dynamic values + +For `apiKey` and `http` auth types, secret values (`key`, `token`, `username`, +`password`, `value`) support dynamic resolution: + +| Format | Description | Example | +| :---------- | :-------------------------------------------------- | :------------------------- | +| `$ENV_VAR` | Read from an environment variable. | `$MY_API_KEY` | +| `!command` | Execute a shell command and use the trimmed output. | `!gcloud auth print-token` | +| literal | Use the string as-is. | `sk-abc123` | +| `$$` / `!!` | Escape prefix. `$$FOO` becomes the literal `$FOO`. | `$$NOT_AN_ENV_VAR` | + +> **Security tip:** Prefer `$ENV_VAR` or `!command` over embedding secrets +> directly in agent files, especially for project-level agents checked into +> version control. + +### API key (`apiKey`) + +Sends an API key as an HTTP header on every request. + +| Field | Type | Required | Description | +| :----- | :----- | :------- | :---------------------------------------------------- | +| `type` | string | Yes | Must be `apiKey`. | +| `key` | string | Yes | The API key value. Supports dynamic values. | +| `name` | string | No | Header name to send the key in. Default: `X-API-Key`. | + +```yaml +--- +kind: remote +name: my-agent +agent_card_url: https://example.com/agent-card +auth: + type: apiKey + key: $MY_API_KEY +--- +``` + +### HTTP authentication (`http`) + +Supports Bearer tokens, Basic auth, and arbitrary IANA-registered HTTP +authentication schemes. 
+ +#### Bearer token + +Use the following fields to configure a Bearer token: + +| Field | Type | Required | Description | +| :------- | :----- | :------- | :----------------------------------------- | +| `type` | string | Yes | Must be `http`. | +| `scheme` | string | Yes | Must be `Bearer`. | +| `token` | string | Yes | The bearer token. Supports dynamic values. | + +```yaml +auth: + type: http + scheme: Bearer + token: $MY_BEARER_TOKEN +``` + +#### Basic authentication + +Use the following fields to configure Basic authentication: + +| Field | Type | Required | Description | +| :--------- | :----- | :------- | :------------------------------------- | +| `type` | string | Yes | Must be `http`. | +| `scheme` | string | Yes | Must be `Basic`. | +| `username` | string | Yes | The username. Supports dynamic values. | +| `password` | string | Yes | The password. Supports dynamic values. | + +```yaml +auth: + type: http + scheme: Basic + username: $MY_USERNAME + password: $MY_PASSWORD +``` + +#### Raw scheme + +For any other IANA-registered scheme (for example, Digest, HOBA), provide the +raw authorization value. + +| Field | Type | Required | Description | +| :------- | :----- | :------- | :---------------------------------------------------------------------------- | +| `type` | string | Yes | Must be `http`. | +| `scheme` | string | Yes | The scheme name (for example, `Digest`). | +| `value` | string | Yes | Raw value sent as `Authorization: <scheme> <value>`. Supports dynamic values. | + +```yaml +auth: + type: http + scheme: Digest + value: $MY_DIGEST_VALUE +``` + +### Google Application Default Credentials (`google-credentials`) + +Uses +[Google Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) +to authenticate with Google Cloud services and Cloud Run endpoints. This is the +recommended auth method for agents hosted on Google Cloud infrastructure. 
+ +| Field | Type | Required | Description | +| :------- | :------- | :------- | :-------------------------------------------------------------------------- | +| `type` | string | Yes | Must be `google-credentials`. | +| `scopes` | string[] | No | OAuth scopes. Defaults to `https://www.googleapis.com/auth/cloud-platform`. | + +```yaml +--- +kind: remote +name: my-gcp-agent +agent_card_url: https://my-agent-xyz.run.app/.well-known/agent.json +auth: + type: google-credentials +--- +``` + +#### How token selection works + +The provider automatically selects the correct token type based on the agent's +host: + +| Host pattern | Token type | Use case | +| :----------------- | :----------------- | :------------------------------------------ | +| `*.googleapis.com` | **Access token** | Google APIs (Agent Engine, Vertex AI, etc.) | +| `*.run.app` | **Identity token** | Cloud Run services | + +- **Access tokens** authorize API calls to Google services. They are scoped + (default: `cloud-platform`) and fetched via `GoogleAuth.getClient()`. +- **Identity tokens** prove the caller's identity to a service that validates + the token's audience. The audience is set to the target host. These are + fetched via `GoogleAuth.getIdTokenClient()`. + +Both token types are cached and automatically refreshed before expiry. + +#### Setup + +`google-credentials` relies on ADC, which means your environment must have +credentials configured. Common setups: + +- **Local development:** Run `gcloud auth application-default login` to + authenticate with your Google account. +- **CI / Cloud environments:** Use a service account. Set the + `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of your + service account key file, or use workload identity on GKE / Cloud Run. + +#### Allowed hosts + +For security, `google-credentials` only sends tokens to known Google-owned +hosts: + +- `*.googleapis.com` +- `*.run.app` + +Requests to any other host will be rejected with an error. 
If your agent is +hosted on a different domain, use one of the other auth types (`apiKey`, `http`, +or `oauth2`). + +#### Examples + +The following examples demonstrate how to configure Google Application Default +Credentials. + +**Cloud Run agent:** + +```yaml +--- +kind: remote +name: cloud-run-agent +agent_card_url: https://my-agent-xyz.run.app/.well-known/agent.json +auth: + type: google-credentials +--- +``` + +**Google API with custom scopes:** + +```yaml +--- +kind: remote +name: vertex-agent +agent_card_url: https://us-central1-aiplatform.googleapis.com/.well-known/agent.json +auth: + type: google-credentials + scopes: + - https://www.googleapis.com/auth/cloud-platform + - https://www.googleapis.com/auth/compute +--- +``` + +### OAuth 2.0 (`oauth2`) + +Performs an interactive OAuth 2.0 Authorization Code flow with PKCE. On first +use, Gemini CLI opens your browser for sign-in and persists the resulting tokens +for subsequent requests. + +| Field | Type | Required | Description | +| :------------------ | :------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------- | +| `type` | string | Yes | Must be `oauth2`. | +| `client_id` | string | Yes\* | OAuth client ID. Required for interactive auth. | +| `client_secret` | string | No\* | OAuth client secret. Required by most authorization servers (confidential clients). Can be omitted for public clients that don't require a secret. | +| `scopes` | string[] | No | Requested scopes. Can also be discovered from the agent card. | +| `authorization_url` | string | No | Authorization endpoint. Discovered from the agent card if omitted. | +| `token_url` | string | No | Token endpoint. Discovered from the agent card if omitted. 
| + +```yaml +--- +kind: remote +name: oauth-agent +agent_card_url: https://example.com/.well-known/agent.json +auth: + type: oauth2 + client_id: my-client-id.apps.example.com +--- +``` + +If the agent card advertises an `oauth2` security scheme with +`authorizationCode` flow, the `authorization_url`, `token_url`, and `scopes` are +automatically discovered. You only need to provide `client_id` (and +`client_secret` if required). + +Tokens are persisted to disk and refreshed automatically when they expire. + +### Auth validation + +When Gemini CLI loads a remote agent, it validates your auth configuration +against the agent card's declared `securitySchemes`. If the agent requires +authentication that you haven't configured, you'll see an error describing +what's needed. + +`google-credentials` is treated as compatible with `http` Bearer security +schemes, since it produces Bearer tokens. + +### Auth retry behavior + +All auth providers automatically retry on `401` and `403` responses by +re-fetching credentials (up to 2 retries). This handles cases like expired +tokens or rotated credentials. For `apiKey` with `!command` values, the command +is re-executed on retry to fetch a fresh key. + +### Agent card fetching and auth + +When connecting to a remote agent, Gemini CLI first fetches the agent card +**without** authentication. If the card endpoint returns a `401` or `403`, it +retries the fetch **with** the configured auth headers. This lets agents have +publicly accessible cards while protecting their task endpoints, or protect +both behind auth. + ## Managing Subagents Users can manage subagents using the following commands within the Gemini CLI: diff --git a/docs/core/subagents.md b/docs/core/subagents.md index e464566c01..659ed6d640 100644 --- a/docs/core/subagents.md +++ b/docs/core/subagents.md @@ -38,6 +38,34 @@ main agent calls the tool, it delegates the task to the subagent. 
Once the subagent completes its task, it reports back to the main agent with its findings. +## How to use subagents + +You can use subagents through automatic delegation or by explicitly forcing them +in your prompt. + +### Automatic delegation + +Gemini CLI's main agent is instructed to use specialized subagents when a task +matches their expertise. For example, if you ask "How does the auth system +work?", the main agent may decide to call the `codebase_investigator` subagent +to perform the research. + +### Forcing a subagent (@ syntax) + +You can explicitly direct a task to a specific subagent by using the `@` symbol +followed by the subagent's name at the beginning of your prompt. This is useful +when you want to bypass the main agent's decision-making and go straight to a +specialist. + +**Example:** + +```bash +@codebase_investigator Map out the relationship between the AgentRegistry and the LocalAgentExecutor. +``` + +When you use the `@` syntax, the CLI injects a system note that nudges the +primary model to use that specific subagent tool immediately. + ## Built-in subagents Gemini CLI comes with the following built-in subagents: @@ -49,15 +77,17 @@ Gemini CLI comes with the following built-in subagents: dependencies. - **When to use:** "How does the authentication system work?", "Map out the dependencies of the `AgentRegistry` class." -- **Configuration:** Enabled by default. You can configure it in - `settings.json`. Example (forcing a specific model): +- **Configuration:** Enabled by default. You can override its settings in + `settings.json` under `agents.overrides`. 
Example (forcing a specific model + and increasing turns): ```json { - "experimental": { - "codebaseInvestigatorSettings": { - "enabled": true, - "maxNumTurns": 20, - "model": "gemini-2.5-pro" + "agents": { + "overrides": { + "codebase_investigator": { + "modelConfig": { "model": "gemini-3-flash-preview" }, + "runConfig": { "maxTurns": 50 } + } } } } @@ -233,7 +263,7 @@ kind: local tools: - read_file - grep_search -model: gemini-2.5-pro +model: gemini-3-flash-preview temperature: 0.2 max_turns: 10 --- @@ -254,16 +284,102 @@ it yourself; just report it. ### Configuration schema -| Field | Type | Required | Description | -| :------------- | :----- | :------- | :------------------------------------------------------------------------------------------------------------------------ | -| `name` | string | Yes | Unique identifier (slug) used as the tool name for the agent. Only lowercase letters, numbers, hyphens, and underscores. | -| `description` | string | Yes | Short description of what the agent does. This is visible to the main agent to help it decide when to call this subagent. | -| `kind` | string | No | `local` (default) or `remote`. | -| `tools` | array | No | List of tool names this agent can use. If omitted, it may have access to a default set. | -| `model` | string | No | Specific model to use (e.g., `gemini-2.5-pro`). Defaults to `inherit` (uses the main session model). | -| `temperature` | number | No | Model temperature (0.0 - 2.0). | -| `max_turns` | number | No | Maximum number of conversation turns allowed for this agent before it must return. Defaults to `15`. | -| `timeout_mins` | number | No | Maximum execution time in minutes. Defaults to `5`. 
| +| Field | Type | Required | Description | +| :------------- | :----- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `name` | string | Yes | Unique identifier (slug) used as the tool name for the agent. Only lowercase letters, numbers, hyphens, and underscores. | +| `description` | string | Yes | Short description of what the agent does. This is visible to the main agent to help it decide when to call this subagent. | +| `kind` | string | No | `local` (default) or `remote`. | +| `tools` | array | No | List of tool names this agent can use. Supports wildcards: `*` (all tools), `mcp_*` (all MCP tools), `mcp_server_*` (all tools from a server). **If omitted, it inherits all tools from the parent session.** | +| `model` | string | No | Specific model to use (e.g., `gemini-3-preview`). Defaults to `inherit` (uses the main session model). | +| `temperature` | number | No | Model temperature (0.0 - 2.0). Defaults to `1`. | +| `max_turns` | number | No | Maximum number of conversation turns allowed for this agent before it must return. Defaults to `30`. | +| `timeout_mins` | number | No | Maximum execution time in minutes. Defaults to `10`. | + +### Tool wildcards + +When defining `tools` for a subagent, you can use wildcards to quickly grant +access to groups of tools: + +- `*`: Grant access to all available built-in and discovered tools. +- `mcp_*`: Grant access to all tools from all connected MCP servers. +- `mcp_my-server_*`: Grant access to all tools from a specific MCP server named + `my-server`. + +### Isolation and recursion protection + +Each subagent runs in its own isolated context loop. This means: + +- **Independent history:** The subagent's conversation history does not bloat + the main agent's context. 
+- **Isolated tools:** The subagent can use only the tools in its `tools` list, + or all of the parent session's tools if `tools` is omitted. +- **Recursion protection:** To prevent infinite loops and excessive token usage, + subagents **cannot** call other subagents. If a subagent is granted the `*` + tool wildcard, it will still be unable to see or invoke other agents. + +## Managing subagents + +You can manage subagents interactively using the `/agents` command or +persistently via `settings.json`. + +### Interactive management (/agents) + +If you are in an interactive CLI session, you can use the `/agents` command to +manage subagents without editing configuration files manually. This is the +recommended way to quickly enable, disable, or re-configure agents on the fly. + +For a full list of sub-commands and usage, see the +[`/agents` command reference](../reference/commands.md#agents). + +### Persistent configuration (settings.json) + +While the `/agents` command and agent definition files provide a starting point, +you can use `settings.json` for global, persistent overrides. This is useful for +enforcing specific models or execution limits across all sessions. + +#### `agents.overrides` + +Use this to enable or disable specific agents or override their run +configurations. + +```json +{ + "agents": { + "overrides": { + "security-auditor": { + "enabled": false, + "runConfig": { + "maxTurns": 20, + "maxTimeMinutes": 10 + } + } + } + } +} +``` + +#### `modelConfigs.overrides` + +You can target specific subagents with custom model settings (like system +instruction prefixes or specific safety settings) using the `overrideScope` +field. 
+ +```json +{ + "modelConfigs": { + "overrides": [ + { + "match": { "overrideScope": "security-auditor" }, + "modelConfig": { + "generateContentConfig": { + "temperature": 0.1 + } + } + } + ] + } +} +``` ### Optimizing your subagent @@ -298,7 +414,7 @@ Gemini CLI can also delegate tasks to remote subagents using the Agent-to-Agent > **Note: Remote subagents are currently an experimental feature.** See the [Remote Subagents documentation](remote-agents) for detailed -configuration and usage instructions. +configuration, authentication, and usage instructions. ## Extension subagents diff --git a/docs/reference/commands.md b/docs/reference/commands.md index c7c25cba1e..e9383152d2 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -14,6 +14,31 @@ Slash commands provide meta-level control over the CLI itself. - **Description:** Show version info. Share this information when filing issues. +### `/agents` + +- **Description:** Manage local and remote subagents. +- **Note:** This command is experimental and requires + `experimental.enableAgents: true` in your `settings.json`. +- **Sub-commands:** + - **`list`**: + - **Description:** Lists all discovered agents, including built-in, local, + and remote agents. + - **Usage:** `/agents list` + - **`reload`** (alias: `refresh`): + - **Description:** Rescans agent directories (`~/.gemini/agents` and + `.gemini/agents`) and reloads the registry. + - **Usage:** `/agents reload` + - **`enable`**: + - **Description:** Enables a specific subagent. + - **Usage:** `/agents enable <agent-name>` + - **`disable`**: + - **Description:** Disables a specific subagent. + - **Usage:** `/agents disable <agent-name>` + - **`config`**: + - **Description:** Opens a configuration dialog for the specified agent to + adjust its model, temperature, or execution limits. + - **Usage:** `/agents config <agent-name>` + ### `/auth` - **Description:** Open a dialog that lets you change the authentication method. 
diff --git a/integration-tests/browser-agent.confirmation.responses b/integration-tests/browser-agent.confirmation.responses new file mode 100644 index 0000000000..4f645c6531 --- /dev/null +++ b/integration-tests/browser-agent.confirmation.responses @@ -0,0 +1 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"write_file","args":{"file_path":"test.txt","content":"hello"}}},{"text":"I've successfully written \"hello\" to test.txt. The file has been created with the specified content."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]} diff --git a/integration-tests/browser-agent.test.ts b/integration-tests/browser-agent.test.ts index 0fdb3e717b..f9f07d4c9e 100644 --- a/integration-tests/browser-agent.test.ts +++ b/integration-tests/browser-agent.test.ts @@ -203,4 +203,33 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { // Should successfully complete all operations assertModelHasOutput(result); }); + + it('should handle tool confirmation for write_file without crashing', async () => { + rig.setup('tool-confirmation', { + fakeResponsesPath: join( + __dirname, + 'browser-agent.confirmation.responses', + ), + settings: { + agents: { + browser_agent: { + headless: true, + sessionMode: 'isolated', + }, + }, + }, + }); + + const run = await rig.runInteractive({ approvalMode: 'default' }); + + await run.type('Write hello to test.txt'); + await run.type('\r'); + + await run.expectText('Allow', 15000); + + await run.type('y'); + await run.type('\r'); + + await run.expectText('successfully written', 15000); + }); }); diff --git a/packages/cli/src/ui/components/SessionBrowser.tsx b/packages/cli/src/ui/components/SessionBrowser.tsx index 9e2843c570..0fc80a1d4e 100644 --- a/packages/cli/src/ui/components/SessionBrowser.tsx +++ b/packages/cli/src/ui/components/SessionBrowser.tsx @@ -116,38 +116,9 @@ const Kbd = ({ 
name, shortcut }: { name: string; shortcut: string }) => ( ); -/** - * Loading state component displayed while sessions are being loaded. - */ -const SessionBrowserLoading = (): React.JSX.Element => ( - - Loading sessions… - -); - -/** - * Error state component displayed when session loading fails. - */ -const SessionBrowserError = ({ - state, -}: { - state: SessionBrowserState; -}): React.JSX.Element => ( - - Error: {state.error} - Press q to exit - -); - -/** - * Empty state component displayed when no sessions are found. - */ -const SessionBrowserEmpty = (): React.JSX.Element => ( - - No auto-saved conversations found. - Press q to exit - -); +import { SessionBrowserLoading } from './SessionBrowser/SessionBrowserLoading.js'; +import { SessionBrowserError } from './SessionBrowser/SessionBrowserError.js'; +import { SessionBrowserEmpty } from './SessionBrowser/SessionBrowserEmpty.js'; import { sortSessions, filterSessions } from './SessionBrowser/utils.js'; diff --git a/packages/cli/src/ui/components/SessionBrowser/SessionBrowserEmpty.tsx b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserEmpty.tsx new file mode 100644 index 0000000000..31c9544cd8 --- /dev/null +++ b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserEmpty.tsx @@ -0,0 +1,19 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { Colors } from '../../colors.js'; + +/** + * Empty state component displayed when no sessions are found. + */ +export const SessionBrowserEmpty = (): React.JSX.Element => ( + + No auto-saved conversations found. 
+ Press q to exit + +); diff --git a/packages/cli/src/ui/components/SessionBrowser/SessionBrowserError.tsx b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserError.tsx new file mode 100644 index 0000000000..cf46fb8954 --- /dev/null +++ b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserError.tsx @@ -0,0 +1,24 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { Colors } from '../../colors.js'; +import type { SessionBrowserState } from '../SessionBrowser.js'; + +/** + * Error state component displayed when session loading fails. + */ +export const SessionBrowserError = ({ + state, +}: { + state: SessionBrowserState; +}): React.JSX.Element => ( + + Error: {state.error} + Press q to exit + +); diff --git a/packages/cli/src/ui/components/SessionBrowser/SessionBrowserLoading.tsx b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserLoading.tsx new file mode 100644 index 0000000000..e0c372eca2 --- /dev/null +++ b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserLoading.tsx @@ -0,0 +1,18 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { Colors } from '../../colors.js'; + +/** + * Loading state component displayed while sessions are being loaded. 
+ */ +export const SessionBrowserLoading = (): React.JSX.Element => ( + + Loading sessions… + +); diff --git a/packages/cli/src/ui/components/SessionBrowser/SessionBrowserStates.test.tsx b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserStates.test.tsx new file mode 100644 index 0000000000..2b816a8211 --- /dev/null +++ b/packages/cli/src/ui/components/SessionBrowser/SessionBrowserStates.test.tsx @@ -0,0 +1,35 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { render } from '../../../test-utils/render.js'; +import { describe, it, expect } from 'vitest'; +import { SessionBrowserLoading } from './SessionBrowserLoading.js'; +import { SessionBrowserError } from './SessionBrowserError.js'; +import { SessionBrowserEmpty } from './SessionBrowserEmpty.js'; +import type { SessionBrowserState } from '../SessionBrowser.js'; + +describe('SessionBrowser UI States', () => { + it('SessionBrowserLoading renders correctly', async () => { + const { lastFrame, waitUntilReady } = render(); + await waitUntilReady(); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('SessionBrowserError renders correctly', async () => { + const mockState = { error: 'Test error message' } as SessionBrowserState; + const { lastFrame, waitUntilReady } = render( + , + ); + await waitUntilReady(); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('SessionBrowserEmpty renders correctly', async () => { + const { lastFrame, waitUntilReady } = render(); + await waitUntilReady(); + expect(lastFrame()).toMatchSnapshot(); + }); +}); diff --git a/packages/cli/src/ui/components/SessionBrowser/__snapshots__/SessionBrowserStates.test.tsx.snap b/packages/cli/src/ui/components/SessionBrowser/__snapshots__/SessionBrowserStates.test.tsx.snap new file mode 100644 index 0000000000..e5939219cb --- /dev/null +++ b/packages/cli/src/ui/components/SessionBrowser/__snapshots__/SessionBrowserStates.test.tsx.snap @@ -0,0 +1,18 @@ +// Vitest Snapshot v1, 
https://vitest.dev/guide/snapshot.html + +exports[`SessionBrowser UI States > SessionBrowserEmpty renders correctly 1`] = ` +" No auto-saved conversations found. + Press q to exit +" +`; + +exports[`SessionBrowser UI States > SessionBrowserError renders correctly 1`] = ` +" Error: Test error message + Press q to exit +" +`; + +exports[`SessionBrowser UI States > SessionBrowserLoading renders correctly 1`] = ` +" Loading sessions… +" +`; diff --git a/packages/cli/src/ui/components/messages/Todo.tsx b/packages/cli/src/ui/components/messages/Todo.tsx index a7201b12fb..e1fbd78a86 100644 --- a/packages/cli/src/ui/components/messages/Todo.tsx +++ b/packages/cli/src/ui/components/messages/Todo.tsx @@ -18,7 +18,7 @@ export const TodoTray: React.FC = () => { const uiState = useUIState(); const todos: TodoList | null = useMemo(() => { - // Find the most recent todo list written by the WriteTodosTool + // Find the most recent todo list written by tools that output a TodoList (e.g., WriteTodosTool or Tracker tools) for (let i = uiState.history.length - 1; i >= 0; i--) { const entry = uiState.history[i]; if (entry.type !== 'tool_group') { diff --git a/packages/core/src/agents/subagent-tool-wrapper.ts b/packages/core/src/agents/subagent-tool-wrapper.ts index ff64d4a03f..cf6d1e7112 100644 --- a/packages/core/src/agents/subagent-tool-wrapper.ts +++ b/packages/core/src/agents/subagent-tool-wrapper.ts @@ -10,7 +10,7 @@ import { type ToolInvocation, type ToolResult, } from '../tools/tools.js'; -import type { Config } from '../config/config.js'; + import { type AgentLoopContext } from '../config/agent-loop-context.js'; import type { AgentDefinition, AgentInputs } from './types.js'; import { LocalSubagentInvocation } from './local-invocation.js'; @@ -54,10 +54,6 @@ export class SubagentToolWrapper extends BaseDeclarativeTool< ); } - private get config(): Config { - return this.context.config; - } - /** * Creates an invocation instance for executing the subagent. 
* @@ -89,7 +85,7 @@ export class SubagentToolWrapper extends BaseDeclarativeTool< // Special handling for browser agent - needs async MCP setup if (definition.name === BROWSER_AGENT_NAME) { return new BrowserAgentInvocation( - this.config, + this.context, params, effectiveMessageBus, _toolName, diff --git a/packages/core/src/scheduler/policy.test.ts b/packages/core/src/scheduler/policy.test.ts index 750b14c2ed..e802a4b220 100644 --- a/packages/core/src/scheduler/policy.test.ts +++ b/packages/core/src/scheduler/policy.test.ts @@ -676,6 +676,43 @@ describe('policy.ts', () => { }), ); }); + + it('should work when context is created via Object.create (prototype chain)', async () => { + const mockConfig = { + setApprovalMode: vi.fn(), + } as unknown as Mocked; + const mockMessageBus = { + publish: vi.fn(), + } as unknown as Mocked; + + const baseContext = { + config: mockConfig, + messageBus: mockMessageBus, + }; + const protoContext: AgentLoopContext = Object.create(baseContext); + + expect(Object.keys(protoContext)).toHaveLength(0); + expect(protoContext.config).toBe(mockConfig); + expect(protoContext.messageBus).toBe(mockMessageBus); + + const tool = { name: 'test-tool' } as AnyDeclarativeTool; + + await updatePolicy( + tool, + ToolConfirmationOutcome.ProceedAlways, + undefined, + protoContext, + mockMessageBus, + ); + + expect(mockMessageBus.publish).toHaveBeenCalledWith( + expect.objectContaining({ + type: MessageBusType.UPDATE_POLICY, + toolName: 'test-tool', + persist: false, + }), + ); + }); }); describe('getPolicyDenialError', () => { diff --git a/packages/core/src/services/trackerTypes.ts b/packages/core/src/services/trackerTypes.ts index 7c48f5bcd4..6c21456fe1 100644 --- a/packages/core/src/services/trackerTypes.ts +++ b/packages/core/src/services/trackerTypes.ts @@ -13,6 +13,12 @@ export enum TaskType { } export const TaskTypeSchema = z.nativeEnum(TaskType); +export const TASK_TYPE_LABELS: Record = { + [TaskType.EPIC]: '[EPIC]', + [TaskType.TASK]: '[TASK]', 
+ [TaskType.BUG]: '[BUG]', +}; + export enum TaskStatus { OPEN = 'open', IN_PROGRESS = 'in_progress', diff --git a/packages/core/src/tools/trackerTools.test.ts b/packages/core/src/tools/trackerTools.test.ts index ec0bd0e889..7edafb0fa3 100644 --- a/packages/core/src/tools/trackerTools.test.ts +++ b/packages/core/src/tools/trackerTools.test.ts @@ -14,12 +14,14 @@ import { TrackerUpdateTaskTool, TrackerVisualizeTool, TrackerAddDependencyTool, + buildTodosReturnDisplay, } from './trackerTools.js'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import * as os from 'node:os'; import { TaskStatus, TaskType } from '../services/trackerTypes.js'; +import type { TrackerService } from '../services/trackerService.js'; describe('Tracker Tools Integration', () => { let tempDir: string; @@ -142,4 +144,90 @@ describe('Tracker Tools Integration', () => { expect(vizResult.llmContent).toContain('Child Task'); expect(vizResult.llmContent).toContain(childId); }); + + describe('buildTodosReturnDisplay', () => { + it('returns empty list for no tasks', async () => { + const mockService = { + listTasks: async () => [], + } as unknown as TrackerService; + const result = await buildTodosReturnDisplay(mockService); + expect(result.todos).toEqual([]); + }); + + it('returns formatted todos', async () => { + const parent = { + id: 'p1', + title: 'Parent', + type: TaskType.TASK, + status: TaskStatus.IN_PROGRESS, + dependencies: [], + }; + const child = { + id: 'c1', + title: 'Child', + type: TaskType.EPIC, + status: TaskStatus.OPEN, + parentId: 'p1', + dependencies: [], + }; + const closedLeaf = { + id: 'leaf', + title: 'Closed Leaf', + type: TaskType.BUG, + status: TaskStatus.CLOSED, + parentId: 'c1', + dependencies: [], + }; + + const mockService = { + listTasks: async () => [parent, child, closedLeaf], + } as unknown as TrackerService; + const display = await buildTodosReturnDisplay(mockService); + + expect(display.todos).toEqual([ + { + description: `[p1] [TASK] 
Parent`, + status: 'in_progress', + }, + { + description: ` [c1] [EPIC] Child`, + status: 'pending', + }, + { + description: ` [leaf] [BUG] Closed Leaf`, + status: 'completed', + }, + ]); + }); + + it('detects cycles', async () => { + // Since TrackerTask only has a single parentId, a true cycle is unreachable from roots. + // We simulate a database corruption (two tasks with same ID, one root, one child) + // just to exercise the protective cycle detection branch. + const rootP1 = { + id: 'p1', + title: 'Parent', + type: TaskType.TASK, + status: TaskStatus.OPEN, + dependencies: [], + }; + const childP1 = { ...rootP1, parentId: 'p1' }; + + const mockService = { + listTasks: async () => [rootP1, childP1], + } as unknown as TrackerService; + const display = await buildTodosReturnDisplay(mockService); + + expect(display.todos).toEqual([ + { + description: `[p1] [TASK] Parent`, + status: 'pending', + }, + { + description: ` [CYCLE DETECTED: p1]`, + status: 'cancelled', + }, + ]); + }); + }); }); diff --git a/packages/core/src/tools/trackerTools.ts b/packages/core/src/tools/trackerTools.ts index 03ee3c3a97..0a7101f55e 100644 --- a/packages/core/src/tools/trackerTools.ts +++ b/packages/core/src/tools/trackerTools.ts @@ -23,11 +23,69 @@ import { TRACKER_UPDATE_TASK_TOOL_NAME, TRACKER_VISUALIZE_TOOL_NAME, } from './tool-names.js'; -import type { ToolResult } from './tools.js'; +import type { ToolResult, TodoList } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolErrorType } from './tool-error.js'; import type { TrackerTask, TaskType } from '../services/trackerTypes.js'; -import { TaskStatus } from '../services/trackerTypes.js'; +import { TaskStatus, TASK_TYPE_LABELS } from '../services/trackerTypes.js'; +import type { TrackerService } from '../services/trackerService.js'; + +export async function buildTodosReturnDisplay( + service: TrackerService, +): Promise { + const tasks = await service.listTasks(); + const 
childrenMap = new Map(); + const roots: TrackerTask[] = []; + + for (const task of tasks) { + if (task.parentId) { + if (!childrenMap.has(task.parentId)) { + childrenMap.set(task.parentId, []); + } + childrenMap.get(task.parentId)!.push(task); + } else { + roots.push(task); + } + } + + const todos: TodoList['todos'] = []; + + const addTask = (task: TrackerTask, depth: number, visited: Set) => { + if (visited.has(task.id)) { + todos.push({ + description: `${' '.repeat(depth)}[CYCLE DETECTED: ${task.id}]`, + status: 'cancelled', + }); + return; + } + visited.add(task.id); + + let status: 'pending' | 'in_progress' | 'completed' | 'cancelled' = + 'pending'; + if (task.status === TaskStatus.IN_PROGRESS) { + status = 'in_progress'; + } else if (task.status === TaskStatus.CLOSED) { + status = 'completed'; + } + + const indent = ' '.repeat(depth); + const description = `${indent}[${task.id}] ${TASK_TYPE_LABELS[task.type]} ${task.title}`; + + todos.push({ description, status }); + + const children = childrenMap.get(task.id) ?? []; + for (const child of children) { + addTask(child, depth + 1, visited); + } + visited.delete(task.id); + }; + + for (const root of roots) { + addTask(root, 0, new Set()); + } + + return { todos }; +} // --- tracker_create_task --- @@ -71,7 +129,7 @@ class TrackerCreateTaskInvocation extends BaseToolInvocation< }); return { llmContent: `Created task ${task.id}: ${task.title}`, - returnDisplay: `Created task ${task.id}.`, + returnDisplay: await buildTodosReturnDisplay(this.service), }; } catch (error) { const errorMessage = @@ -155,7 +213,7 @@ class TrackerUpdateTaskInvocation extends BaseToolInvocation< const task = await this.service.updateTask(id, updates); return { llmContent: `Updated task ${task.id}. 
Status: ${task.status}`, - returnDisplay: `Updated task ${task.id}.`, + returnDisplay: await buildTodosReturnDisplay(this.service), }; } catch (error) { const errorMessage = @@ -239,7 +297,7 @@ class TrackerGetTaskInvocation extends BaseToolInvocation< } return { llmContent: JSON.stringify(task, null, 2), - returnDisplay: `Retrieved task ${task.id}.`, + returnDisplay: await buildTodosReturnDisplay(this.service), }; } } @@ -327,7 +385,7 @@ class TrackerListTasksInvocation extends BaseToolInvocation< .join('\n'); return { llmContent: content, - returnDisplay: `Listed ${tasks.length} tasks.`, + returnDisplay: await buildTodosReturnDisplay(this.service), }; } } @@ -427,7 +485,7 @@ class TrackerAddDependencyInvocation extends BaseToolInvocation< await this.service.updateTask(task.id, { dependencies: newDeps }); return { llmContent: `Linked ${task.id} -> ${dep.id}.`, - returnDisplay: 'Dependency added.', + returnDisplay: await buildTodosReturnDisplay(this.service), }; } catch (error) { const errorMessage = @@ -516,12 +574,6 @@ class TrackerVisualizeInvocation extends BaseToolInvocation< closed: '✅', }; - const typeLabels: Record = { - epic: '[EPIC]', - task: '[TASK]', - bug: '[BUG]', - }; - const childrenMap = new Map(); const roots: TrackerTask[] = []; @@ -550,14 +602,15 @@ class TrackerVisualizeInvocation extends BaseToolInvocation< visited.add(task.id); const indent = ' '.repeat(depth); - output += `${indent}${statusEmojis[task.status]} ${task.id} ${typeLabels[task.type]} ${task.title}\n`; + output += `${indent}${statusEmojis[task.status]} ${task.id} ${TASK_TYPE_LABELS[task.type]} ${task.title}\n`; if (task.dependencies.length > 0) { output += `${indent} └─ Depends on: ${task.dependencies.join(', ')}\n`; } const children = childrenMap.get(task.id) ?? 
[]; for (const child of children) { - renderTask(child, depth + 1, new Set(visited)); + renderTask(child, depth + 1, visited); } + visited.delete(task.id); }; for (const root of roots) { @@ -566,7 +619,7 @@ class TrackerVisualizeInvocation extends BaseToolInvocation< return { llmContent: output, - returnDisplay: output, + returnDisplay: await buildTodosReturnDisplay(this.service), }; } }