Merge branch 'main' into mk-windows-sandboxing

This commit is contained in:
matt korwel
2026-03-13 12:13:09 -07:00
committed by GitHub
17 changed files with 792 additions and 74 deletions

View File

@@ -25,6 +25,20 @@ To use remote subagents, you must explicitly enable them in your
}
```
## Proxy support
Gemini CLI routes traffic to remote agents through an HTTP/HTTPS proxy if one is
configured. It uses the `general.proxy` setting in your `settings.json` file or
standard environment variables (`HTTP_PROXY`, `HTTPS_PROXY`).
```json
{
"general": {
"proxy": "http://my-proxy:8080"
}
}
```
## Defining remote subagents
Remote subagents are defined as Markdown files (`.md`) with YAML frontmatter.
@@ -40,6 +54,7 @@ You can place them in:
| `kind` | string | Yes | Must be `remote`. |
| `name` | string | Yes | A unique name for the agent. Must be a valid slug (lowercase letters, numbers, hyphens, and underscores only). |
| `agent_card_url` | string | Yes | The URL to the agent's A2A card endpoint. |
| `auth` | object | No | Authentication configuration. See [Authentication](#authentication). |
### Single-subagent example
@@ -70,6 +85,273 @@ Markdown file.
> **Note:** Mixed local and remote agents, or multiple local agents, are not
> supported in a single file; the list format is currently remote-only.
## Authentication
Many remote agents require authentication. Gemini CLI supports several
authentication methods aligned with the
[A2A security specification](https://a2a-protocol.org/latest/specification/#451-securityscheme).
Add an `auth` block to your agent's frontmatter to configure credentials.
### Supported auth types
Gemini CLI supports the following authentication types:
| Type | Description |
| :------------------- | :--------------------------------------------------------------------------------------------- |
| `apiKey` | Send a static API key as an HTTP header. |
| `http` | HTTP authentication (Bearer token, Basic credentials, or any IANA-registered scheme). |
| `google-credentials` | Google Application Default Credentials (ADC). Automatically selects access or identity tokens. |
| `oauth2` | OAuth 2.0 Authorization Code flow with PKCE. Opens a browser for interactive sign-in. |
### Dynamic values
For `apiKey` and `http` auth types, secret values (`key`, `token`, `username`,
`password`, `value`) support dynamic resolution:
| Format | Description | Example |
| :---------- | :-------------------------------------------------- | :------------------------- |
| `$ENV_VAR` | Read from an environment variable. | `$MY_API_KEY` |
| `!command` | Execute a shell command and use the trimmed output. | `!gcloud auth print-token` |
| literal | Use the string as-is. | `sk-abc123` |
| `$$` / `!!` | Escape prefix. `$$FOO` becomes the literal `$FOO`. | `$$NOT_AN_ENV_VAR` |
> **Security tip:** Prefer `$ENV_VAR` or `!command` over embedding secrets
> directly in agent files, especially for project-level agents checked into
> version control.
### API key (`apiKey`)
Sends an API key as an HTTP header on every request.
| Field | Type | Required | Description |
| :----- | :----- | :------- | :---------------------------------------------------- |
| `type` | string | Yes | Must be `apiKey`. |
| `key` | string | Yes | The API key value. Supports dynamic values. |
| `name` | string | No | Header name to send the key in. Default: `X-API-Key`. |
```yaml
---
kind: remote
name: my-agent
agent_card_url: https://example.com/agent-card
auth:
type: apiKey
key: $MY_API_KEY
---
```
### HTTP authentication (`http`)
Supports Bearer tokens, Basic auth, and arbitrary IANA-registered HTTP
authentication schemes.
#### Bearer token
Use the following fields to configure a Bearer token:
| Field | Type | Required | Description |
| :------- | :----- | :------- | :----------------------------------------- |
| `type` | string | Yes | Must be `http`. |
| `scheme` | string | Yes | Must be `Bearer`. |
| `token` | string | Yes | The bearer token. Supports dynamic values. |
```yaml
auth:
type: http
scheme: Bearer
token: $MY_BEARER_TOKEN
```
#### Basic authentication
Use the following fields to configure Basic authentication:
| Field | Type | Required | Description |
| :--------- | :----- | :------- | :------------------------------------- |
| `type` | string | Yes | Must be `http`. |
| `scheme` | string | Yes | Must be `Basic`. |
| `username` | string | Yes | The username. Supports dynamic values. |
| `password` | string | Yes | The password. Supports dynamic values. |
```yaml
auth:
type: http
scheme: Basic
username: $MY_USERNAME
password: $MY_PASSWORD
```
#### Raw scheme
For any other IANA-registered scheme (for example, Digest, HOBA), provide the
raw authorization value.
| Field | Type | Required | Description |
| :------- | :----- | :------- | :---------------------------------------------------------------------------- |
| `type` | string | Yes | Must be `http`. |
| `scheme` | string | Yes | The scheme name (for example, `Digest`). |
| `value` | string | Yes | Raw value sent as `Authorization: <scheme> <value>`. Supports dynamic values. |
```yaml
auth:
type: http
scheme: Digest
value: $MY_DIGEST_VALUE
```
### Google Application Default Credentials (`google-credentials`)
Uses
[Google Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials)
to authenticate with Google Cloud services and Cloud Run endpoints. This is the
recommended auth method for agents hosted on Google Cloud infrastructure.
| Field | Type | Required | Description |
| :------- | :------- | :------- | :-------------------------------------------------------------------------- |
| `type` | string | Yes | Must be `google-credentials`. |
| `scopes` | string[] | No | OAuth scopes. Defaults to `https://www.googleapis.com/auth/cloud-platform`. |
```yaml
---
kind: remote
name: my-gcp-agent
agent_card_url: https://my-agent-xyz.run.app/.well-known/agent.json
auth:
type: google-credentials
---
```
#### How token selection works
The provider automatically selects the correct token type based on the agent's
host:
| Host pattern | Token type | Use case |
| :----------------- | :----------------- | :------------------------------------------ |
| `*.googleapis.com` | **Access token** | Google APIs (Agent Engine, Vertex AI, etc.) |
| `*.run.app` | **Identity token** | Cloud Run services |
- **Access tokens** authorize API calls to Google services. They are scoped
(default: `cloud-platform`) and fetched via `GoogleAuth.getClient()`.
- **Identity tokens** prove the caller's identity to a service that validates
the token's audience. The audience is set to the target host. These are
fetched via `GoogleAuth.getIdTokenClient()`.
Both token types are cached and automatically refreshed before expiry.
#### Setup
`google-credentials` relies on ADC, which means your environment must have
credentials configured. Common setups:
- **Local development:** Run `gcloud auth application-default login` to
authenticate with your Google account.
- **CI / Cloud environments:** Use a service account. Set the
`GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of your
service account key file, or use workload identity on GKE / Cloud Run.
#### Allowed hosts
For security, `google-credentials` only sends tokens to known Google-owned
hosts:
- `*.googleapis.com`
- `*.run.app`
Requests to any other host will be rejected with an error. If your agent is
hosted on a different domain, use one of the other auth types (`apiKey`, `http`,
or `oauth2`).
#### Examples
The following examples demonstrate how to configure Google Application Default
Credentials.
**Cloud Run agent:**
```yaml
---
kind: remote
name: cloud-run-agent
agent_card_url: https://my-agent-xyz.run.app/.well-known/agent.json
auth:
type: google-credentials
---
```
**Google API with custom scopes:**
```yaml
---
kind: remote
name: vertex-agent
agent_card_url: https://us-central1-aiplatform.googleapis.com/.well-known/agent.json
auth:
type: google-credentials
scopes:
- https://www.googleapis.com/auth/cloud-platform
- https://www.googleapis.com/auth/compute
---
```
### OAuth 2.0 (`oauth2`)
Performs an interactive OAuth 2.0 Authorization Code flow with PKCE. On first
use, Gemini CLI opens your browser for sign-in and persists the resulting tokens
for subsequent requests.
| Field | Type | Required | Description |
| :------------------ | :------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------- |
| `type` | string | Yes | Must be `oauth2`. |
| `client_id` | string | Yes\* | OAuth client ID. Required for interactive auth. |
| `client_secret` | string | No\* | OAuth client secret. Required by most authorization servers (confidential clients). Can be omitted for public clients that don't require a secret. |
| `scopes` | string[] | No | Requested scopes. Can also be discovered from the agent card. |
| `authorization_url` | string | No | Authorization endpoint. Discovered from the agent card if omitted. |
| `token_url` | string | No | Token endpoint. Discovered from the agent card if omitted. |
```yaml
---
kind: remote
name: oauth-agent
agent_card_url: https://example.com/.well-known/agent.json
auth:
type: oauth2
client_id: my-client-id.apps.example.com
---
```
If the agent card advertises an `oauth2` security scheme with
`authorizationCode` flow, the `authorization_url`, `token_url`, and `scopes` are
automatically discovered. You only need to provide `client_id` (and
`client_secret` if required).
Tokens are persisted to disk and refreshed automatically when they expire.
### Auth validation
When Gemini CLI loads a remote agent, it validates your auth configuration
against the agent card's declared `securitySchemes`. If the agent requires
authentication that you haven't configured, you'll see an error describing
what's needed.
`google-credentials` is treated as compatible with `http` Bearer security
schemes, since it produces Bearer tokens.
### Auth retry behavior
All auth providers automatically retry on `401` and `403` responses by
re-fetching credentials (up to 2 retries). This handles cases like expired
tokens or rotated credentials. For `apiKey` with `!command` values, the command
is re-executed on retry to fetch a fresh key.
### Agent card fetching and auth
When connecting to a remote agent, Gemini CLI first fetches the agent card
**without** authentication. If the card endpoint returns a `401` or `403`, it
retries the fetch **with** the configured auth headers. This lets agents have
publicly accessible cards while protecting their task endpoints, or to protect
both behind auth.
## Managing Subagents
Users can manage subagents using the following commands within the Gemini CLI:

View File

@@ -38,6 +38,34 @@ main agent calls the tool, it delegates the task to the subagent. Once the
subagent completes its task, it reports back to the main agent with its
findings.
## How to use subagents
You can use subagents through automatic delegation or by explicitly forcing them
in your prompt.
### Automatic delegation
Gemini CLI's main agent is instructed to use specialized subagents when a task
matches their expertise. For example, if you ask "How does the auth system
work?", the main agent may decide to call the `codebase_investigator` subagent
to perform the research.
### Forcing a subagent (@ syntax)
You can explicitly direct a task to a specific subagent by using the `@` symbol
followed by the subagent's name at the beginning of your prompt. This is useful
when you want to bypass the main agent's decision-making and go straight to a
specialist.
**Example:**
```bash
@codebase_investigator Map out the relationship between the AgentRegistry and the LocalAgentExecutor.
```
When you use the `@` syntax, the CLI injects a system note that nudges the
primary model to use that specific subagent tool immediately.
## Built-in subagents
Gemini CLI comes with the following built-in subagents:
@@ -49,15 +77,17 @@ Gemini CLI comes with the following built-in subagents:
dependencies.
- **When to use:** "How does the authentication system work?", "Map out the
dependencies of the `AgentRegistry` class."
- **Configuration:** Enabled by default. You can configure it in
`settings.json`. Example (forcing a specific model):
- **Configuration:** Enabled by default. You can override its settings in
`settings.json` under `agents.overrides`. Example (forcing a specific model
and increasing turns):
```json
{
"experimental": {
"codebaseInvestigatorSettings": {
"enabled": true,
"maxNumTurns": 20,
"model": "gemini-2.5-pro"
"agents": {
"overrides": {
"codebase_investigator": {
"modelConfig": { "model": "gemini-3-flash-preview" },
"runConfig": { "maxTurns": 50 }
}
}
}
}
@@ -233,7 +263,7 @@ kind: local
tools:
- read_file
- grep_search
model: gemini-2.5-pro
model: gemini-3-flash-preview
temperature: 0.2
max_turns: 10
---
@@ -254,16 +284,102 @@ it yourself; just report it.
### Configuration schema
| Field | Type | Required | Description |
| :------------- | :----- | :------- | :------------------------------------------------------------------------------------------------------------------------ |
| `name` | string | Yes | Unique identifier (slug) used as the tool name for the agent. Only lowercase letters, numbers, hyphens, and underscores. |
| `description` | string | Yes | Short description of what the agent does. This is visible to the main agent to help it decide when to call this subagent. |
| `kind` | string | No | `local` (default) or `remote`. |
| `tools` | array | No | List of tool names this agent can use. If omitted, it may have access to a default set. |
| `model` | string | No | Specific model to use (e.g., `gemini-2.5-pro`). Defaults to `inherit` (uses the main session model). |
| `temperature` | number | No | Model temperature (0.0 - 2.0). |
| `max_turns` | number | No | Maximum number of conversation turns allowed for this agent before it must return. Defaults to `15`. |
| `timeout_mins` | number | No | Maximum execution time in minutes. Defaults to `5`. |
| Field | Type | Required | Description |
| :------------- | :----- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `name` | string | Yes | Unique identifier (slug) used as the tool name for the agent. Only lowercase letters, numbers, hyphens, and underscores. |
| `description` | string | Yes | Short description of what the agent does. This is visible to the main agent to help it decide when to call this subagent. |
| `kind` | string | No | `local` (default) or `remote`. |
| `tools` | array | No | List of tool names this agent can use. Supports wildcards: `*` (all tools), `mcp_*` (all MCP tools), `mcp_server_*` (all tools from a server). **If omitted, it inherits all tools from the parent session.** |
| `model` | string | No | Specific model to use (e.g., `gemini-3-preview`). Defaults to `inherit` (uses the main session model). |
| `temperature` | number | No | Model temperature (0.0 - 2.0). Defaults to `1`. |
| `max_turns` | number | No | Maximum number of conversation turns allowed for this agent before it must return. Defaults to `30`. |
| `timeout_mins` | number | No | Maximum execution time in minutes. Defaults to `10`. |
### Tool wildcards
When defining `tools` for a subagent, you can use wildcards to quickly grant
access to groups of tools:
- `*`: Grant access to all available built-in and discovered tools.
- `mcp_*`: Grant access to all tools from all connected MCP servers.
- `mcp_my-server_*`: Grant access to all tools from a specific MCP server named
`my-server`.
### Isolation and recursion protection
Each subagent runs in its own isolated context loop. This means:
- **Independent history:** The subagent's conversation history does not bloat
the main agent's context.
- **Isolated tools:** The subagent only has access to the tools you explicitly
grant it.
- **Recursion protection:** To prevent infinite loops and excessive token usage,
subagents **cannot** call other subagents. If a subagent is granted the `*`
tool wildcard, it will still be unable to see or invoke other agents.
## Managing subagents
You can manage subagents interactively using the `/agents` command or
persistently via `settings.json`.
### Interactive management (/agents)
If you are in an interactive CLI session, you can use the `/agents` command to
manage subagents without editing configuration files manually. This is the
recommended way to quickly enable, disable, or re-configure agents on the fly.
For a full list of sub-commands and usage, see the
[`/agents` command reference](../reference/commands.md#agents).
### Persistent configuration (settings.json)
While the `/agents` command and agent definition files provide a starting point,
you can use `settings.json` for global, persistent overrides. This is useful for
enforcing specific models or execution limits across all sessions.
#### `agents.overrides`
Use this to enable or disable specific agents or override their run
configurations.
```json
{
"agents": {
"overrides": {
"security-auditor": {
"enabled": false,
"runConfig": {
"maxTurns": 20,
"maxTimeMinutes": 10
}
}
}
}
}
```
#### `modelConfigs.overrides`
You can target specific subagents with custom model settings (like system
instruction prefixes or specific safety settings) using the `overrideScope`
field.
```json
{
"modelConfigs": {
"overrides": [
{
"match": { "overrideScope": "security-auditor" },
"modelConfig": {
"generateContentConfig": {
"temperature": 0.1
}
}
}
]
}
}
```
### Optimizing your subagent
@@ -298,7 +414,7 @@ Gemini CLI can also delegate tasks to remote subagents using the Agent-to-Agent
> **Note: Remote subagents are currently an experimental feature.**
See the [Remote Subagents documentation](remote-agents) for detailed
configuration and usage instructions.
configuration, authentication, and usage instructions.
## Extension subagents

View File

@@ -14,6 +14,31 @@ Slash commands provide meta-level control over the CLI itself.
- **Description:** Show version info. Share this information when filing issues.
### `/agents`
- **Description:** Manage local and remote subagents.
- **Note:** This command is experimental and requires
`experimental.enableAgents: true` in your `settings.json`.
- **Sub-commands:**
- **`list`**:
- **Description:** Lists all discovered agents, including built-in, local,
and remote agents.
- **Usage:** `/agents list`
- **`reload`** (alias: `refresh`):
- **Description:** Rescans agent directories (`~/.gemini/agents` and
`.gemini/agents`) and reloads the registry.
- **Usage:** `/agents reload`
- **`enable`**:
- **Description:** Enables a specific subagent.
- **Usage:** `/agents enable <agent-name>`
- **`disable`**:
- **Description:** Disables a specific subagent.
- **Usage:** `/agents disable <agent-name>`
- **`config`**:
- **Description:** Opens a configuration dialog for the specified agent to
adjust its model, temperature, or execution limits.
- **Usage:** `/agents config <agent-name>`
### `/auth`
- **Description:** Open a dialog that lets you change the authentication method.

View File

@@ -0,0 +1 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"write_file","args":{"file_path":"test.txt","content":"hello"}}},{"text":"I've successfully written \"hello\" to test.txt. The file has been created with the specified content."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]}

View File

@@ -203,4 +203,33 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => {
// Should successfully complete all operations
assertModelHasOutput(result);
});
it('should handle tool confirmation for write_file without crashing', async () => {
rig.setup('tool-confirmation', {
fakeResponsesPath: join(
__dirname,
'browser-agent.confirmation.responses',
),
settings: {
agents: {
browser_agent: {
headless: true,
sessionMode: 'isolated',
},
},
},
});
const run = await rig.runInteractive({ approvalMode: 'default' });
await run.type('Write hello to test.txt');
await run.type('\r');
await run.expectText('Allow', 15000);
await run.type('y');
await run.type('\r');
await run.expectText('successfully written', 15000);
});
});

View File

@@ -116,38 +116,9 @@ const Kbd = ({ name, shortcut }: { name: string; shortcut: string }) => (
</>
);
/**
* Loading state component displayed while sessions are being loaded.
*/
const SessionBrowserLoading = (): React.JSX.Element => (
<Box flexDirection="column" paddingX={1}>
<Text color={Colors.Gray}>Loading sessions</Text>
</Box>
);
/**
* Error state component displayed when session loading fails.
*/
const SessionBrowserError = ({
state,
}: {
state: SessionBrowserState;
}): React.JSX.Element => (
<Box flexDirection="column" paddingX={1}>
<Text color={Colors.AccentRed}>Error: {state.error}</Text>
<Text color={Colors.Gray}>Press q to exit</Text>
</Box>
);
/**
* Empty state component displayed when no sessions are found.
*/
const SessionBrowserEmpty = (): React.JSX.Element => (
<Box flexDirection="column" paddingX={1}>
<Text color={Colors.Gray}>No auto-saved conversations found.</Text>
<Text color={Colors.Gray}>Press q to exit</Text>
</Box>
);
import { SessionBrowserLoading } from './SessionBrowser/SessionBrowserLoading.js';
import { SessionBrowserError } from './SessionBrowser/SessionBrowserError.js';
import { SessionBrowserEmpty } from './SessionBrowser/SessionBrowserEmpty.js';
import { sortSessions, filterSessions } from './SessionBrowser/utils.js';

View File

@@ -0,0 +1,19 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Box, Text } from 'ink';
import { Colors } from '../../colors.js';
/**
* Empty state component displayed when no sessions are found.
*/
export const SessionBrowserEmpty = (): React.JSX.Element => (
<Box flexDirection="column" paddingX={1}>
<Text color={Colors.Gray}>No auto-saved conversations found.</Text>
<Text color={Colors.Gray}>Press q to exit</Text>
</Box>
);

View File

@@ -0,0 +1,24 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Box, Text } from 'ink';
import { Colors } from '../../colors.js';
import type { SessionBrowserState } from '../SessionBrowser.js';
/**
* Error state component displayed when session loading fails.
*/
export const SessionBrowserError = ({
state,
}: {
state: SessionBrowserState;
}): React.JSX.Element => (
<Box flexDirection="column" paddingX={1}>
<Text color={Colors.AccentRed}>Error: {state.error}</Text>
<Text color={Colors.Gray}>Press q to exit</Text>
</Box>
);

View File

@@ -0,0 +1,18 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Box, Text } from 'ink';
import { Colors } from '../../colors.js';
/**
* Loading state component displayed while sessions are being loaded.
*/
export const SessionBrowserLoading = (): React.JSX.Element => (
<Box flexDirection="column" paddingX={1}>
<Text color={Colors.Gray}>Loading sessions</Text>
</Box>
);

View File

@@ -0,0 +1,35 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { render } from '../../../test-utils/render.js';
import { describe, it, expect } from 'vitest';
import { SessionBrowserLoading } from './SessionBrowserLoading.js';
import { SessionBrowserError } from './SessionBrowserError.js';
import { SessionBrowserEmpty } from './SessionBrowserEmpty.js';
import type { SessionBrowserState } from '../SessionBrowser.js';
describe('SessionBrowser UI States', () => {
it('SessionBrowserLoading renders correctly', async () => {
const { lastFrame, waitUntilReady } = render(<SessionBrowserLoading />);
await waitUntilReady();
expect(lastFrame()).toMatchSnapshot();
});
it('SessionBrowserError renders correctly', async () => {
const mockState = { error: 'Test error message' } as SessionBrowserState;
const { lastFrame, waitUntilReady } = render(
<SessionBrowserError state={mockState} />,
);
await waitUntilReady();
expect(lastFrame()).toMatchSnapshot();
});
it('SessionBrowserEmpty renders correctly', async () => {
const { lastFrame, waitUntilReady } = render(<SessionBrowserEmpty />);
await waitUntilReady();
expect(lastFrame()).toMatchSnapshot();
});
});

View File

@@ -0,0 +1,18 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`SessionBrowser UI States > SessionBrowserEmpty renders correctly 1`] = `
" No auto-saved conversations found.
Press q to exit
"
`;
exports[`SessionBrowser UI States > SessionBrowserError renders correctly 1`] = `
" Error: Test error message
Press q to exit
"
`;
exports[`SessionBrowser UI States > SessionBrowserLoading renders correctly 1`] = `
" Loading sessions…
"
`;

View File

@@ -18,7 +18,7 @@ export const TodoTray: React.FC = () => {
const uiState = useUIState();
const todos: TodoList | null = useMemo(() => {
// Find the most recent todo list written by the WriteTodosTool
// Find the most recent todo list written by tools that output a TodoList (e.g., WriteTodosTool or Tracker tools)
for (let i = uiState.history.length - 1; i >= 0; i--) {
const entry = uiState.history[i];
if (entry.type !== 'tool_group') {

View File

@@ -10,7 +10,7 @@ import {
type ToolInvocation,
type ToolResult,
} from '../tools/tools.js';
import type { Config } from '../config/config.js';
import { type AgentLoopContext } from '../config/agent-loop-context.js';
import type { AgentDefinition, AgentInputs } from './types.js';
import { LocalSubagentInvocation } from './local-invocation.js';
@@ -54,10 +54,6 @@ export class SubagentToolWrapper extends BaseDeclarativeTool<
);
}
private get config(): Config {
return this.context.config;
}
/**
* Creates an invocation instance for executing the subagent.
*
@@ -89,7 +85,7 @@ export class SubagentToolWrapper extends BaseDeclarativeTool<
// Special handling for browser agent - needs async MCP setup
if (definition.name === BROWSER_AGENT_NAME) {
return new BrowserAgentInvocation(
this.config,
this.context,
params,
effectiveMessageBus,
_toolName,

View File

@@ -676,6 +676,43 @@ describe('policy.ts', () => {
}),
);
});
it('should work when context is created via Object.create (prototype chain)', async () => {
const mockConfig = {
setApprovalMode: vi.fn(),
} as unknown as Mocked<Config>;
const mockMessageBus = {
publish: vi.fn(),
} as unknown as Mocked<MessageBus>;
const baseContext = {
config: mockConfig,
messageBus: mockMessageBus,
};
const protoContext: AgentLoopContext = Object.create(baseContext);
expect(Object.keys(protoContext)).toHaveLength(0);
expect(protoContext.config).toBe(mockConfig);
expect(protoContext.messageBus).toBe(mockMessageBus);
const tool = { name: 'test-tool' } as AnyDeclarativeTool;
await updatePolicy(
tool,
ToolConfirmationOutcome.ProceedAlways,
undefined,
protoContext,
mockMessageBus,
);
expect(mockMessageBus.publish).toHaveBeenCalledWith(
expect.objectContaining({
type: MessageBusType.UPDATE_POLICY,
toolName: 'test-tool',
persist: false,
}),
);
});
});
describe('getPolicyDenialError', () => {

View File

@@ -13,6 +13,12 @@ export enum TaskType {
}
export const TaskTypeSchema = z.nativeEnum(TaskType);
export const TASK_TYPE_LABELS: Record<TaskType, string> = {
[TaskType.EPIC]: '[EPIC]',
[TaskType.TASK]: '[TASK]',
[TaskType.BUG]: '[BUG]',
};
export enum TaskStatus {
OPEN = 'open',
IN_PROGRESS = 'in_progress',

View File

@@ -14,12 +14,14 @@ import {
TrackerUpdateTaskTool,
TrackerVisualizeTool,
TrackerAddDependencyTool,
buildTodosReturnDisplay,
} from './trackerTools.js';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as os from 'node:os';
import { TaskStatus, TaskType } from '../services/trackerTypes.js';
import type { TrackerService } from '../services/trackerService.js';
describe('Tracker Tools Integration', () => {
let tempDir: string;
@@ -142,4 +144,90 @@ describe('Tracker Tools Integration', () => {
expect(vizResult.llmContent).toContain('Child Task');
expect(vizResult.llmContent).toContain(childId);
});
describe('buildTodosReturnDisplay', () => {
it('returns empty list for no tasks', async () => {
const mockService = {
listTasks: async () => [],
} as unknown as TrackerService;
const result = await buildTodosReturnDisplay(mockService);
expect(result.todos).toEqual([]);
});
it('returns formatted todos', async () => {
const parent = {
id: 'p1',
title: 'Parent',
type: TaskType.TASK,
status: TaskStatus.IN_PROGRESS,
dependencies: [],
};
const child = {
id: 'c1',
title: 'Child',
type: TaskType.EPIC,
status: TaskStatus.OPEN,
parentId: 'p1',
dependencies: [],
};
const closedLeaf = {
id: 'leaf',
title: 'Closed Leaf',
type: TaskType.BUG,
status: TaskStatus.CLOSED,
parentId: 'c1',
dependencies: [],
};
const mockService = {
listTasks: async () => [parent, child, closedLeaf],
} as unknown as TrackerService;
const display = await buildTodosReturnDisplay(mockService);
expect(display.todos).toEqual([
{
description: `[p1] [TASK] Parent`,
status: 'in_progress',
},
{
description: ` [c1] [EPIC] Child`,
status: 'pending',
},
{
description: ` [leaf] [BUG] Closed Leaf`,
status: 'completed',
},
]);
});
it('detects cycles', async () => {
// Since TrackerTask only has a single parentId, a true cycle is unreachable from roots.
// We simulate a database corruption (two tasks with same ID, one root, one child)
// just to exercise the protective cycle detection branch.
const rootP1 = {
id: 'p1',
title: 'Parent',
type: TaskType.TASK,
status: TaskStatus.OPEN,
dependencies: [],
};
const childP1 = { ...rootP1, parentId: 'p1' };
const mockService = {
listTasks: async () => [rootP1, childP1],
} as unknown as TrackerService;
const display = await buildTodosReturnDisplay(mockService);
expect(display.todos).toEqual([
{
description: `[p1] [TASK] Parent`,
status: 'pending',
},
{
description: ` [CYCLE DETECTED: p1]`,
status: 'cancelled',
},
]);
});
});
});

View File

@@ -23,11 +23,69 @@ import {
TRACKER_UPDATE_TASK_TOOL_NAME,
TRACKER_VISUALIZE_TOOL_NAME,
} from './tool-names.js';
import type { ToolResult } from './tools.js';
import type { ToolResult, TodoList } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { ToolErrorType } from './tool-error.js';
import type { TrackerTask, TaskType } from '../services/trackerTypes.js';
import { TaskStatus } from '../services/trackerTypes.js';
import { TaskStatus, TASK_TYPE_LABELS } from '../services/trackerTypes.js';
import type { TrackerService } from '../services/trackerService.js';
export async function buildTodosReturnDisplay(
service: TrackerService,
): Promise<TodoList> {
const tasks = await service.listTasks();
const childrenMap = new Map<string, TrackerTask[]>();
const roots: TrackerTask[] = [];
for (const task of tasks) {
if (task.parentId) {
if (!childrenMap.has(task.parentId)) {
childrenMap.set(task.parentId, []);
}
childrenMap.get(task.parentId)!.push(task);
} else {
roots.push(task);
}
}
const todos: TodoList['todos'] = [];
const addTask = (task: TrackerTask, depth: number, visited: Set<string>) => {
if (visited.has(task.id)) {
todos.push({
description: `${' '.repeat(depth)}[CYCLE DETECTED: ${task.id}]`,
status: 'cancelled',
});
return;
}
visited.add(task.id);
let status: 'pending' | 'in_progress' | 'completed' | 'cancelled' =
'pending';
if (task.status === TaskStatus.IN_PROGRESS) {
status = 'in_progress';
} else if (task.status === TaskStatus.CLOSED) {
status = 'completed';
}
const indent = ' '.repeat(depth);
const description = `${indent}[${task.id}] ${TASK_TYPE_LABELS[task.type]} ${task.title}`;
todos.push({ description, status });
const children = childrenMap.get(task.id) ?? [];
for (const child of children) {
addTask(child, depth + 1, visited);
}
visited.delete(task.id);
};
for (const root of roots) {
addTask(root, 0, new Set());
}
return { todos };
}
// --- tracker_create_task ---
@@ -71,7 +129,7 @@ class TrackerCreateTaskInvocation extends BaseToolInvocation<
});
return {
llmContent: `Created task ${task.id}: ${task.title}`,
returnDisplay: `Created task ${task.id}.`,
returnDisplay: await buildTodosReturnDisplay(this.service),
};
} catch (error) {
const errorMessage =
@@ -155,7 +213,7 @@ class TrackerUpdateTaskInvocation extends BaseToolInvocation<
const task = await this.service.updateTask(id, updates);
return {
llmContent: `Updated task ${task.id}. Status: ${task.status}`,
returnDisplay: `Updated task ${task.id}.`,
returnDisplay: await buildTodosReturnDisplay(this.service),
};
} catch (error) {
const errorMessage =
@@ -239,7 +297,7 @@ class TrackerGetTaskInvocation extends BaseToolInvocation<
}
return {
llmContent: JSON.stringify(task, null, 2),
returnDisplay: `Retrieved task ${task.id}.`,
returnDisplay: await buildTodosReturnDisplay(this.service),
};
}
}
@@ -327,7 +385,7 @@ class TrackerListTasksInvocation extends BaseToolInvocation<
.join('\n');
return {
llmContent: content,
returnDisplay: `Listed ${tasks.length} tasks.`,
returnDisplay: await buildTodosReturnDisplay(this.service),
};
}
}
@@ -427,7 +485,7 @@ class TrackerAddDependencyInvocation extends BaseToolInvocation<
await this.service.updateTask(task.id, { dependencies: newDeps });
return {
llmContent: `Linked ${task.id} -> ${dep.id}.`,
returnDisplay: 'Dependency added.',
returnDisplay: await buildTodosReturnDisplay(this.service),
};
} catch (error) {
const errorMessage =
@@ -516,12 +574,6 @@ class TrackerVisualizeInvocation extends BaseToolInvocation<
closed: '✅',
};
const typeLabels: Record<TaskType, string> = {
epic: '[EPIC]',
task: '[TASK]',
bug: '[BUG]',
};
const childrenMap = new Map<string, TrackerTask[]>();
const roots: TrackerTask[] = [];
@@ -550,14 +602,15 @@ class TrackerVisualizeInvocation extends BaseToolInvocation<
visited.add(task.id);
const indent = ' '.repeat(depth);
output += `${indent}${statusEmojis[task.status]} ${task.id} ${typeLabels[task.type]} ${task.title}\n`;
output += `${indent}${statusEmojis[task.status]} ${task.id} ${TASK_TYPE_LABELS[task.type]} ${task.title}\n`;
if (task.dependencies.length > 0) {
output += `${indent} └─ Depends on: ${task.dependencies.join(', ')}\n`;
}
const children = childrenMap.get(task.id) ?? [];
for (const child of children) {
renderTask(child, depth + 1, new Set(visited));
renderTask(child, depth + 1, visited);
}
visited.delete(task.id);
};
for (const root of roots) {
@@ -566,7 +619,7 @@ class TrackerVisualizeInvocation extends BaseToolInvocation<
return {
llmContent: output,
returnDisplay: output,
returnDisplay: await buildTodosReturnDisplay(this.service),
};
}
}