Merge branch 'main' into akkr/subagents-policy

This commit is contained in:
AK
2026-03-04 12:19:11 -08:00
committed by GitHub
36 changed files with 2073 additions and 321 deletions
+6 -1
View File
@@ -14,4 +14,9 @@
# Docs have a dedicated approver group in addition to maintainers # Docs have a dedicated approver group in addition to maintainers
/docs/ @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs /docs/ @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs
/README.md @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs /README.md @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs
# Prompt contents, tool definitions, and evals require reviews from prompt approvers
/packages/core/src/prompts/ @google-gemini/gemini-cli-prompt-approvers
/packages/core/src/tools/ @google-gemini/gemini-cli-prompt-approvers
/evals/ @google-gemini/gemini-cli-prompt-approvers
@@ -0,0 +1,315 @@
name: 'Unassign Inactive Issue Assignees'
# This workflow runs daily and scans every open "help wanted" issue that has
# one or more assignees. For each assignee it checks whether they have a
# non-draft pull request (open and ready for review, or already merged) that
# is linked to the issue. Draft PRs are intentionally excluded so that
# contributors cannot reset the check by opening a no-op PR. If no
# qualifying PR is found within 7 days of assignment the assignee is
# automatically removed and a friendly comment is posted so that other
# contributors can pick up the work.
# Maintainers, org members, and collaborators (anyone with write access or
# above) are always exempted and will never be auto-unassigned.
on:
schedule:
- cron: '0 9 * * *' # Every day at 09:00 UTC
# Manual trigger; dry_run reports what would change without applying anything.
workflow_dispatch:
inputs:
dry_run:
description: 'Run in dry-run mode (no changes will be applied)'
required: false
default: false
type: 'boolean'
# Never let two runs of this workflow overlap; cancel the older one.
concurrency:
group: '${{ github.workflow }}'
cancel-in-progress: true
defaults:
run:
shell: 'bash'
jobs:
unassign-inactive-assignees:
# Only run in the upstream repository, not in forks.
if: "github.repository == 'google-gemini/gemini-cli'"
runs-on: 'ubuntu-latest'
permissions:
issues: 'write'
steps:
# Mint a short-lived GitHub App installation token so the unassign and
# comment API calls are attributed to the app rather than GITHUB_TOKEN.
- name: 'Generate GitHub App Token'
id: 'generate_token'
uses: 'actions/create-github-app-token@v2'
with:
app-id: '${{ secrets.APP_ID }}'
private-key: '${{ secrets.PRIVATE_KEY }}'
- name: 'Unassign inactive assignees'
uses: 'actions/github-script@v7'
env:
# Forwarded to the script as a string; only the literal 'true' enables
# dry-run mode.
DRY_RUN: '${{ inputs.dry_run }}'
with:
github-token: '${{ steps.generate_token.outputs.token }}'
script: |
// True when the manual trigger requested a dry run: the script then only
// logs the decisions it would make and applies no changes.
const dryRun = process.env.DRY_RUN === 'true';
if (dryRun) core.info('DRY RUN MODE ENABLED: No changes will be applied.');
const { owner, repo } = context.repo;
// External assignees get this many days to produce a ready-for-review PR.
const GRACE_PERIOD_DAYS = 7;
const now = new Date();
// Lower-cased logins of team members who are always exempt from
// auto-unassignment.
const maintainerLogins = new Set();
const teams = ['gemini-cli-maintainers', 'gemini-cli-askmode-approvers', 'gemini-cli-docs'];
for (const slug of teams) {
  let members;
  try {
    members = await github.paginate(github.rest.teams.listMembersInOrg, {
      org: owner,
      team_slug: slug,
    });
  } catch (err) {
    // A missing or renamed team must not abort the whole run.
    core.warning(`Could not fetch team ${slug}: ${err.message}`);
    continue;
  }
  members.forEach((member) => maintainerLogins.add(member.login.toLowerCase()));
  core.info(`Fetched ${members.length} members from team ${slug}.`);
}
// Memoizes Google-org membership lookups per login (boolean values only).
const isGooglerCache = new Map();
// Resolves to true when `login` is a member of the `googlers` or `google`
// GitHub org. A 404 simply means "not a member of this org"; any other API
// error is logged and the login is treated as a non-Googler.
const isGoogler = async (login) => {
  const cached = isGooglerCache.get(login);
  if (cached !== undefined) return cached;
  for (const org of ['googlers', 'google']) {
    try {
      await github.rest.orgs.checkMembershipForUser({ org, username: login });
      isGooglerCache.set(login, true);
      return true;
    } catch (err) {
      if (err.status === 404) continue; // not in this org — try the next one
      // Unexpected failure (rate limit, auth, network): log and stop probing.
      core.warning(`Could not check org membership for ${login}: ${err.message}`);
      break;
    }
  }
  isGooglerCache.set(login, false);
  return false;
};
// Memoizes per-login privilege decisions (boolean values only).
const permissionCache = new Map();
// A "privileged" user is exempt from auto-unassignment: a member of one of
// the exempt teams, a repo collaborator with triage access or above, or a
// member of a Google org.
const isPrivilegedUser = async (login) => {
  if (maintainerLogins.has(login.toLowerCase())) return true;
  const cached = permissionCache.get(login);
  if (cached !== undefined) return cached;
  try {
    const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
      owner,
      repo,
      username: login,
    });
    const privileged = ['admin', 'maintain', 'write', 'triage'].includes(data.permission);
    permissionCache.set(login, privileged);
    if (privileged) {
      core.info(` @${login} is a repo collaborator (${data.permission}) — exempt.`);
      return true;
    }
  } catch (err) {
    // 404 just means "not a collaborator"; anything else is worth surfacing.
    if (err.status !== 404) {
      core.warning(`Could not check permission for ${login}: ${err.message}`);
    }
  }
  // Not a collaborator (or read-only): fall back to the Googler check and
  // cache that result instead.
  const googler = await isGoogler(login);
  permissionCache.set(login, googler);
  return googler;
};
core.info('Fetching open "help wanted" issues with assignees...');
const issues = await github.paginate(github.rest.issues.listForRepo, {
  owner,
  repo,
  state: 'open',
  labels: 'help wanted',
  per_page: 100,
});
// The issues list endpoint also returns pull requests; keep only real
// issues that currently have at least one assignee.
const assignedIssues = issues.filter(
  (issue) => !issue.pull_request && issue.assignees && issue.assignees.length > 0
);
core.info(`Found ${assignedIssues.length} assigned "help wanted" issues.`);
let totalUnassigned = 0;
// FIX: the per-issue loop header was missing — `issue` was referenced
// undeclared below and every `continue` sat outside any loop, which is a
// SyntaxError. Each iteration processes one assigned "help wanted" issue;
// the matching closing brace before the final summary line was already
// present.
for (const issue of assignedIssues) {
  // The timeline carries both assignment events (to date each assignment)
  // and cross-referenced PRs (to find linked work).
  let timelineEvents = [];
  try {
    timelineEvents = await github.paginate(github.rest.issues.listEventsForTimeline, {
      owner,
      repo,
      issue_number: issue.number,
      per_page: 100,
      mediaType: { previews: ['mockingbird'] },
    });
  } catch (err) {
    core.warning(`Could not fetch timeline for issue #${issue.number}: ${err.message}`);
    continue;
  }
  // Replay assigned/unassigned events in order so the map ends up holding
  // the date of each assignee's most recent, still-active assignment.
  const assignedAtMap = new Map();
  for (const event of timelineEvents) {
    if (event.event === 'assigned' && event.assignee) {
      const login = event.assignee.login.toLowerCase();
      const at = new Date(event.created_at);
      assignedAtMap.set(login, at);
    } else if (event.event === 'unassigned' && event.assignee) {
      assignedAtMap.delete(event.assignee.login.toLowerCase());
    }
  }
  // Collect authors of cross-referenced PRs that are "ready for review":
  // open and non-draft, or already merged. Draft PRs never qualify.
  const linkedPRAuthorSet = new Set();
  const seenPRKeys = new Set();
  for (const event of timelineEvents) {
    if (
      event.event !== 'cross-referenced' ||
      !event.source ||
      event.source.type !== 'pull_request' ||
      !event.source.issue ||
      !event.source.issue.user ||
      !event.source.issue.number ||
      !event.source.issue.repository
    ) continue;
    const prOwner = event.source.issue.repository.owner.login;
    const prRepo = event.source.issue.repository.name;
    const prNumber = event.source.issue.number;
    const prAuthor = event.source.issue.user.login.toLowerCase();
    const prKey = `${prOwner}/${prRepo}#${prNumber}`;
    if (seenPRKeys.has(prKey)) continue; // each PR needs only one lookup
    seenPRKeys.add(prKey);
    try {
      const { data: pr } = await github.rest.pulls.get({
        owner: prOwner,
        repo: prRepo,
        pull_number: prNumber,
      });
      const isReady = (pr.state === 'open' && !pr.draft) ||
        (pr.state === 'closed' && pr.merged_at !== null);
      core.info(
        ` PR ${prKey} by @${prAuthor}: ` +
        `state=${pr.state}, draft=${pr.draft}, merged=${!!pr.merged_at} → ` +
        (isReady ? 'qualifies' : 'does NOT qualify (draft or closed without merge)')
      );
      if (isReady) linkedPRAuthorSet.add(prAuthor);
    } catch (err) {
      core.warning(`Could not fetch PR ${prKey}: ${err.message}`);
    }
  }
  // Decide, per assignee, whether the grace period has lapsed without a
  // qualifying PR.
  const assigneesToRemove = [];
  for (const assignee of issue.assignees) {
    const login = assignee.login.toLowerCase();
    // Maintainers, collaborators, and Googlers are always exempt.
    if (await isPrivilegedUser(assignee.login)) {
      core.info(` @${assignee.login}: privileged user — skipping.`);
      continue;
    }
    const assignedAt = assignedAtMap.get(login);
    if (!assignedAt) {
      // No 'assigned' timeline event (e.g. assigned at creation); fall back
      // to the issue creation date as the assignment time.
      core.warning(
        `No 'assigned' event found for @${login} on issue #${issue.number}; ` +
        `falling back to issue creation date (${issue.created_at}).`
      );
      assignedAtMap.set(login, new Date(issue.created_at));
    }
    const resolvedAssignedAt = assignedAtMap.get(login);
    const daysSinceAssignment = (now - resolvedAssignedAt) / (1000 * 60 * 60 * 24);
    core.info(
      ` @${login}: assigned ${daysSinceAssignment.toFixed(1)} day(s) ago, ` +
      `ready-for-review PR: ${linkedPRAuthorSet.has(login) ? 'yes' : 'no'}`
    );
    if (daysSinceAssignment < GRACE_PERIOD_DAYS) {
      core.info(` → within grace period, skipping.`);
      continue;
    }
    if (linkedPRAuthorSet.has(login)) {
      core.info(` → ready-for-review PR found, keeping assignment.`);
      continue;
    }
    core.info(` → no ready-for-review PR after ${GRACE_PERIOD_DAYS} days, will unassign.`);
    assigneesToRemove.push(assignee.login);
  }
  if (assigneesToRemove.length === 0) {
    continue;
  }
  if (!dryRun) {
    try {
      await github.rest.issues.removeAssignees({
        owner,
        repo,
        issue_number: issue.number,
        assignees: assigneesToRemove,
      });
    } catch (err) {
      core.warning(
        `Failed to unassign ${assigneesToRemove.join(', ')} from issue #${issue.number}: ${err.message}`
      );
      // Don't post the explanation comment if the unassignment itself failed.
      continue;
    }
    const mentionList = assigneesToRemove.map((l) => `@${l}`).join(', ');
    const commentBody =
      `👋 ${mentionList} — it has been more than ${GRACE_PERIOD_DAYS} days since ` +
      `you were assigned to this issue and we could not find a pull request ` +
      `ready for review.\n\n` +
      `To keep the backlog moving and ensure issues stay accessible to all ` +
      `contributors, we require a PR that is open and ready for review (not a ` +
      `draft) within ${GRACE_PERIOD_DAYS} days of assignment.\n\n` +
      `We are automatically unassigning you so that other contributors can pick ` +
      `this up. If you are still actively working on this, please:\n` +
      `1. Re-assign yourself by commenting \`/assign\`.\n` +
      `2. Open a PR (not a draft) linked to this issue (e.g. \`Fixes #${issue.number}\`) ` +
      `within ${GRACE_PERIOD_DAYS} days so the automation knows real progress is being made.\n\n` +
      `Thank you for your contribution — we hope to see a PR from you soon! 🙏`;
    try {
      await github.rest.issues.createComment({
        owner,
        repo,
        issue_number: issue.number,
        body: commentBody,
      });
    } catch (err) {
      core.warning(
        `Failed to post comment on issue #${issue.number}: ${err.message}`
      );
    }
  }
  // Count dry-run candidates too so the summary line is meaningful either way.
  totalUnassigned += assigneesToRemove.length;
  core.info(
    ` ${dryRun ? '[DRY RUN] Would have unassigned' : 'Unassigned'}: ${assigneesToRemove.join(', ')}`
  );
}
core.info(`\nDone. Total assignees ${dryRun ? 'that would be' : ''} unassigned: ${totalUnassigned}`);
+6 -3
View File
@@ -1,6 +1,6 @@
# Latest stable release: v0.32.0 # Latest stable release: v0.32.1
Released: March 03, 2026 Released: March 4, 2026
For most users, our latest stable release is the recommended release. Install For most users, our latest stable release is the recommended release. Install
the latest stable version with: the latest stable version with:
@@ -29,6 +29,9 @@ npm install -g @google/gemini-cli
## What's Changed ## What's Changed
- fix(patch): cherry-pick 0659ad1 to release/v0.32.0-pr-21042 to patch version
v0.32.0 and create version 0.32.1 by @gemini-cli-robot in
[#21048](https://github.com/google-gemini/gemini-cli/pull/21048)
- feat(plan): add integration tests for plan mode by @Adib234 in - feat(plan): add integration tests for plan mode by @Adib234 in
[#20214](https://github.com/google-gemini/gemini-cli/pull/20214) [#20214](https://github.com/google-gemini/gemini-cli/pull/20214)
- fix(acp): update auth handshake to spec by @skeshive in - fix(acp): update auth handshake to spec by @skeshive in
@@ -202,4 +205,4 @@ npm install -g @google/gemini-cli
[#19781](https://github.com/google-gemini/gemini-cli/pull/19781) [#19781](https://github.com/google-gemini/gemini-cli/pull/19781)
**Full Changelog**: **Full Changelog**:
https://github.com/google-gemini/gemini-cli/compare/v0.31.0...v0.32.0 https://github.com/google-gemini/gemini-cli/compare/v0.31.0...v0.32.1
+46 -1
View File
@@ -50,6 +50,50 @@ Cross-platform sandboxing with complete process isolation.
**Note**: Requires building the sandbox image locally or using a published image **Note**: Requires building the sandbox image locally or using a published image
from your organization's registry. from your organization's registry.
### 3. LXC/LXD (Linux only, experimental)
Full-system container sandboxing using LXC/LXD. Unlike Docker/Podman, LXC
containers run a complete Linux system with `systemd`, `snapd`, and other system
services. This is ideal for tools that don't work in standard Docker containers,
such as Snapcraft and Rockcraft.
**Prerequisites**:
- Linux only.
- LXC/LXD must be installed (`snap install lxd` or `apt install lxd`).
- A container must be created and running before starting Gemini CLI. Gemini
does **not** create the container automatically.
**Quick setup**:
```bash
# Initialize LXD (first time only)
lxd init --auto
# Create and start an Ubuntu container
lxc launch ubuntu:24.04 gemini-sandbox
# Enable LXC sandboxing
export GEMINI_SANDBOX=lxc
gemini -p "build the project"
```
**Custom container name**:
```bash
export GEMINI_SANDBOX=lxc
export GEMINI_SANDBOX_IMAGE=my-snapcraft-container
gemini -p "build the snap"
```
**Limitations**:
- Linux only (LXC is not available on macOS or Windows).
- The container must already exist and be running.
- The workspace directory is bind-mounted into the container at the same
absolute path — the path must be writable inside the container.
- Used with tools like Snapcraft or Rockcraft that require a full system.
## Quickstart ## Quickstart
```bash ```bash
@@ -88,7 +132,8 @@ gemini -p "run the test suite"
### Enable sandboxing (in order of precedence) ### Enable sandboxing (in order of precedence)
1. **Command flag**: `-s` or `--sandbox` 1. **Command flag**: `-s` or `--sandbox`
2. **Environment variable**: `GEMINI_SANDBOX=true|docker|podman|sandbox-exec` 2. **Environment variable**:
`GEMINI_SANDBOX=true|docker|podman|sandbox-exec|lxc`
3. **Settings file**: `"sandbox": true` in the `tools` object of your 3. **Settings file**: `"sandbox": true` in the `tools` object of your
`settings.json` file (e.g., `{"tools": {"sandbox": true}}`). `settings.json` file (e.g., `{"tools": {"sandbox": true}}`).
+39 -1
View File
@@ -113,7 +113,45 @@ process.
ensure every issue is eventually categorized, even if the initial triage ensure every issue is eventually categorized, even if the initial triage
fails. fails.
### 5. Release automation ### 5. Automatic unassignment of inactive contributors: `Unassign Inactive Issue Assignees`
To keep the list of open `help wanted` issues accessible to all contributors,
this workflow automatically removes **external contributors** who have not
opened a linked pull request within **7 days** of being assigned. Maintainers,
org members, and repo collaborators with write access or above are always exempt
and will never be auto-unassigned.
- **Workflow File**: `.github/workflows/unassign-inactive-assignees.yml`
- **When it runs**: Every day at 09:00 UTC, and can be triggered manually with
an optional `dry_run` mode.
- **What it does**:
1. Finds every open issue labeled `help wanted` that has at least one
assignee.
2. Identifies privileged users (team members, repo collaborators with write+
access, maintainers) and skips them entirely.
3. For each remaining (external) assignee it reads the issue's timeline to
determine:
- The exact date they were assigned (using `assigned` timeline events).
- Whether they have opened a PR that is already linked/cross-referenced to
the issue.
4. Each cross-referenced PR is fetched to verify it is **ready for review**:
open and non-draft, or already merged. Draft PRs do not count.
5. If an assignee has been assigned for **more than 7 days** and no qualifying
PR is found, they are automatically unassigned and a comment is posted
explaining the reason and how to re-claim the issue.
6. Assignees who have a non-draft, open or merged PR linked to the issue are
**never** unassigned by this workflow.
- **What you should do**:
- **Open a real PR, not a draft**: Within 7 days of being assigned, open a PR
that is ready for review and include `Fixes #<issue-number>` in the
description. Draft PRs do not satisfy the requirement and will not prevent
auto-unassignment.
- **Re-assign if unassigned by mistake**: Comment `/assign` on the issue to
assign yourself again.
- **Unassign yourself** if you can no longer work on the issue by commenting
`/unassign`, so other contributors can pick it up right away.
### 6. Release automation
This workflow handles the process of packaging and publishing new versions of This workflow handles the process of packaging and publishing new versions of
the Gemini CLI. the Gemini CLI.
+2 -1
View File
@@ -747,7 +747,8 @@ their corresponding top-level category object in your `settings.json` file.
- **`tools.sandbox`** (boolean | string): - **`tools.sandbox`** (boolean | string):
- **Description:** Sandbox execution environment. Set to a boolean to enable - **Description:** Sandbox execution environment. Set to a boolean to enable
or disable the sandbox, or provide a string path to a sandbox profile. or disable the sandbox, provide a string path to a sandbox profile, or
specify an explicit sandbox command (e.g., "docker", "podman", "lxc").
- **Default:** `undefined` - **Default:** `undefined`
- **Requires restart:** Yes - **Requires restart:** Yes
+17 -1
View File
@@ -97,7 +97,7 @@ describe('loadSandboxConfig', () => {
it('should throw if GEMINI_SANDBOX is an invalid command', async () => { it('should throw if GEMINI_SANDBOX is an invalid command', async () => {
process.env['GEMINI_SANDBOX'] = 'invalid-command'; process.env['GEMINI_SANDBOX'] = 'invalid-command';
await expect(loadSandboxConfig({}, {})).rejects.toThrow( await expect(loadSandboxConfig({}, {})).rejects.toThrow(
"Invalid sandbox command 'invalid-command'. Must be one of docker, podman, sandbox-exec", "Invalid sandbox command 'invalid-command'. Must be one of docker, podman, sandbox-exec, lxc",
); );
}); });
@@ -108,6 +108,22 @@ describe('loadSandboxConfig', () => {
"Missing sandbox command 'docker' (from GEMINI_SANDBOX)", "Missing sandbox command 'docker' (from GEMINI_SANDBOX)",
); );
}); });
// GEMINI_SANDBOX=lxc selects the lxc command when the binary is installed.
it('should use lxc if GEMINI_SANDBOX=lxc and it exists', async () => {
process.env['GEMINI_SANDBOX'] = 'lxc';
mockedCommandExistsSync.mockReturnValue(true);
const config = await loadSandboxConfig({}, {});
// The resolved config carries the lxc command plus the default image name.
expect(config).toEqual({ command: 'lxc', image: 'default/image' });
expect(mockedCommandExistsSync).toHaveBeenCalledWith('lxc');
});
// A missing lxc binary is a hard error, mirroring docker/podman behavior.
it('should throw if GEMINI_SANDBOX=lxc but lxc command does not exist', async () => {
process.env['GEMINI_SANDBOX'] = 'lxc';
mockedCommandExistsSync.mockReturnValue(false);
await expect(loadSandboxConfig({}, {})).rejects.toThrow(
"Missing sandbox command 'lxc' (from GEMINI_SANDBOX)",
);
});
}); });
describe('with sandbox: true', () => { describe('with sandbox: true', () => {
+4
View File
@@ -27,6 +27,7 @@ const VALID_SANDBOX_COMMANDS: ReadonlyArray<SandboxConfig['command']> = [
'docker', 'docker',
'podman', 'podman',
'sandbox-exec', 'sandbox-exec',
'lxc',
]; ];
function isSandboxCommand(value: string): value is SandboxConfig['command'] { function isSandboxCommand(value: string): value is SandboxConfig['command'] {
@@ -91,6 +92,9 @@ function getSandboxCommand(
} }
return ''; return '';
// Note: 'lxc' is intentionally not auto-detected because it requires a
// pre-existing, running container managed by the user. Use
// GEMINI_SANDBOX=lxc or sandbox: "lxc" in settings to enable it.
} }
export async function loadSandboxConfig( export async function loadSandboxConfig(
+2 -1
View File
@@ -1236,7 +1236,8 @@ const SETTINGS_SCHEMA = {
ref: 'BooleanOrString', ref: 'BooleanOrString',
description: oneLine` description: oneLine`
Sandbox execution environment. Sandbox execution environment.
Set to a boolean to enable or disable the sandbox, or provide a string path to a sandbox profile. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile,
or specify an explicit sandbox command (e.g., "docker", "podman", "lxc").
`, `,
showInDialog: false, showInDialog: false,
}, },
+19
View File
@@ -9,6 +9,7 @@ import { performInitialAuth } from './auth.js';
import { import {
type Config, type Config,
ValidationRequiredError, ValidationRequiredError,
ProjectIdRequiredError,
AuthType, AuthType,
} from '@google/gemini-cli-core'; } from '@google/gemini-cli-core';
@@ -116,4 +117,22 @@ describe('auth', () => {
AuthType.LOGIN_WITH_GOOGLE, AuthType.LOGIN_WITH_GOOGLE,
); );
}); });
// ProjectIdRequiredError means OAuth itself succeeded, so the message must
// be surfaced verbatim instead of being wrapped in a "Failed to login" prefix.
it('should return ProjectIdRequiredError message without "Failed to login" prefix', async () => {
const projectIdError = new ProjectIdRequiredError();
vi.mocked(mockConfig.refreshAuth).mockRejectedValue(projectIdError);
const result = await performInitialAuth(
mockConfig,
AuthType.LOGIN_WITH_GOOGLE,
);
// The raw error text is returned as-is, with no suspension info attached.
expect(result).toEqual({
authError:
'This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID env var. See https://goo.gle/gemini-cli-auth-docs#workspace-gca',
accountSuspensionInfo: null,
});
expect(result.authError).not.toContain('Failed to login');
// The auth attempt itself is still made with the requested auth type.
expect(mockConfig.refreshAuth).toHaveBeenCalledWith(
AuthType.LOGIN_WITH_GOOGLE,
);
});
}); });
+9
View File
@@ -10,6 +10,7 @@ import {
getErrorMessage, getErrorMessage,
ValidationRequiredError, ValidationRequiredError,
isAccountSuspendedError, isAccountSuspendedError,
ProjectIdRequiredError,
} from '@google/gemini-cli-core'; } from '@google/gemini-cli-core';
import type { AccountSuspensionInfo } from '../ui/contexts/UIStateContext.js'; import type { AccountSuspensionInfo } from '../ui/contexts/UIStateContext.js';
@@ -54,6 +55,14 @@ export async function performInitialAuth(
}, },
}; };
} }
if (e instanceof ProjectIdRequiredError) {
// OAuth succeeded but account setup requires project ID
// Show the error message directly without "Failed to login" prefix
return {
authError: getErrorMessage(e),
accountSuspensionInfo: null,
};
}
return { return {
authError: `Failed to login. Message: ${getErrorMessage(e)}`, authError: `Failed to login. Message: ${getErrorMessage(e)}`,
accountSuspensionInfo: null, accountSuspensionInfo: null,
+7
View File
@@ -80,6 +80,7 @@ import {
type ConsentRequestPayload, type ConsentRequestPayload,
type AgentsDiscoveredPayload, type AgentsDiscoveredPayload,
ChangeAuthRequestedError, ChangeAuthRequestedError,
ProjectIdRequiredError,
CoreToolCallStatus, CoreToolCallStatus,
generateSteeringAckMessage, generateSteeringAckMessage,
buildUserSteeringHintPrompt, buildUserSteeringHintPrompt,
@@ -771,6 +772,12 @@ export const AppContainer = (props: AppContainerProps) => {
if (e instanceof ChangeAuthRequestedError) { if (e instanceof ChangeAuthRequestedError) {
return; return;
} }
if (e instanceof ProjectIdRequiredError) {
// OAuth succeeded but account setup requires project ID
// Show the error message directly without "Failed to authenticate" prefix
onAuthError(getErrorMessage(e));
return;
}
onAuthError( onAuthError(
`Failed to authenticate: ${e instanceof Error ? e.message : String(e)}`, `Failed to authenticate: ${e instanceof Error ? e.message : String(e)}`,
); );
+21 -1
View File
@@ -15,7 +15,11 @@ import {
} from 'vitest'; } from 'vitest';
import { renderHook } from '../../test-utils/render.js'; import { renderHook } from '../../test-utils/render.js';
import { useAuthCommand, validateAuthMethodWithSettings } from './useAuth.js'; import { useAuthCommand, validateAuthMethodWithSettings } from './useAuth.js';
import { AuthType, type Config } from '@google/gemini-cli-core'; import {
AuthType,
type Config,
ProjectIdRequiredError,
} from '@google/gemini-cli-core';
import { AuthState } from '../types.js'; import { AuthState } from '../types.js';
import type { LoadedSettings } from '../../config/settings.js'; import type { LoadedSettings } from '../../config/settings.js';
import { waitFor } from '../../test-utils/async.js'; import { waitFor } from '../../test-utils/async.js';
@@ -288,5 +292,21 @@ describe('useAuth', () => {
expect(result.current.authState).toBe(AuthState.Updating); expect(result.current.authState).toBe(AuthState.Updating);
}); });
}); });
// Same contract as performInitialAuth: a ProjectIdRequiredError is shown
// verbatim by the hook, without the generic "Failed to login" wrapper.
it('should handle ProjectIdRequiredError without "Failed to login" prefix', async () => {
const projectIdError = new ProjectIdRequiredError();
(mockConfig.refreshAuth as Mock).mockRejectedValue(projectIdError);
const { result } = renderHook(() =>
useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig),
);
// waitFor: the auth flow settles asynchronously after mount.
await waitFor(() => {
expect(result.current.authError).toBe(
'This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID env var. See https://goo.gle/gemini-cli-auth-docs#workspace-gca',
);
expect(result.current.authError).not.toContain('Failed to login');
expect(result.current.authState).toBe(AuthState.Updating);
});
});
}); });
}); });
+5
View File
@@ -12,6 +12,7 @@ import {
loadApiKey, loadApiKey,
debugLogger, debugLogger,
isAccountSuspendedError, isAccountSuspendedError,
ProjectIdRequiredError,
} from '@google/gemini-cli-core'; } from '@google/gemini-cli-core';
import { getErrorMessage } from '@google/gemini-cli-core'; import { getErrorMessage } from '@google/gemini-cli-core';
import { AuthState } from '../types.js'; import { AuthState } from '../types.js';
@@ -143,6 +144,10 @@ export const useAuthCommand = (
appealUrl: suspendedError.appealUrl, appealUrl: suspendedError.appealUrl,
appealLinkText: suspendedError.appealLinkText, appealLinkText: suspendedError.appealLinkText,
}); });
} else if (e instanceof ProjectIdRequiredError) {
// OAuth succeeded but account setup requires project ID
// Show the error message directly without "Failed to login" prefix
onAuthError(getErrorMessage(e));
} else { } else {
onAuthError(`Failed to login. Message: ${getErrorMessage(e)}`); onAuthError(`Failed to login. Message: ${getErrorMessage(e)}`);
} }
@@ -120,8 +120,8 @@ describe('useAtCompletion', () => {
expect(result.current.suggestions.map((s) => s.value)).toEqual([ expect(result.current.suggestions.map((s) => s.value)).toEqual([
'src/', 'src/',
'src/components/',
'src/index.js', 'src/index.js',
'src/components/',
'src/components/Button.tsx', 'src/components/Button.tsx',
]); ]);
}); });
+100 -1
View File
@@ -5,7 +5,7 @@
*/ */
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest'; import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
import { spawn, exec, execSync } from 'node:child_process'; import { spawn, exec, execFile, execSync } from 'node:child_process';
import os from 'node:os'; import os from 'node:os';
import fs from 'node:fs'; import fs from 'node:fs';
import { start_sandbox } from './sandbox.js'; import { start_sandbox } from './sandbox.js';
@@ -50,6 +50,26 @@ vi.mock('node:util', async (importOriginal) => {
return { stdout: '', stderr: '' }; return { stdout: '', stderr: '' };
}; };
} }
if (fn === execFile) {
return async (file: string, args: string[]) => {
if (file === 'lxc' && args[0] === 'list') {
const output = process.env['TEST_LXC_LIST_OUTPUT'];
if (output === 'throw') {
throw new Error('lxc command not found');
}
return { stdout: output ?? '[]', stderr: '' };
}
if (
file === 'lxc' &&
args[0] === 'config' &&
args[1] === 'device' &&
args[2] === 'add'
) {
return { stdout: '', stderr: '' };
}
return { stdout: '', stderr: '' };
};
}
return actual.promisify(fn); return actual.promisify(fn);
}, },
}; };
@@ -473,5 +493,84 @@ describe('sandbox', () => {
expect(entrypointCmd).toContain('useradd'); expect(entrypointCmd).toContain('useradd');
expect(entrypointCmd).toContain('su -p gemini'); expect(entrypointCmd).toContain('su -p gemini');
}); });
// Tests for the LXC/LXD sandbox path of start_sandbox. The `lxc list`
// output is faked through TEST_LXC_LIST_OUTPUT (see the execFile mock
// earlier in this file), and `spawn` is mocked to simulate `lxc exec`.
describe('LXC sandbox', () => {
// Canned `lxc list --format=json` payloads for a running/stopped container.
const LXC_RUNNING = JSON.stringify([
{ name: 'gemini-sandbox', status: 'Running' },
]);
const LXC_STOPPED = JSON.stringify([
{ name: 'gemini-sandbox', status: 'Stopped' },
]);
beforeEach(() => {
// Start each test with no fake list output configured.
delete process.env['TEST_LXC_LIST_OUTPUT'];
});
it('should run lxc exec with correct args for a running container', async () => {
process.env['TEST_LXC_LIST_OUTPUT'] = LXC_RUNNING;
const config: SandboxConfig = {
command: 'lxc',
image: 'gemini-sandbox',
};
// Fake child process whose 'close' handler fires with exit code 0.
const mockSpawnProcess = new EventEmitter() as unknown as ReturnType<
typeof spawn
>;
mockSpawnProcess.on = vi.fn().mockImplementation((event, cb) => {
if (event === 'close') {
setTimeout(() => cb(0), 10);
}
return mockSpawnProcess;
});
// Only the 'lxc' spawn gets the fake process; anything else is inert.
vi.mocked(spawn).mockImplementation((cmd) => {
if (cmd === 'lxc') {
return mockSpawnProcess;
}
return new EventEmitter() as unknown as ReturnType<typeof spawn>;
});
const promise = start_sandbox(config, [], undefined, ['arg1']);
await expect(promise).resolves.toBe(0);
// The sandbox must exec into the named container with inherited stdio.
expect(spawn).toHaveBeenCalledWith(
'lxc',
expect.arrayContaining(['exec', 'gemini-sandbox', '--cwd']),
expect.objectContaining({ stdio: 'inherit' }),
);
});
// 'throw' makes the execFile mock reject, simulating a missing lxc binary.
it('should throw FatalSandboxError if lxc list fails', async () => {
process.env['TEST_LXC_LIST_OUTPUT'] = 'throw';
const config: SandboxConfig = {
command: 'lxc',
image: 'gemini-sandbox',
};
await expect(start_sandbox(config)).rejects.toThrow(
/Failed to query LXC container/,
);
});
// Existing-but-stopped containers are rejected with a "not running" error.
it('should throw FatalSandboxError if container is not running', async () => {
process.env['TEST_LXC_LIST_OUTPUT'] = LXC_STOPPED;
const config: SandboxConfig = {
command: 'lxc',
image: 'gemini-sandbox',
};
await expect(start_sandbox(config)).rejects.toThrow(/is not running/);
});
// An empty list means the named container does not exist at all.
it('should throw FatalSandboxError if container is not found in list', async () => {
process.env['TEST_LXC_LIST_OUTPUT'] = '[]';
const config: SandboxConfig = {
command: 'lxc',
image: 'gemini-sandbox',
};
await expect(start_sandbox(config)).rejects.toThrow(/not found/);
});
});
}); });
}); });
+215 -1
View File
@@ -4,7 +4,14 @@
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
import { exec, execSync, spawn, type ChildProcess } from 'node:child_process'; import {
exec,
execFile,
execFileSync,
execSync,
spawn,
type ChildProcess,
} from 'node:child_process';
import path from 'node:path'; import path from 'node:path';
import fs from 'node:fs'; import fs from 'node:fs';
import os from 'node:os'; import os from 'node:os';
@@ -34,6 +41,7 @@ import {
} from './sandboxUtils.js'; } from './sandboxUtils.js';
const execAsync = promisify(exec); const execAsync = promisify(exec);
const execFileAsync = promisify(execFile);
export async function start_sandbox( export async function start_sandbox(
config: SandboxConfig, config: SandboxConfig,
@@ -203,6 +211,10 @@ export async function start_sandbox(
}); });
} }
if (config.command === 'lxc') {
return await start_lxc_sandbox(config, nodeArgs, cliArgs);
}
debugLogger.log(`hopping into sandbox (command: ${config.command}) ...`); debugLogger.log(`hopping into sandbox (command: ${config.command}) ...`);
// determine full path for gemini-cli to distinguish linked vs installed setting // determine full path for gemini-cli to distinguish linked vs installed setting
@@ -722,6 +734,208 @@ export async function start_sandbox(
} }
} }
// Helper function to start a sandbox using LXC/LXD.
// Unlike Docker/Podman, LXC does not launch a transient container from an
// image. The user creates and manages their own LXC container; Gemini runs
// inside it via `lxc exec`. The container name is stored in config.image
// (default: "gemini-sandbox"). The workspace is bind-mounted into the
// container at the same absolute path.
/**
 * Runs the Gemini CLI inside a pre-existing, running LXC container.
 *
 * Verifies the container exists and is running, bind-mounts the current
 * working directory into the container at the same absolute path, forwards a
 * fixed set of environment variables, and execs the CLI entrypoint via
 * `lxc exec`. The workspace mount is removed when the child process ends
 * (with a best-effort fallback on process exit).
 *
 * @param config Sandbox configuration; `config.image` holds the LXC
 *   container name (defaults to "gemini-sandbox").
 * @param nodeArgs Extra Node.js options (e.g. from --inspect flags) appended
 *   to NODE_OPTIONS inside the container.
 * @param cliArgs CLI arguments forwarded to the sandboxed entrypoint.
 * @returns The child process exit code (1 when it was killed by a signal).
 * @throws FatalSandboxError when LXC cannot be queried, the container is
 *   missing or not running, the workspace mount fails, or SANDBOX_ENV is
 *   malformed.
 */
async function start_lxc_sandbox(
  config: SandboxConfig,
  nodeArgs: string[] = [],
  cliArgs: string[] = [],
): Promise<number> {
  const containerName = config.image || 'gemini-sandbox';
  const workdir = path.resolve(process.cwd());
  debugLogger.log(
    `starting lxc sandbox (container: ${containerName}, workdir: ${workdir}) ...`,
  );
  // Verify the container exists and is running.
  let listOutput: string;
  try {
    const { stdout } = await execFileAsync('lxc', [
      'list',
      containerName,
      '--format=json',
    ]);
    listOutput = stdout.trim();
  } catch (err) {
    throw new FatalSandboxError(
      `Failed to query LXC container '${containerName}': ${err instanceof Error ? err.message : String(err)}. ` +
        `Make sure LXC/LXD is installed and '${containerName}' container exists. ` +
        `Create one with: lxc launch ubuntu:24.04 ${containerName}`,
    );
  }
  // Defensively parse the `lxc list --format=json` output: anything
  // malformed or shaped unexpectedly degrades to "no containers found" and
  // is reported via the not-found error below.
  let containers: Array<{ name: string; status: string }> = [];
  try {
    const parsed: unknown = JSON.parse(listOutput);
    if (Array.isArray(parsed)) {
      containers = parsed
        .filter(
          (item): item is Record<string, unknown> =>
            item !== null &&
            typeof item === 'object' &&
            'name' in item &&
            'status' in item,
        )
        .map((item) => ({
          name: String(item['name']),
          status: String(item['status']),
        }));
    }
  } catch {
    containers = [];
  }
  // `lxc list <name>` filters by substring match, so compare names exactly.
  const container = containers.find((c) => c.name === containerName);
  if (!container) {
    throw new FatalSandboxError(
      `LXC container '${containerName}' not found. ` +
        `Create one with: lxc launch ubuntu:24.04 ${containerName}`,
    );
  }
  if (container.status.toLowerCase() !== 'running') {
    throw new FatalSandboxError(
      `LXC container '${containerName}' is not running (current status: ${container.status}). ` +
        `Start it with: lxc start ${containerName}`,
    );
  }
  // Bind-mount the working directory into the container at the same path.
  // The device name is randomized so concurrent Gemini sessions mounting
  // into the same container never collide on "lxc config device add".
  const deviceName = `gemini-workspace-${randomBytes(4).toString('hex')}`;
  try {
    await execFileAsync('lxc', [
      'config',
      'device',
      'add',
      containerName,
      deviceName,
      'disk',
      `source=${workdir}`,
      `path=${workdir}`,
    ]);
    debugLogger.log(
      `mounted workspace '${workdir}' into container as device '${deviceName}'`,
    );
  } catch (err) {
    throw new FatalSandboxError(
      `Failed to mount workspace into LXC container '${containerName}': ${err instanceof Error ? err.message : String(err)}`,
    );
  }
  // Remove the workspace device from the container when we are done. Guarded
  // so it is idempotent: it can be reached from the child's 'close' handler,
  // its 'error' handler, and the process 'exit' fallback below.
  let deviceRemoved = false;
  const removeDevice = () => {
    if (deviceRemoved) {
      return;
    }
    deviceRemoved = true;
    try {
      // execFileSync is required here: async work cannot run inside an
      // 'exit' handler.
      execFileSync(
        'lxc',
        ['config', 'device', 'remove', containerName, deviceName],
        { timeout: 2000 },
      );
    } catch {
      // Best-effort cleanup; ignore errors on exit.
    }
  };
  // Fallback cleanup path: the CLI's cleanup.ts handles SIGINT and SIGTERM
  // by calling process.exit(), which fires 'exit'.
  process.on('exit', removeDevice);
  // Build the environment variable arguments for `lxc exec`.
  const envArgs: string[] = [];
  const envVarsToForward: Record<string, string | undefined> = {
    GEMINI_API_KEY: process.env['GEMINI_API_KEY'],
    GOOGLE_API_KEY: process.env['GOOGLE_API_KEY'],
    GOOGLE_GEMINI_BASE_URL: process.env['GOOGLE_GEMINI_BASE_URL'],
    GOOGLE_VERTEX_BASE_URL: process.env['GOOGLE_VERTEX_BASE_URL'],
    GOOGLE_GENAI_USE_VERTEXAI: process.env['GOOGLE_GENAI_USE_VERTEXAI'],
    GOOGLE_GENAI_USE_GCA: process.env['GOOGLE_GENAI_USE_GCA'],
    GOOGLE_CLOUD_PROJECT: process.env['GOOGLE_CLOUD_PROJECT'],
    GOOGLE_CLOUD_LOCATION: process.env['GOOGLE_CLOUD_LOCATION'],
    GEMINI_MODEL: process.env['GEMINI_MODEL'],
    TERM: process.env['TERM'],
    COLORTERM: process.env['COLORTERM'],
    GEMINI_CLI_IDE_SERVER_PORT: process.env['GEMINI_CLI_IDE_SERVER_PORT'],
    GEMINI_CLI_IDE_WORKSPACE_PATH: process.env['GEMINI_CLI_IDE_WORKSPACE_PATH'],
    TERM_PROGRAM: process.env['TERM_PROGRAM'],
  };
  for (const [key, value] of Object.entries(envVarsToForward)) {
    if (value) {
      envArgs.push('--env', `${key}=${value}`);
    }
  }
  // Forward SANDBOX_ENV key=value pairs
  if (process.env['SANDBOX_ENV']) {
    for (let env of process.env['SANDBOX_ENV'].split(',')) {
      if ((env = env.trim())) {
        if (env.includes('=')) {
          envArgs.push('--env', env);
        } else {
          throw new FatalSandboxError(
            'SANDBOX_ENV must be a comma-separated list of key=value pairs',
          );
        }
      }
    }
  }
  // Forward NODE_OPTIONS (e.g. from --inspect flags)
  const existingNodeOptions = process.env['NODE_OPTIONS'] || '';
  const allNodeOptions = [
    ...(existingNodeOptions ? [existingNodeOptions] : []),
    ...nodeArgs,
  ].join(' ');
  if (allNodeOptions.length > 0) {
    envArgs.push('--env', `NODE_OPTIONS=${allNodeOptions}`);
  }
  // Mark that we're running inside an LXC sandbox.
  envArgs.push('--env', `SANDBOX=${containerName}`);
  // Build the command entrypoint (same logic as Docker path).
  const finalEntrypoint = entrypoint(workdir, cliArgs);
  // Build the full lxc exec command args.
  const args = [
    'exec',
    containerName,
    '--cwd',
    workdir,
    ...envArgs,
    '--',
    ...finalEntrypoint,
  ];
  debugLogger.log(`lxc exec args: ${args.join(' ')}`);
  // Hand stdin over to the child (stdio: 'inherit') for the duration.
  process.stdin.pause();
  const sandboxProcess = spawn('lxc', args, {
    stdio: 'inherit',
  });
  return new Promise<number>((resolve, reject) => {
    // Per Node's child_process docs, when spawning fails 'error' is emitted
    // and 'close' may never fire, so cleanup must run on BOTH paths; the
    // removeDevice guard makes double invocation harmless.
    sandboxProcess.on('error', (err) => {
      process.stdin.resume();
      process.off('exit', removeDevice);
      removeDevice();
      coreEvents.emitFeedback('error', 'LXC sandbox process error', err);
      reject(err);
    });
    sandboxProcess.on('close', (code, signal) => {
      process.stdin.resume();
      process.off('exit', removeDevice);
      removeDevice();
      if (code !== 0 && code !== null) {
        debugLogger.log(
          `LXC sandbox process exited with code: ${code}, signal: ${signal}`,
        );
      }
      // code is null when the child was killed by a signal; report failure.
      resolve(code ?? 1);
    });
  });
}
// Helper functions to ensure sandbox image is present // Helper functions to ensure sandbox image is present
async function imageExists(sandbox: string, image: string): Promise<boolean> { async function imageExists(sandbox: string, image: string): Promise<boolean> {
return new Promise((resolve) => { return new Promise((resolve) => {
+247 -3
View File
@@ -10,8 +10,14 @@ import { OAuth2Client } from 'google-auth-library';
import { UserTierId, ActionStatus } from './types.js'; import { UserTierId, ActionStatus } from './types.js';
import { FinishReason } from '@google/genai'; import { FinishReason } from '@google/genai';
import { LlmRole } from '../telemetry/types.js'; import { LlmRole } from '../telemetry/types.js';
import { logInvalidChunk } from '../telemetry/loggers.js';
import { makeFakeConfig } from '../test-utils/config.js';
vi.mock('google-auth-library'); vi.mock('google-auth-library');
vi.mock('../telemetry/loggers.js', () => ({
logBillingEvent: vi.fn(),
logInvalidChunk: vi.fn(),
}));
function createTestServer(headers: Record<string, string> = {}) { function createTestServer(headers: Record<string, string> = {}) {
const mockRequest = vi.fn(); const mockRequest = vi.fn();
@@ -116,7 +122,7 @@ describe('CodeAssistServer', () => {
role: 'model', role: 'model',
parts: [ parts: [
{ text: 'response' }, { text: 'response' },
{ functionCall: { name: 'test', args: {} } }, { functionCall: { name: 'replace', args: {} } },
], ],
}, },
finishReason: FinishReason.SAFETY, finishReason: FinishReason.SAFETY,
@@ -160,7 +166,7 @@ describe('CodeAssistServer', () => {
role: 'model', role: 'model',
parts: [ parts: [
{ text: 'response' }, { text: 'response' },
{ functionCall: { name: 'test', args: {} } }, { functionCall: { name: 'replace', args: {} } },
], ],
}, },
finishReason: FinishReason.STOP, finishReason: FinishReason.STOP,
@@ -233,7 +239,7 @@ describe('CodeAssistServer', () => {
content: { content: {
parts: [ parts: [
{ text: 'chunk' }, { text: 'chunk' },
{ functionCall: { name: 'test', args: {} } }, { functionCall: { name: 'replace', args: {} } },
], ],
}, },
}, },
@@ -671,4 +677,242 @@ describe('CodeAssistServer', () => {
expect(requestPostSpy).toHaveBeenCalledWith('retrieveUserQuota', req); expect(requestPostSpy).toHaveBeenCalledWith('retrieveUserQuota', req);
expect(response).toEqual(mockResponse); expect(response).toEqual(mockResponse);
}); });
describe('robustness testing', () => {
it('should not crash on random error objects in loadCodeAssist (isVpcScAffectedUser)', async () => {
const { server } = createTestServer();
const errors = [
null,
undefined,
'string error',
123,
{ some: 'object' },
new Error('standard error'),
{ response: {} },
{ response: { data: {} } },
];
for (const err of errors) {
vi.spyOn(server, 'requestPost').mockRejectedValueOnce(err);
try {
await server.loadCodeAssist({ metadata: {} });
} catch (e) {
expect(e).toBe(err);
}
}
});
it('should handle randomly fragmented SSE streams gracefully', async () => {
const { server, mockRequest } = createTestServer();
const { Readable } = await import('node:stream');
const fragmentedCases = [
{
chunks: ['d', 'ata: {"foo":', ' "bar"}\n\n'],
expected: [{ foo: 'bar' }],
},
{
chunks: ['data: {"foo": "bar"}\n', '\n'],
expected: [{ foo: 'bar' }],
},
{
chunks: ['data: ', '{"foo": "bar"}', '\n\n'],
expected: [{ foo: 'bar' }],
},
{
chunks: ['data: {"foo": "bar"}\n\n', 'data: {"baz": 1}\n\n'],
expected: [{ foo: 'bar' }, { baz: 1 }],
},
];
for (const { chunks, expected } of fragmentedCases) {
const mockStream = new Readable({
read() {
for (const chunk of chunks) {
this.push(chunk);
}
this.push(null);
},
});
mockRequest.mockResolvedValueOnce({ data: mockStream });
const stream = await server.requestStreamingPost('testStream', {});
const results = [];
for await (const res of stream) {
results.push(res);
}
expect(results).toEqual(expected);
}
});
it('should correctly parse valid JSON split across multiple data lines', async () => {
const { server, mockRequest } = createTestServer();
const { Readable } = await import('node:stream');
const jsonObj = {
complex: { structure: [1, 2, 3] },
bool: true,
str: 'value',
};
const jsonString = JSON.stringify(jsonObj, null, 2);
const lines = jsonString.split('\n');
const ssePayload = lines.map((line) => `data: ${line}\n`).join('') + '\n';
const mockStream = new Readable({
read() {
this.push(ssePayload);
this.push(null);
},
});
mockRequest.mockResolvedValueOnce({ data: mockStream });
const stream = await server.requestStreamingPost('testStream', {});
const results = [];
for await (const res of stream) {
results.push(res);
}
expect(results).toHaveLength(1);
expect(results[0]).toEqual(jsonObj);
});
it('should not crash on objects partially matching VPC SC error structure', async () => {
const { server } = createTestServer();
const partialErrors = [
{ response: { data: { error: { details: [{ reason: 'OTHER' }] } } } },
{ response: { data: { error: { details: [] } } } },
{ response: { data: { error: {} } } },
{ response: { data: {} } },
];
for (const err of partialErrors) {
vi.spyOn(server, 'requestPost').mockRejectedValueOnce(err);
try {
await server.loadCodeAssist({ metadata: {} });
} catch (e) {
expect(e).toBe(err);
}
}
});
it('should correctly ignore arbitrary SSE comments and ID lines and empty lines before data', async () => {
const { server, mockRequest } = createTestServer();
const { Readable } = await import('node:stream');
const jsonObj = { foo: 'bar' };
const jsonString = JSON.stringify(jsonObj);
const ssePayload = `id: 123
:comment
retry: 100
data: ${jsonString}
`;
const mockStream = new Readable({
read() {
this.push(ssePayload);
this.push(null);
},
});
mockRequest.mockResolvedValueOnce({ data: mockStream });
const stream = await server.requestStreamingPost('testStream', {});
const results = [];
for await (const res of stream) {
results.push(res);
}
expect(results).toHaveLength(1);
expect(results[0]).toEqual(jsonObj);
});
it('should log InvalidChunkEvent when SSE chunk is not valid JSON', async () => {
const config = makeFakeConfig();
const mockRequest = vi.fn();
const client = { request: mockRequest } as unknown as OAuth2Client;
const server = new CodeAssistServer(
client,
'test-project',
{},
'test-session',
UserTierId.FREE,
undefined,
undefined,
config,
);
const { Readable } = await import('node:stream');
const mockStream = new Readable({
read() {},
});
mockRequest.mockResolvedValue({ data: mockStream });
const stream = await server.requestStreamingPost('testStream', {});
setTimeout(() => {
mockStream.push('data: { "invalid": json }\n\n');
mockStream.push(null);
}, 0);
const results = [];
for await (const res of stream) {
results.push(res);
}
expect(results).toHaveLength(0);
expect(logInvalidChunk).toHaveBeenCalledWith(
config,
expect.objectContaining({
error_message: 'Malformed JSON chunk',
}),
);
});
it('should safely process random response streams in generateContentStream (consumed/remaining credits)', async () => {
const { mockRequest, client } = createTestServer();
const testServer = new CodeAssistServer(
client,
'test-project',
{},
'test-session',
UserTierId.FREE,
undefined,
{ id: 'test-tier', name: 'tier', availableCredits: [] },
);
const { Readable } = await import('node:stream');
const streamResponses = [
{
traceId: '1',
consumedCredits: [{ creditType: 'A', creditAmount: '10' }],
},
{ traceId: '2', remainingCredits: [{ creditType: 'B' }] },
{ traceId: '3' },
{ traceId: '4', consumedCredits: null, remainingCredits: undefined },
];
const mockStream = new Readable({
read() {
for (const resp of streamResponses) {
this.push(`data: ${JSON.stringify(resp)}\n\n`);
}
this.push(null);
},
});
mockRequest.mockResolvedValueOnce({ data: mockStream });
vi.spyOn(testServer, 'recordCodeAssistMetrics').mockResolvedValue(
undefined,
);
const stream = await testServer.generateContentStream(
{ model: 'test-model', contents: [] },
'user-prompt-id',
LlmRole.MAIN,
);
for await (const _ of stream) {
// Drain stream
}
// Should not crash
});
});
}); });
+16 -5
View File
@@ -47,7 +47,7 @@ import {
isOverageEligibleModel, isOverageEligibleModel,
shouldAutoUseCredits, shouldAutoUseCredits,
} from '../billing/billing.js'; } from '../billing/billing.js';
import { logBillingEvent } from '../telemetry/loggers.js'; import { logBillingEvent, logInvalidChunk } from '../telemetry/loggers.js';
import { CreditsUsedEvent } from '../telemetry/billingEvents.js'; import { CreditsUsedEvent } from '../telemetry/billingEvents.js';
import { import {
fromCountTokenResponse, fromCountTokenResponse,
@@ -62,7 +62,7 @@ import {
recordConversationOffered, recordConversationOffered,
} from './telemetry.js'; } from './telemetry.js';
import { getClientMetadata } from './experiments/client_metadata.js'; import { getClientMetadata } from './experiments/client_metadata.js';
import type { LlmRole } from '../telemetry/types.js'; import { InvalidChunkEvent, type LlmRole } from '../telemetry/types.js';
/** HTTP options to be used in each of the requests. */ /** HTTP options to be used in each of the requests. */
export interface HttpOptions { export interface HttpOptions {
/** Additional HTTP headers to be sent with the request. */ /** Additional HTTP headers to be sent with the request. */
@@ -466,7 +466,7 @@ export class CodeAssistServer implements ContentGenerator {
retry: false, retry: false,
}); });
return (async function* (): AsyncGenerator<T> { return (async function* (server: CodeAssistServer): AsyncGenerator<T> {
const rl = readline.createInterface({ const rl = readline.createInterface({
input: Readable.from(res.data), input: Readable.from(res.data),
crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks
@@ -480,12 +480,23 @@ export class CodeAssistServer implements ContentGenerator {
if (bufferedLines.length === 0) { if (bufferedLines.length === 0) {
continue; // no data to yield continue; // no data to yield
} }
yield JSON.parse(bufferedLines.join('\n')); const chunk = bufferedLines.join('\n');
try {
yield JSON.parse(chunk);
} catch (_e) {
if (server.config) {
logInvalidChunk(
server.config,
// Don't include the chunk content in the log for security/privacy reasons.
new InvalidChunkEvent('Malformed JSON chunk'),
);
}
}
bufferedLines = []; // Reset the buffer after yielding bufferedLines = []; // Reset the buffer after yielding
} }
// Ignore other lines like comments or id fields // Ignore other lines like comments or id fields
} }
})(); })(this);
} }
private getBaseUrl(): string { private getBaseUrl(): string {
+93 -26
View File
@@ -82,7 +82,7 @@ describe('telemetry', () => {
}, },
], ],
true, true,
[{ name: 'someTool', args: {} }], [{ name: 'replace', args: {} }],
); );
const traceId = 'test-trace-id'; const traceId = 'test-trace-id';
const streamingLatency: StreamingLatency = { totalLatency: '1s' }; const streamingLatency: StreamingLatency = { totalLatency: '1s' };
@@ -130,7 +130,7 @@ describe('telemetry', () => {
it('should set status to CANCELLED if signal is aborted', () => { it('should set status to CANCELLED if signal is aborted', () => {
const response = createMockResponse([], true, [ const response = createMockResponse([], true, [
{ name: 'tool', args: {} }, { name: 'replace', args: {} },
]); ]);
const signal = new AbortController().signal; const signal = new AbortController().signal;
vi.spyOn(signal, 'aborted', 'get').mockReturnValue(true); vi.spyOn(signal, 'aborted', 'get').mockReturnValue(true);
@@ -147,7 +147,7 @@ describe('telemetry', () => {
it('should set status to ERROR_UNKNOWN if response has error (non-OK SDK response)', () => { it('should set status to ERROR_UNKNOWN if response has error (non-OK SDK response)', () => {
const response = createMockResponse([], false, [ const response = createMockResponse([], false, [
{ name: 'tool', args: {} }, { name: 'replace', args: {} },
]); ]);
const result = createConversationOffered( const result = createConversationOffered(
@@ -169,7 +169,7 @@ describe('telemetry', () => {
}, },
], ],
true, true,
[{ name: 'tool', args: {} }], [{ name: 'replace', args: {} }],
); );
const result = createConversationOffered( const result = createConversationOffered(
@@ -186,7 +186,7 @@ describe('telemetry', () => {
// We force functionCalls to be present to bypass the guard, // We force functionCalls to be present to bypass the guard,
// simulating a state where we want to test the candidates check. // simulating a state where we want to test the candidates check.
const response = createMockResponse([], true, [ const response = createMockResponse([], true, [
{ name: 'tool', args: {} }, { name: 'replace', args: {} },
]); ]);
const result = createConversationOffered( const result = createConversationOffered(
@@ -212,7 +212,7 @@ describe('telemetry', () => {
}, },
], ],
true, true,
[{ name: 'tool', args: {} }], [{ name: 'replace', args: {} }],
); );
const result = createConversationOffered(response, 'id', undefined, {}); const result = createConversationOffered(response, 'id', undefined, {});
expect(result?.includedCode).toBe(true); expect(result?.includedCode).toBe(true);
@@ -229,7 +229,7 @@ describe('telemetry', () => {
}, },
], ],
true, true,
[{ name: 'tool', args: {} }], [{ name: 'replace', args: {} }],
); );
const result = createConversationOffered(response, 'id', undefined, {}); const result = createConversationOffered(response, 'id', undefined, {});
expect(result?.includedCode).toBe(false); expect(result?.includedCode).toBe(false);
@@ -250,7 +250,7 @@ describe('telemetry', () => {
} as unknown as CodeAssistServer; } as unknown as CodeAssistServer;
const response = createMockResponse([], true, [ const response = createMockResponse([], true, [
{ name: 'tool', args: {} }, { name: 'replace', args: {} },
]); ]);
const streamingLatency = {}; const streamingLatency = {};
@@ -274,7 +274,7 @@ describe('telemetry', () => {
recordConversationOffered: vi.fn(), recordConversationOffered: vi.fn(),
} as unknown as CodeAssistServer; } as unknown as CodeAssistServer;
const response = createMockResponse([], true, [ const response = createMockResponse([], true, [
{ name: 'tool', args: {} }, { name: 'replace', args: {} },
]); ]);
await recordConversationOffered( await recordConversationOffered(
@@ -331,17 +331,89 @@ describe('telemetry', () => {
await recordToolCallInteractions({} as Config, toolCalls); await recordToolCallInteractions({} as Config, toolCalls);
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith({ expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
traceId: 'trace-1', expect.objectContaining({
status: ActionStatus.ACTION_STATUS_NO_ERROR, traceId: 'trace-1',
interaction: ConversationInteractionInteraction.ACCEPT_FILE, status: ActionStatus.ACTION_STATUS_NO_ERROR,
acceptedLines: '5', interaction: ConversationInteractionInteraction.ACCEPT_FILE,
removedLines: '3', acceptedLines: '8',
isAgentic: true, removedLines: '3',
}); isAgentic: true,
}),
);
}); });
it('should record UNKNOWN interaction for other accepted tools', async () => { it('should include language in interaction if file_path is present', async () => {
const toolCalls: CompletedToolCall[] = [
{
request: {
name: 'replace',
args: {
file_path: 'test.ts',
old_string: 'old',
new_string: 'new',
},
callId: 'call-1',
isClientInitiated: false,
prompt_id: 'p1',
traceId: 'trace-1',
},
response: {
resultDisplay: {
diffStat: {
model_added_lines: 5,
model_removed_lines: 3,
},
},
},
outcome: ToolConfirmationOutcome.ProceedOnce,
status: 'success',
} as unknown as CompletedToolCall,
];
await recordToolCallInteractions({} as Config, toolCalls);
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
expect.objectContaining({
language: 'TypeScript',
}),
);
});
it('should include language in interaction if write_file is used', async () => {
const toolCalls: CompletedToolCall[] = [
{
request: {
name: 'write_file',
args: { file_path: 'test.py', content: 'test' },
callId: 'call-1',
isClientInitiated: false,
prompt_id: 'p1',
traceId: 'trace-1',
},
response: {
resultDisplay: {
diffStat: {
model_added_lines: 5,
model_removed_lines: 3,
},
},
},
outcome: ToolConfirmationOutcome.ProceedOnce,
status: 'success',
} as unknown as CompletedToolCall,
];
await recordToolCallInteractions({} as Config, toolCalls);
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
expect.objectContaining({
language: 'Python',
}),
);
});
it('should not record interaction for other accepted tools', async () => {
const toolCalls: CompletedToolCall[] = [ const toolCalls: CompletedToolCall[] = [
{ {
request: { request: {
@@ -359,19 +431,14 @@ describe('telemetry', () => {
await recordToolCallInteractions({} as Config, toolCalls); await recordToolCallInteractions({} as Config, toolCalls);
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith({ expect(mockServer.recordConversationInteraction).not.toHaveBeenCalled();
traceId: 'trace-2',
status: ActionStatus.ACTION_STATUS_NO_ERROR,
interaction: ConversationInteractionInteraction.UNKNOWN,
isAgentic: true,
});
}); });
it('should not record interaction for cancelled status', async () => { it('should not record interaction for cancelled status', async () => {
const toolCalls: CompletedToolCall[] = [ const toolCalls: CompletedToolCall[] = [
{ {
request: { request: {
name: 'tool', name: 'replace',
args: {}, args: {},
callId: 'call-3', callId: 'call-3',
isClientInitiated: false, isClientInitiated: false,
@@ -394,7 +461,7 @@ describe('telemetry', () => {
const toolCalls: CompletedToolCall[] = [ const toolCalls: CompletedToolCall[] = [
{ {
request: { request: {
name: 'tool', name: 'replace',
args: {}, args: {},
callId: 'call-4', callId: 'call-4',
isClientInitiated: false, isClientInitiated: false,
+31 -12
View File
@@ -22,10 +22,13 @@ import { EDIT_TOOL_NAMES } from '../tools/tool-names.js';
import { getErrorMessage } from '../utils/errors.js'; import { getErrorMessage } from '../utils/errors.js';
import type { CodeAssistServer } from './server.js'; import type { CodeAssistServer } from './server.js';
import { ToolConfirmationOutcome } from '../tools/tools.js'; import { ToolConfirmationOutcome } from '../tools/tools.js';
import { getLanguageFromFilePath } from '../utils/language-detection.js';
import { import {
computeModelAddedAndRemovedLines, computeModelAddedAndRemovedLines,
getFileDiffFromResultDisplay, getFileDiffFromResultDisplay,
} from '../utils/fileDiffUtils.js'; } from '../utils/fileDiffUtils.js';
import { isEditToolParams } from '../tools/edit.js';
import { isWriteFileToolParams } from '../tools/write-file.js';
export async function recordConversationOffered( export async function recordConversationOffered(
server: CodeAssistServer, server: CodeAssistServer,
@@ -85,10 +88,12 @@ export function createConversationOffered(
signal: AbortSignal | undefined, signal: AbortSignal | undefined,
streamingLatency: StreamingLatency, streamingLatency: StreamingLatency,
): ConversationOffered | undefined { ): ConversationOffered | undefined {
// Only send conversation offered events for responses that contain function // Only send conversation offered events for responses that contain edit
// calls. Non-function call events don't represent user actionable // function calls. Non-edit function calls don't represent file modifications.
// 'suggestions'. if (
if ((response.functionCalls?.length || 0) === 0) { !response.functionCalls ||
!response.functionCalls.some((call) => EDIT_TOOL_NAMES.has(call.name || ''))
) {
return; return;
} }
@@ -116,6 +121,7 @@ function summarizeToolCalls(
let isEdit = false; let isEdit = false;
let acceptedLines = 0; let acceptedLines = 0;
let removedLines = 0; let removedLines = 0;
let language = undefined;
// Iterate the tool calls and summarize them into a single conversation // Iterate the tool calls and summarize them into a single conversation
// interaction so that the ConversationOffered and ConversationInteraction // interaction so that the ConversationOffered and ConversationInteraction
@@ -144,13 +150,23 @@ function summarizeToolCalls(
if (EDIT_TOOL_NAMES.has(toolCall.request.name)) { if (EDIT_TOOL_NAMES.has(toolCall.request.name)) {
isEdit = true; isEdit = true;
if (
!language &&
(isEditToolParams(toolCall.request.args) ||
isWriteFileToolParams(toolCall.request.args))
) {
language = getLanguageFromFilePath(toolCall.request.args.file_path);
}
if (toolCall.status === 'success') { if (toolCall.status === 'success') {
const fileDiff = getFileDiffFromResultDisplay( const fileDiff = getFileDiffFromResultDisplay(
toolCall.response.resultDisplay, toolCall.response.resultDisplay,
); );
if (fileDiff?.diffStat) { if (fileDiff?.diffStat) {
const lines = computeModelAddedAndRemovedLines(fileDiff.diffStat); const lines = computeModelAddedAndRemovedLines(fileDiff.diffStat);
acceptedLines += lines.addedLines;
// The API expects acceptedLines to be addedLines + removedLines.
acceptedLines += lines.addedLines + lines.removedLines;
removedLines += lines.removedLines; removedLines += lines.removedLines;
} }
} }
@@ -158,16 +174,16 @@ function summarizeToolCalls(
} }
} }
// Only file interaction telemetry if 100% of the tool calls were accepted. // Only file interaction telemetry if 100% of the tool calls were accepted
return traceId && acceptedToolCalls / toolCalls.length >= 1 // and at least one of them was an edit.
return traceId && acceptedToolCalls / toolCalls.length >= 1 && isEdit
? createConversationInteraction( ? createConversationInteraction(
traceId, traceId,
actionStatus || ActionStatus.ACTION_STATUS_NO_ERROR, actionStatus || ActionStatus.ACTION_STATUS_NO_ERROR,
isEdit ConversationInteractionInteraction.ACCEPT_FILE,
? ConversationInteractionInteraction.ACCEPT_FILE String(acceptedLines),
: ConversationInteractionInteraction.UNKNOWN, String(removedLines),
isEdit ? String(acceptedLines) : undefined, language,
isEdit ? String(removedLines) : undefined,
) )
: undefined; : undefined;
} }
@@ -178,6 +194,7 @@ function createConversationInteraction(
interaction: ConversationInteractionInteraction, interaction: ConversationInteractionInteraction,
acceptedLines?: string, acceptedLines?: string,
removedLines?: string, removedLines?: string,
language?: string,
): ConversationInteraction { ): ConversationInteraction {
return { return {
traceId, traceId,
@@ -185,9 +202,11 @@ function createConversationInteraction(
interaction, interaction,
acceptedLines, acceptedLines,
removedLines, removedLines,
language,
isAgentic: true, isAgentic: true,
}; };
} }
function includesCode(resp: GenerateContentResponse): boolean { function includesCode(resp: GenerateContentResponse): boolean {
if (!resp.candidates) { if (!resp.candidates) {
return false; return false;
+1 -1
View File
@@ -447,7 +447,7 @@ export enum AuthProviderType {
} }
export interface SandboxConfig { export interface SandboxConfig {
command: 'docker' | 'podman' | 'sandbox-exec'; command: 'docker' | 'podman' | 'sandbox-exec' | 'lxc';
image: string; image: string;
} }
+247 -35
View File
@@ -47,7 +47,7 @@ import type {
} from '../services/modelConfigService.js'; } from '../services/modelConfigService.js';
import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js'; import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';
import * as policyCatalog from '../availability/policyCatalog.js'; import * as policyCatalog from '../availability/policyCatalog.js';
import { LlmRole } from '../telemetry/types.js'; import { LlmRole, LoopType } from '../telemetry/types.js';
import { partToString } from '../utils/partUtils.js'; import { partToString } from '../utils/partUtils.js';
import { coreEvents } from '../utils/events.js'; import { coreEvents } from '../utils/events.js';
@@ -2915,45 +2915,257 @@ ${JSON.stringify(
expect(mockCheckNextSpeaker).not.toHaveBeenCalled(); expect(mockCheckNextSpeaker).not.toHaveBeenCalled();
}); });
it('should abort linked signal when loop is detected', async () => { describe('Loop Recovery (Two-Strike)', () => {
// Arrange beforeEach(() => {
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue(false); const mockChat: Partial<GeminiChat> = {
vi.spyOn(client['loopDetector'], 'addAndCheck') addHistory: vi.fn(),
.mockReturnValueOnce(false) setTools: vi.fn(),
.mockReturnValueOnce(true); getHistory: vi.fn().mockReturnValue([]),
getLastPromptTokenCount: vi.fn(),
let capturedSignal: AbortSignal; };
mockTurnRunFn.mockImplementation((_modelConfigKey, _request, signal) => { client['chat'] = mockChat as GeminiChat;
capturedSignal = signal; vi.spyOn(client['loopDetector'], 'clearDetection');
return (async function* () { vi.spyOn(client['loopDetector'], 'reset');
yield { type: 'content', value: 'First event' };
yield { type: 'content', value: 'Second event' };
})();
}); });
const mockChat: Partial<GeminiChat> = { it('should trigger recovery (Strike 1) and continue', async () => {
addHistory: vi.fn(), // Arrange
setTools: vi.fn(), vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
getHistory: vi.fn().mockReturnValue([]), count: 0,
getLastPromptTokenCount: vi.fn(), });
}; vi.spyOn(client['loopDetector'], 'addAndCheck')
client['chat'] = mockChat as GeminiChat; .mockReturnValueOnce({ count: 0 })
.mockReturnValueOnce({ count: 1, detail: 'Repetitive tool call' });
// Act const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-id-loop',
);
const events = []; mockTurnRunFn.mockImplementation(() =>
for await (const event of stream) { (async function* () {
events.push(event); yield { type: GeminiEventType.Content, value: 'First event' };
} yield { type: GeminiEventType.Content, value: 'Second event' };
})(),
);
// Assert // Act
expect(events).toContainEqual({ type: GeminiEventType.LoopDetected }); const stream = client.sendMessageStream(
expect(capturedSignal!.aborted).toBe(true); [{ text: 'Hi' }],
new AbortController().signal,
'prompt-id-loop-1',
);
const events = [];
for await (const event of stream) {
events.push(event);
}
// Assert
// sendMessageStream should be called twice (original + recovery)
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2);
// Verify recovery call parameters
const recoveryCall = sendMessageStreamSpy.mock.calls[1];
expect((recoveryCall[0] as Part[])[0].text).toContain(
'System: Potential loop detected',
);
expect((recoveryCall[0] as Part[])[0].text).toContain(
'Repetitive tool call',
);
// Verify loopDetector.clearDetection was called
expect(client['loopDetector'].clearDetection).toHaveBeenCalled();
});
it('should terminate (Strike 2) after recovery fails', async () => {
// Arrange
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
count: 0,
});
// First call triggers Strike 1, Second call triggers Strike 2
vi.spyOn(client['loopDetector'], 'addAndCheck')
.mockReturnValueOnce({ count: 0 })
.mockReturnValueOnce({ count: 1, detail: 'Strike 1' }) // Triggers recovery in turn 1
.mockReturnValueOnce({ count: 2, detail: 'Strike 2' }); // Triggers termination in turn 2 (recovery turn)
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.Content, value: 'Event' };
yield { type: GeminiEventType.Content, value: 'Event' };
})(),
);
// Act
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-id-loop-2',
);
const events = [];
for await (const event of stream) {
events.push(event);
}
// Assert
expect(events).toContainEqual({ type: GeminiEventType.LoopDetected });
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2); // One original, one recovery
});
it('should respect boundedTurns during recovery', async () => {
// Arrange
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
count: 0,
});
vi.spyOn(client['loopDetector'], 'addAndCheck').mockReturnValue({
count: 1,
detail: 'Loop',
});
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.Content, value: 'Event' };
})(),
);
// Act
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-id-loop-3',
1, // Only 1 turn allowed
);
const events = [];
for await (const event of stream) {
events.push(event);
}
// Assert
// Should NOT trigger recovery because boundedTurns would reach 0
expect(events).toContainEqual({
type: GeminiEventType.MaxSessionTurns,
});
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(1);
});
it('should suppress LoopDetected event on Strike 1', async () => {
// Arrange
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
count: 0,
});
vi.spyOn(client['loopDetector'], 'addAndCheck')
.mockReturnValueOnce({ count: 0 })
.mockReturnValueOnce({ count: 1, detail: 'Strike 1' });
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.Content, value: 'Event' };
yield { type: GeminiEventType.Content, value: 'Event 2' };
})(),
);
// Act
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-telemetry',
);
const events = [];
for await (const event of stream) {
events.push(event);
}
// Assert
// Strike 1 should trigger recovery call but NOT emit LoopDetected event
expect(events).not.toContainEqual({
type: GeminiEventType.LoopDetected,
});
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2);
});
it('should escalate Strike 2 even if loop type changes', async () => {
// Arrange
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
count: 0,
});
// Strike 1: Tool Call Loop, Strike 2: LLM Detected Loop
vi.spyOn(client['loopDetector'], 'addAndCheck')
.mockReturnValueOnce({ count: 0 })
.mockReturnValueOnce({
count: 1,
type: LoopType.TOOL_CALL_LOOP,
detail: 'Repetitive tool',
})
.mockReturnValueOnce({
count: 2,
type: LoopType.LLM_DETECTED_LOOP,
detail: 'LLM loop',
});
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.Content, value: 'Event' };
yield { type: GeminiEventType.Content, value: 'Event 2' };
})(),
);
// Act
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-escalate',
);
const events = [];
for await (const event of stream) {
events.push(event);
}
// Assert
expect(events).toContainEqual({ type: GeminiEventType.LoopDetected });
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2);
});
it('should reset loop detector on new prompt', async () => {
// Arrange
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
count: 0,
});
vi.spyOn(client['loopDetector'], 'addAndCheck').mockReturnValue({
count: 0,
});
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.Content, value: 'Event' };
})(),
);
// Act
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-id-new',
);
for await (const _ of stream) {
// Consume stream
}
// Assert
expect(client['loopDetector'].reset).toHaveBeenCalledWith(
'prompt-id-new',
'Hi',
);
});
}); });
}); });
+70 -3
View File
@@ -642,10 +642,23 @@ export class GeminiClient {
const controller = new AbortController(); const controller = new AbortController();
const linkedSignal = AbortSignal.any([signal, controller.signal]); const linkedSignal = AbortSignal.any([signal, controller.signal]);
const loopDetected = await this.loopDetector.turnStarted(signal); const loopResult = await this.loopDetector.turnStarted(signal);
if (loopDetected) { if (loopResult.count > 1) {
yield { type: GeminiEventType.LoopDetected }; yield { type: GeminiEventType.LoopDetected };
return turn; return turn;
} else if (loopResult.count === 1) {
if (boundedTurns <= 1) {
yield { type: GeminiEventType.MaxSessionTurns };
return turn;
}
return yield* this._recoverFromLoop(
loopResult,
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
} }
const routingContext: RoutingContext = { const routingContext: RoutingContext = {
@@ -696,10 +709,26 @@ export class GeminiClient {
let isInvalidStream = false; let isInvalidStream = false;
for await (const event of resultStream) { for await (const event of resultStream) {
if (this.loopDetector.addAndCheck(event)) { const loopResult = this.loopDetector.addAndCheck(event);
if (loopResult.count > 1) {
yield { type: GeminiEventType.LoopDetected }; yield { type: GeminiEventType.LoopDetected };
controller.abort(); controller.abort();
return turn; return turn;
} else if (loopResult.count === 1) {
if (boundedTurns <= 1) {
yield { type: GeminiEventType.MaxSessionTurns };
controller.abort();
return turn;
}
return yield* this._recoverFromLoop(
loopResult,
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
controller,
);
} }
yield event; yield event;
@@ -1128,4 +1157,42 @@ export class GeminiClient {
this.getChat().setHistory(result.newHistory); this.getChat().setHistory(result.newHistory);
} }
} }
/**
* Handles loop recovery by providing feedback to the model and initiating a new turn.
*/
private _recoverFromLoop(
loopResult: { detail?: string },
signal: AbortSignal,
prompt_id: string,
boundedTurns: number,
isInvalidStreamRetry: boolean,
displayContent?: PartListUnion,
controllerToAbort?: AbortController,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
controllerToAbort?.abort();
// Clear the detection flag so the recursive turn can proceed, but the count remains 1.
this.loopDetector.clearDetection();
const feedbackText = `System: Potential loop detected. Details: ${loopResult.detail || 'Repetitive patterns identified'}. Please take a step back and confirm you're making forward progress. If not, take a step back, analyze your previous actions and rethink how you're approaching the problem. Avoid repeating the same tool calls or responses without new results.`;
if (this.config.getDebugMode()) {
debugLogger.warn(
'Iterative Loop Recovery: Injecting feedback message to model.',
);
}
const feedback = [{ text: feedbackText }];
// Recursive call with feedback
return this.sendMessageStream(
feedback,
signal,
prompt_id,
boundedTurns - 1,
isInvalidStreamRetry,
displayContent,
);
}
} }
@@ -79,7 +79,7 @@ describe('LoopDetectionService', () => {
it(`should not detect a loop for fewer than TOOL_CALL_LOOP_THRESHOLD identical calls`, () => { it(`should not detect a loop for fewer than TOOL_CALL_LOOP_THRESHOLD identical calls`, () => {
const event = createToolCallRequestEvent('testTool', { param: 'value' }); const event = createToolCallRequestEvent('testTool', { param: 'value' });
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
expect(service.addAndCheck(event)).toBe(false); expect(service.addAndCheck(event).count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -89,7 +89,7 @@ describe('LoopDetectionService', () => {
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
service.addAndCheck(event); service.addAndCheck(event);
} }
expect(service.addAndCheck(event)).toBe(true); expect(service.addAndCheck(event).count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -98,7 +98,7 @@ describe('LoopDetectionService', () => {
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
service.addAndCheck(event); service.addAndCheck(event);
} }
expect(service.addAndCheck(event)).toBe(true); expect(service.addAndCheck(event).count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -114,9 +114,9 @@ describe('LoopDetectionService', () => {
}); });
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 2; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 2; i++) {
expect(service.addAndCheck(event1)).toBe(false); expect(service.addAndCheck(event1).count).toBe(0);
expect(service.addAndCheck(event2)).toBe(false); expect(service.addAndCheck(event2).count).toBe(0);
expect(service.addAndCheck(event3)).toBe(false); expect(service.addAndCheck(event3).count).toBe(0);
} }
}); });
@@ -130,14 +130,14 @@ describe('LoopDetectionService', () => {
// Send events just below the threshold // Send events just below the threshold
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
expect(service.addAndCheck(toolCallEvent)).toBe(false); expect(service.addAndCheck(toolCallEvent).count).toBe(0);
} }
// Send a different event type // Send a different event type
expect(service.addAndCheck(otherEvent)).toBe(false); expect(service.addAndCheck(otherEvent).count).toBe(0);
// Send the tool call event again, which should now trigger the loop // Send the tool call event again, which should now trigger the loop
expect(service.addAndCheck(toolCallEvent)).toBe(true); expect(service.addAndCheck(toolCallEvent).count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -146,7 +146,7 @@ describe('LoopDetectionService', () => {
expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1);
const event = createToolCallRequestEvent('testTool', { param: 'value' }); const event = createToolCallRequestEvent('testTool', { param: 'value' });
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
expect(service.addAndCheck(event)).toBe(false); expect(service.addAndCheck(event).count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -156,19 +156,19 @@ describe('LoopDetectionService', () => {
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
service.addAndCheck(event); service.addAndCheck(event);
} }
expect(service.addAndCheck(event)).toBe(true); expect(service.addAndCheck(event).count).toBe(1);
service.disableForSession(); service.disableForSession();
// Should now return false even though a loop was previously detected // Should now return 0 even though a loop was previously detected
expect(service.addAndCheck(event)).toBe(false); expect(service.addAndCheck(event).count).toBe(0);
}); });
it('should skip loop detection if disabled in config', () => { it('should skip loop detection if disabled in config', () => {
vi.spyOn(mockConfig, 'getDisableLoopDetection').mockReturnValue(true); vi.spyOn(mockConfig, 'getDisableLoopDetection').mockReturnValue(true);
const event = createToolCallRequestEvent('testTool', { param: 'value' }); const event = createToolCallRequestEvent('testTool', { param: 'value' });
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD + 2; i++) { for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD + 2; i++) {
expect(service.addAndCheck(event)).toBe(false); expect(service.addAndCheck(event).count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -192,8 +192,8 @@ describe('LoopDetectionService', () => {
service.reset(''); service.reset('');
for (let i = 0; i < 1000; i++) { for (let i = 0; i < 1000; i++) {
const content = generateRandomString(10); const content = generateRandomString(10);
const isLoop = service.addAndCheck(createContentEvent(content)); const result = service.addAndCheck(createContentEvent(content));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -202,17 +202,17 @@ describe('LoopDetectionService', () => {
service.reset(''); service.reset('');
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE); const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
isLoop = service.addAndCheck(createContentEvent(repeatedContent)); result = service.addAndCheck(createContentEvent(repeatedContent));
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
it('should not detect a loop for a list with a long shared prefix', () => { it('should not detect a loop for a list with a long shared prefix', () => {
service.reset(''); service.reset('');
let isLoop = false; let result = { count: 0 };
const longPrefix = const longPrefix =
'projects/my-google-cloud-project-12345/locations/us-central1/services/'; 'projects/my-google-cloud-project-12345/locations/us-central1/services/';
@@ -223,9 +223,9 @@ describe('LoopDetectionService', () => {
// Simulate receiving the list in a single large chunk or a few chunks // Simulate receiving the list in a single large chunk or a few chunks
// This is the specific case where the issue occurs, as list boundaries might not reset tracking properly // This is the specific case where the issue occurs, as list boundaries might not reset tracking properly
isLoop = service.addAndCheck(createContentEvent(listContent)); result = service.addAndCheck(createContentEvent(listContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -234,12 +234,12 @@ describe('LoopDetectionService', () => {
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE); const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
const fillerContent = generateRandomString(500); const fillerContent = generateRandomString(500);
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
isLoop = service.addAndCheck(createContentEvent(repeatedContent)); result = service.addAndCheck(createContentEvent(repeatedContent));
isLoop = service.addAndCheck(createContentEvent(fillerContent)); result = service.addAndCheck(createContentEvent(fillerContent));
} }
expect(isLoop).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -248,12 +248,12 @@ describe('LoopDetectionService', () => {
const longPattern = createRepetitiveContent(1, 150); const longPattern = createRepetitiveContent(1, 150);
expect(longPattern.length).toBe(150); expect(longPattern.length).toBe(150);
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 2; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 2; i++) {
isLoop = service.addAndCheck(createContentEvent(longPattern)); result = service.addAndCheck(createContentEvent(longPattern));
if (isLoop) break; if (result.count > 0) break;
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -266,13 +266,13 @@ describe('LoopDetectionService', () => {
I will wait for the user's next command. I will wait for the user's next command.
`; `;
let isLoop = false; let result = { count: 0 };
// Loop enough times to trigger the threshold // Loop enough times to trigger the threshold
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
isLoop = service.addAndCheck(createContentEvent(userPattern)); result = service.addAndCheck(createContentEvent(userPattern));
if (isLoop) break; if (result.count > 0) break;
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -281,12 +281,12 @@ describe('LoopDetectionService', () => {
const userPattern = const userPattern =
'I have added all the requested logs and verified the test file. I will now mark the task as complete.\n '; 'I have added all the requested logs and verified the test file. I will now mark the task as complete.\n ';
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
isLoop = service.addAndCheck(createContentEvent(userPattern)); result = service.addAndCheck(createContentEvent(userPattern));
if (isLoop) break; if (result.count > 0) break;
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -294,14 +294,14 @@ describe('LoopDetectionService', () => {
service.reset(''); service.reset('');
const alternatingPattern = 'Thinking... Done. '; const alternatingPattern = 'Thinking... Done. ';
let isLoop = false; let result = { count: 0 };
// Needs more iterations because the pattern is short relative to chunk size, // Needs more iterations because the pattern is short relative to chunk size,
// so it takes a few slides of the window to find the exact alignment. // so it takes a few slides of the window to find the exact alignment.
for (let i = 0; i < CONTENT_LOOP_THRESHOLD * 3; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD * 3; i++) {
isLoop = service.addAndCheck(createContentEvent(alternatingPattern)); result = service.addAndCheck(createContentEvent(alternatingPattern));
if (isLoop) break; if (result.count > 0) break;
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -310,12 +310,12 @@ describe('LoopDetectionService', () => {
const thoughtPattern = const thoughtPattern =
'I need to check the file. The file does not exist. I will create the file. '; 'I need to check the file. The file does not exist. I will create the file. ';
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
isLoop = service.addAndCheck(createContentEvent(thoughtPattern)); result = service.addAndCheck(createContentEvent(thoughtPattern));
if (isLoop) break; if (result.count > 0) break;
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
}); });
@@ -328,12 +328,12 @@ describe('LoopDetectionService', () => {
service.addAndCheck(createContentEvent('```\n')); service.addAndCheck(createContentEvent('```\n'));
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatedContent)); const result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
const isLoop = service.addAndCheck(createContentEvent('\n```')); const result = service.addAndCheck(createContentEvent('\n```'));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -349,15 +349,15 @@ describe('LoopDetectionService', () => {
// Now transition into a code block - this should prevent loop detection // Now transition into a code block - this should prevent loop detection
// even though we were already close to the threshold // even though we were already close to the threshold
const codeBlockStart = '```javascript\n'; const codeBlockStart = '```javascript\n';
const isLoop = service.addAndCheck(createContentEvent(codeBlockStart)); const result = service.addAndCheck(createContentEvent(codeBlockStart));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
// Continue adding repetitive content inside the code block - should not trigger loop // Continue adding repetitive content inside the code block - should not trigger loop
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
const isLoopInside = service.addAndCheck( const resultInside = service.addAndCheck(
createContentEvent(repeatedContent), createContentEvent(repeatedContent),
); );
expect(isLoopInside).toBe(false); expect(resultInside.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -372,8 +372,8 @@ describe('LoopDetectionService', () => {
// Verify we are now inside a code block and any content should be ignored for loop detection // Verify we are now inside a code block and any content should be ignored for loop detection
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE); const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatedContent)); const result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -388,25 +388,25 @@ describe('LoopDetectionService', () => {
// Enter code block (1 fence) - should stop tracking // Enter code block (1 fence) - should stop tracking
const enterResult = service.addAndCheck(createContentEvent('```\n')); const enterResult = service.addAndCheck(createContentEvent('```\n'));
expect(enterResult).toBe(false); expect(enterResult.count).toBe(0);
// Inside code block - should not track loops // Inside code block - should not track loops
for (let i = 0; i < 5; i++) { for (let i = 0; i < 5; i++) {
const insideResult = service.addAndCheck( const insideResult = service.addAndCheck(
createContentEvent(repeatedContent), createContentEvent(repeatedContent),
); );
expect(insideResult).toBe(false); expect(insideResult.count).toBe(0);
} }
// Exit code block (2nd fence) - should reset tracking but still return false // Exit code block (2nd fence) - should reset tracking but still return false
const exitResult = service.addAndCheck(createContentEvent('```\n')); const exitResult = service.addAndCheck(createContentEvent('```\n'));
expect(exitResult).toBe(false); expect(exitResult.count).toBe(0);
// Enter code block again (3rd fence) - should stop tracking again // Enter code block again (3rd fence) - should stop tracking again
const reenterResult = service.addAndCheck( const reenterResult = service.addAndCheck(
createContentEvent('```python\n'), createContentEvent('```python\n'),
); );
expect(reenterResult).toBe(false); expect(reenterResult.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -419,11 +419,11 @@ describe('LoopDetectionService', () => {
service.addAndCheck(createContentEvent('\nsome code\n')); service.addAndCheck(createContentEvent('\nsome code\n'));
service.addAndCheck(createContentEvent('```')); service.addAndCheck(createContentEvent('```'));
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
isLoop = service.addAndCheck(createContentEvent(repeatedContent)); result = service.addAndCheck(createContentEvent(repeatedContent));
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -431,9 +431,9 @@ describe('LoopDetectionService', () => {
service.reset(''); service.reset('');
service.addAndCheck(createContentEvent('```\ncode1\n```')); service.addAndCheck(createContentEvent('```\ncode1\n```'));
service.addAndCheck(createContentEvent('\nsome text\n')); service.addAndCheck(createContentEvent('\nsome text\n'));
const isLoop = service.addAndCheck(createContentEvent('```\ncode2\n```')); const result = service.addAndCheck(createContentEvent('```\ncode2\n```'));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -445,12 +445,12 @@ describe('LoopDetectionService', () => {
service.addAndCheck(createContentEvent('\ncode1\n')); service.addAndCheck(createContentEvent('\ncode1\n'));
service.addAndCheck(createContentEvent('```')); service.addAndCheck(createContentEvent('```'));
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
isLoop = service.addAndCheck(createContentEvent(repeatedContent)); result = service.addAndCheck(createContentEvent(repeatedContent));
} }
expect(isLoop).toBe(true); expect(result.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
}); });
@@ -462,12 +462,12 @@ describe('LoopDetectionService', () => {
service.addAndCheck(createContentEvent('```\n')); service.addAndCheck(createContentEvent('```\n'));
for (let i = 0; i < 20; i++) { for (let i = 0; i < 20; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatingTokens)); const result = service.addAndCheck(createContentEvent(repeatingTokens));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
const isLoop = service.addAndCheck(createContentEvent('\n```')); const result = service.addAndCheck(createContentEvent('\n```'));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -484,10 +484,10 @@ describe('LoopDetectionService', () => {
// We are now in a code block, so loop detection should be off. // We are now in a code block, so loop detection should be off.
// Let's add the repeated content again, it should not trigger a loop. // Let's add the repeated content again, it should not trigger a loop.
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
isLoop = service.addAndCheck(createContentEvent(repeatedContent)); result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -505,8 +505,8 @@ describe('LoopDetectionService', () => {
// Add more repeated content after table - should not trigger loop // Add more repeated content after table - should not trigger loop
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatedContent)); const result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -525,8 +525,8 @@ describe('LoopDetectionService', () => {
// Add more repeated content after list - should not trigger loop // Add more repeated content after list - should not trigger loop
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatedContent)); const result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -545,8 +545,8 @@ describe('LoopDetectionService', () => {
// Add more repeated content after heading - should not trigger loop // Add more repeated content after heading - should not trigger loop
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatedContent)); const result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -565,8 +565,8 @@ describe('LoopDetectionService', () => {
// Add more repeated content after blockquote - should not trigger loop // Add more repeated content after blockquote - should not trigger loop
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck(createContentEvent(repeatedContent)); const result = service.addAndCheck(createContentEvent(repeatedContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
@@ -601,10 +601,10 @@ describe('LoopDetectionService', () => {
CONTENT_CHUNK_SIZE, CONTENT_CHUNK_SIZE,
); );
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck( const result = service.addAndCheck(
createContentEvent(newRepeatedContent), createContentEvent(newRepeatedContent),
); );
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
}); });
@@ -638,10 +638,10 @@ describe('LoopDetectionService', () => {
CONTENT_CHUNK_SIZE, CONTENT_CHUNK_SIZE,
); );
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck( const result = service.addAndCheck(
createContentEvent(newRepeatedContent), createContentEvent(newRepeatedContent),
); );
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
}); });
@@ -677,10 +677,10 @@ describe('LoopDetectionService', () => {
CONTENT_CHUNK_SIZE, CONTENT_CHUNK_SIZE,
); );
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
const isLoop = service.addAndCheck( const result = service.addAndCheck(
createContentEvent(newRepeatedContent), createContentEvent(newRepeatedContent),
); );
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
}); });
@@ -691,7 +691,7 @@ describe('LoopDetectionService', () => {
describe('Edge Cases', () => { describe('Edge Cases', () => {
it('should handle empty content', () => { it('should handle empty content', () => {
const event = createContentEvent(''); const event = createContentEvent('');
expect(service.addAndCheck(event)).toBe(false); expect(service.addAndCheck(event).count).toBe(0);
}); });
}); });
@@ -699,10 +699,10 @@ describe('LoopDetectionService', () => {
it('should not detect a loop for repeating divider-like content', () => { it('should not detect a loop for repeating divider-like content', () => {
service.reset(''); service.reset('');
const dividerContent = '-'.repeat(CONTENT_CHUNK_SIZE); const dividerContent = '-'.repeat(CONTENT_CHUNK_SIZE);
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
isLoop = service.addAndCheck(createContentEvent(dividerContent)); result = service.addAndCheck(createContentEvent(dividerContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
@@ -710,15 +710,52 @@ describe('LoopDetectionService', () => {
it('should not detect a loop for repeating complex box-drawing dividers', () => { it('should not detect a loop for repeating complex box-drawing dividers', () => {
service.reset(''); service.reset('');
const dividerContent = '╭─'.repeat(CONTENT_CHUNK_SIZE / 2); const dividerContent = '╭─'.repeat(CONTENT_CHUNK_SIZE / 2);
let isLoop = false; let result = { count: 0 };
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) { for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
isLoop = service.addAndCheck(createContentEvent(dividerContent)); result = service.addAndCheck(createContentEvent(dividerContent));
expect(isLoop).toBe(false); expect(result.count).toBe(0);
} }
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
}); });
describe('Strike Management', () => {
it('should increment strike count for repeated detections', () => {
const event = createToolCallRequestEvent('testTool', { param: 'value' });
// First strike
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
service.addAndCheck(event);
}
expect(service.addAndCheck(event).count).toBe(1);
// Recovery simulated by caller calling clearDetection()
service.clearDetection();
// Second strike
expect(service.addAndCheck(event).count).toBe(2);
});
it('should allow recovery turn to proceed after clearDetection', () => {
const event = createToolCallRequestEvent('testTool', { param: 'value' });
// Trigger loop
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
service.addAndCheck(event);
}
expect(service.addAndCheck(event).count).toBe(1);
// Caller clears detection to allow recovery
service.clearDetection();
// Subsequent call in the same turn (or next turn before it repeats) should be 0
// In reality, addAndCheck is called per event.
// If the model sends a NEW event, it should not immediately trigger.
const newEvent = createContentEvent('Recovery text');
expect(service.addAndCheck(newEvent).count).toBe(0);
});
});
describe('Reset Functionality', () => { describe('Reset Functionality', () => {
it('tool call should reset content count', () => { it('tool call should reset content count', () => {
const contentEvent = createContentEvent('Some content.'); const contentEvent = createContentEvent('Some content.');
@@ -732,19 +769,19 @@ describe('LoopDetectionService', () => {
service.addAndCheck(toolEvent); service.addAndCheck(toolEvent);
// Should start fresh // Should start fresh
expect(service.addAndCheck(createContentEvent('Fresh content.'))).toBe( expect(
false, service.addAndCheck(createContentEvent('Fresh content.')).count,
); ).toBe(0);
}); });
}); });
describe('General Behavior', () => { describe('General Behavior', () => {
it('should return false for unhandled event types', () => { it('should return 0 count for unhandled event types', () => {
const otherEvent = { const otherEvent = {
type: 'unhandled_event', type: 'unhandled_event',
} as unknown as ServerGeminiStreamEvent; } as unknown as ServerGeminiStreamEvent;
expect(service.addAndCheck(otherEvent)).toBe(false); expect(service.addAndCheck(otherEvent).count).toBe(0);
expect(service.addAndCheck(otherEvent)).toBe(false); expect(service.addAndCheck(otherEvent).count).toBe(0);
}); });
}); });
}); });
@@ -805,16 +842,16 @@ describe('LoopDetectionService LLM Checks', () => {
} }
}; };
it('should not trigger LLM check before LLM_CHECK_AFTER_TURNS', async () => { it('should not trigger LLM check before LLM_CHECK_AFTER_TURNS (30)', async () => {
await advanceTurns(39); await advanceTurns(29);
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled(); expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
}); });
it('should trigger LLM check on the 40th turn', async () => { it('should trigger LLM check on the 30th turn', async () => {
mockBaseLlmClient.generateJson = vi mockBaseLlmClient.generateJson = vi
.fn() .fn()
.mockResolvedValue({ unproductive_state_confidence: 0.1 }); .mockResolvedValue({ unproductive_state_confidence: 0.1 });
await advanceTurns(40); await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith( expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
expect.objectContaining({ expect.objectContaining({
@@ -828,12 +865,12 @@ describe('LoopDetectionService LLM Checks', () => {
}); });
it('should detect a cognitive loop when confidence is high', async () => { it('should detect a cognitive loop when confidence is high', async () => {
// First check at turn 40 // First check at turn 30
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({ mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
unproductive_state_confidence: 0.85, unproductive_state_confidence: 0.85,
unproductive_state_analysis: 'Repetitive actions', unproductive_state_analysis: 'Repetitive actions',
}); });
await advanceTurns(40); await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith( expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
expect.objectContaining({ expect.objectContaining({
@@ -842,16 +879,16 @@ describe('LoopDetectionService LLM Checks', () => {
); );
// The confidence of 0.85 will result in a low interval. // The confidence of 0.85 will result in a low interval.
// The interval will be: 7 + (15 - 7) * (1 - 0.85) = 7 + 8 * 0.15 = 8.2 -> rounded to 8 // The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7
await advanceTurns(7); // advance to turn 47 await advanceTurns(6); // advance to turn 36
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({ mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
unproductive_state_confidence: 0.95, unproductive_state_confidence: 0.95,
unproductive_state_analysis: 'Repetitive actions', unproductive_state_analysis: 'Repetitive actions',
}); });
const finalResult = await service.turnStarted(abortController.signal); // This is turn 48 const finalResult = await service.turnStarted(abortController.signal); // This is turn 37
expect(finalResult).toBe(true); expect(finalResult.count).toBe(1);
expect(loggers.logLoopDetected).toHaveBeenCalledWith( expect(loggers.logLoopDetected).toHaveBeenCalledWith(
mockConfig, mockConfig,
expect.objectContaining({ expect.objectContaining({
@@ -867,25 +904,25 @@ describe('LoopDetectionService LLM Checks', () => {
unproductive_state_confidence: 0.5, unproductive_state_confidence: 0.5,
unproductive_state_analysis: 'Looks okay', unproductive_state_analysis: 'Looks okay',
}); });
await advanceTurns(40); await advanceTurns(30);
const result = await service.turnStarted(abortController.signal); const result = await service.turnStarted(abortController.signal);
expect(result).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
it('should adjust the check interval based on confidence', async () => { it('should adjust the check interval based on confidence', async () => {
// Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15) // Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15)
// Interval = 7 + (15 - 7) * (1 - 0.0) = 15 // Interval = 5 + (15 - 5) * (1 - 0.0) = 15
mockBaseLlmClient.generateJson = vi mockBaseLlmClient.generateJson = vi
.fn() .fn()
.mockResolvedValue({ unproductive_state_confidence: 0.0 }); .mockResolvedValue({ unproductive_state_confidence: 0.0 });
await advanceTurns(40); // First check at turn 40 await advanceTurns(30); // First check at turn 30
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
await advanceTurns(14); // Advance to turn 54 await advanceTurns(14); // Advance to turn 44
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
await service.turnStarted(abortController.signal); // Turn 55 await service.turnStarted(abortController.signal); // Turn 45
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
}); });
@@ -893,18 +930,18 @@ describe('LoopDetectionService LLM Checks', () => {
mockBaseLlmClient.generateJson = vi mockBaseLlmClient.generateJson = vi
.fn() .fn()
.mockRejectedValue(new Error('API error')); .mockRejectedValue(new Error('API error'));
await advanceTurns(40); await advanceTurns(30);
const result = await service.turnStarted(abortController.signal); const result = await service.turnStarted(abortController.signal);
expect(result).toBe(false); expect(result.count).toBe(0);
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
}); });
it('should not trigger LLM check when disabled for session', async () => { it('should not trigger LLM check when disabled for session', async () => {
service.disableForSession(); service.disableForSession();
expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1); expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1);
await advanceTurns(40); await advanceTurns(30);
const result = await service.turnStarted(abortController.signal); const result = await service.turnStarted(abortController.signal);
expect(result).toBe(false); expect(result.count).toBe(0);
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled(); expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
}); });
@@ -925,7 +962,7 @@ describe('LoopDetectionService LLM Checks', () => {
.fn() .fn()
.mockResolvedValue({ unproductive_state_confidence: 0.1 }); .mockResolvedValue({ unproductive_state_confidence: 0.1 });
await advanceTurns(40); await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
@@ -950,7 +987,7 @@ describe('LoopDetectionService LLM Checks', () => {
unproductive_state_analysis: 'Main says loop', unproductive_state_analysis: 'Main says loop',
}); });
await advanceTurns(40); await advanceTurns(30);
// It should have called generateJson twice // It should have called generateJson twice
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
@@ -990,7 +1027,7 @@ describe('LoopDetectionService LLM Checks', () => {
unproductive_state_analysis: 'Main says no loop', unproductive_state_analysis: 'Main says no loop',
}); });
await advanceTurns(40); await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith( expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
@@ -1010,12 +1047,12 @@ describe('LoopDetectionService LLM Checks', () => {
expect(loggers.logLoopDetected).not.toHaveBeenCalled(); expect(loggers.logLoopDetected).not.toHaveBeenCalled();
// But should have updated the interval based on the main model's confidence (0.89) // But should have updated the interval based on the main model's confidence (0.89)
// Interval = 7 + (15-7) * (1 - 0.89) = 7 + 8 * 0.11 = 7 + 0.88 = 7.88 -> 8 // Interval = 5 + (15-5) * (1 - 0.89) = 5 + 10 * 0.11 = 5 + 1.1 = 6.1 -> 6
// Advance by 7 turns // Advance by 5 turns
await advanceTurns(7); await advanceTurns(5);
// Next turn (48) should trigger another check // Next turn (36) should trigger another check
await service.turnStarted(abortController.signal); await service.turnStarted(abortController.signal);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3);
}); });
@@ -1033,7 +1070,7 @@ describe('LoopDetectionService LLM Checks', () => {
unproductive_state_analysis: 'Flash says loop', unproductive_state_analysis: 'Flash says loop',
}); });
await advanceTurns(40); await advanceTurns(30);
// It should have called generateJson only once // It should have called generateJson only once
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
@@ -1047,8 +1084,6 @@ describe('LoopDetectionService LLM Checks', () => {
expect(loggers.logLoopDetected).toHaveBeenCalledWith( expect(loggers.logLoopDetected).toHaveBeenCalledWith(
mockConfig, mockConfig,
expect.objectContaining({ expect.objectContaining({
'event.name': 'loop_detected',
loop_type: LoopType.LLM_DETECTED_LOOP,
confirmed_by_model: 'gemini-2.5-flash', confirmed_by_model: 'gemini-2.5-flash',
}), }),
); );
@@ -1061,7 +1096,7 @@ describe('LoopDetectionService LLM Checks', () => {
.fn() .fn()
.mockResolvedValue({ unproductive_state_confidence: 0.1 }); .mockResolvedValue({ unproductive_state_confidence: 0.1 });
await advanceTurns(40); await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
@@ -1091,7 +1126,7 @@ describe('LoopDetectionService LLM Checks', () => {
.fn() .fn()
.mockResolvedValue({ unproductive_state_confidence: 0.1 }); .mockResolvedValue({ unproductive_state_confidence: 0.1 });
await advanceTurns(40); await advanceTurns(30);
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
@@ -39,7 +39,7 @@ const LLM_LOOP_CHECK_HISTORY_COUNT = 20;
/** /**
* The number of turns that must pass in a single prompt before the LLM-based loop check is activated. * The number of turns that must pass in a single prompt before the LLM-based loop check is activated.
*/ */
const LLM_CHECK_AFTER_TURNS = 40; const LLM_CHECK_AFTER_TURNS = 30;
/** /**
* The default interval, in number of turns, at which the LLM-based loop check is performed. * The default interval, in number of turns, at which the LLM-based loop check is performed.
@@ -51,7 +51,7 @@ const DEFAULT_LLM_CHECK_INTERVAL = 10;
* The minimum interval for LLM-based loop checks. * The minimum interval for LLM-based loop checks.
* This is used when the confidence of a loop is high, to check more frequently. * This is used when the confidence of a loop is high, to check more frequently.
*/ */
const MIN_LLM_CHECK_INTERVAL = 7; const MIN_LLM_CHECK_INTERVAL = 5;
/** /**
* The maximum interval for LLM-based loop checks. * The maximum interval for LLM-based loop checks.
@@ -117,6 +117,15 @@ const LOOP_DETECTION_SCHEMA: Record<string, unknown> = {
required: ['unproductive_state_analysis', 'unproductive_state_confidence'], required: ['unproductive_state_analysis', 'unproductive_state_confidence'],
}; };
/**
* Result of a loop detection check.
*/
export interface LoopDetectionResult {
count: number;
type?: LoopType;
detail?: string;
confirmedByModel?: string;
}
/** /**
* Service for detecting and preventing infinite loops in AI responses. * Service for detecting and preventing infinite loops in AI responses.
* Monitors tool call repetitions and content sentence repetitions. * Monitors tool call repetitions and content sentence repetitions.
@@ -135,8 +144,11 @@ export class LoopDetectionService {
private contentStats = new Map<string, number[]>(); private contentStats = new Map<string, number[]>();
private lastContentIndex = 0; private lastContentIndex = 0;
private loopDetected = false; private loopDetected = false;
private detectedCount = 0;
private lastLoopDetail?: string;
private inCodeBlock = false; private inCodeBlock = false;
private lastLoopType?: LoopType;
// LLM loop track tracking // LLM loop track tracking
private turnsInCurrentPrompt = 0; private turnsInCurrentPrompt = 0;
private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL; private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL;
@@ -169,31 +181,68 @@ export class LoopDetectionService {
/** /**
* Processes a stream event and checks for loop conditions. * Processes a stream event and checks for loop conditions.
* @param event - The stream event to process * @param event - The stream event to process
* @returns true if a loop is detected, false otherwise * @returns A LoopDetectionResult
*/ */
addAndCheck(event: ServerGeminiStreamEvent): boolean { addAndCheck(event: ServerGeminiStreamEvent): LoopDetectionResult {
if (this.disabledForSession || this.config.getDisableLoopDetection()) { if (this.disabledForSession || this.config.getDisableLoopDetection()) {
return false; return { count: 0 };
}
if (this.loopDetected) {
return {
count: this.detectedCount,
type: this.lastLoopType,
detail: this.lastLoopDetail,
};
} }
if (this.loopDetected) { let isLoop = false;
return this.loopDetected; let detail: string | undefined;
}
switch (event.type) { switch (event.type) {
case GeminiEventType.ToolCallRequest: case GeminiEventType.ToolCallRequest:
// content chanting only happens in one single stream, reset if there // content chanting only happens in one single stream, reset if there
// is a tool call in between // is a tool call in between
this.resetContentTracking(); this.resetContentTracking();
this.loopDetected = this.checkToolCallLoop(event.value); isLoop = this.checkToolCallLoop(event.value);
if (isLoop) {
detail = `Repeated tool call: ${event.value.name} with arguments ${JSON.stringify(event.value.args)}`;
}
break; break;
case GeminiEventType.Content: case GeminiEventType.Content:
this.loopDetected = this.checkContentLoop(event.value); isLoop = this.checkContentLoop(event.value);
if (isLoop) {
detail = `Repeating content detected: "${this.streamContentHistory.substring(Math.max(0, this.lastContentIndex - 20), this.lastContentIndex + CONTENT_CHUNK_SIZE).trim()}..."`;
}
break; break;
default: default:
break; break;
} }
return this.loopDetected;
if (isLoop) {
this.loopDetected = true;
this.detectedCount++;
this.lastLoopDetail = detail;
this.lastLoopType =
event.type === GeminiEventType.ToolCallRequest
? LoopType.CONSECUTIVE_IDENTICAL_TOOL_CALLS
: LoopType.CONTENT_CHANTING_LOOP;
logLoopDetected(
this.config,
new LoopDetectedEvent(
this.lastLoopType,
this.promptId,
this.detectedCount,
),
);
}
return isLoop
? {
count: this.detectedCount,
type: this.lastLoopType,
detail: this.lastLoopDetail,
}
: { count: 0 };
} }
/** /**
@@ -204,12 +253,20 @@ export class LoopDetectionService {
* is performed periodically based on the `llmCheckInterval`. * is performed periodically based on the `llmCheckInterval`.
* *
* @param signal - An AbortSignal to allow for cancellation of the asynchronous LLM check. * @param signal - An AbortSignal to allow for cancellation of the asynchronous LLM check.
* @returns A promise that resolves to `true` if a loop is detected, and `false` otherwise. * @returns A promise that resolves to a LoopDetectionResult.
*/ */
async turnStarted(signal: AbortSignal) { async turnStarted(signal: AbortSignal): Promise<LoopDetectionResult> {
if (this.disabledForSession || this.config.getDisableLoopDetection()) { if (this.disabledForSession || this.config.getDisableLoopDetection()) {
return false; return { count: 0 };
} }
if (this.loopDetected) {
return {
count: this.detectedCount,
type: this.lastLoopType,
detail: this.lastLoopDetail,
};
}
this.turnsInCurrentPrompt++; this.turnsInCurrentPrompt++;
if ( if (
@@ -217,10 +274,35 @@ export class LoopDetectionService {
this.turnsInCurrentPrompt - this.lastCheckTurn >= this.llmCheckInterval this.turnsInCurrentPrompt - this.lastCheckTurn >= this.llmCheckInterval
) { ) {
this.lastCheckTurn = this.turnsInCurrentPrompt; this.lastCheckTurn = this.turnsInCurrentPrompt;
return this.checkForLoopWithLLM(signal); const { isLoop, analysis, confirmedByModel } =
} await this.checkForLoopWithLLM(signal);
if (isLoop) {
this.loopDetected = true;
this.detectedCount++;
this.lastLoopDetail = analysis;
this.lastLoopType = LoopType.LLM_DETECTED_LOOP;
return false; logLoopDetected(
this.config,
new LoopDetectedEvent(
this.lastLoopType,
this.promptId,
this.detectedCount,
confirmedByModel,
analysis,
LLM_CONFIDENCE_THRESHOLD,
),
);
return {
count: this.detectedCount,
type: this.lastLoopType,
detail: this.lastLoopDetail,
confirmedByModel,
};
}
}
return { count: 0 };
} }
private checkToolCallLoop(toolCall: { name: string; args: object }): boolean { private checkToolCallLoop(toolCall: { name: string; args: object }): boolean {
@@ -232,13 +314,6 @@ export class LoopDetectionService {
this.toolCallRepetitionCount = 1; this.toolCallRepetitionCount = 1;
} }
if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) { if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) {
logLoopDetected(
this.config,
new LoopDetectedEvent(
LoopType.CONSECUTIVE_IDENTICAL_TOOL_CALLS,
this.promptId,
),
);
return true; return true;
} }
return false; return false;
@@ -345,13 +420,6 @@ export class LoopDetectionService {
const chunkHash = createHash('sha256').update(currentChunk).digest('hex'); const chunkHash = createHash('sha256').update(currentChunk).digest('hex');
if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) { if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) {
logLoopDetected(
this.config,
new LoopDetectedEvent(
LoopType.CHANTING_IDENTICAL_SENTENCES,
this.promptId,
),
);
return true; return true;
} }
@@ -445,28 +513,29 @@ export class LoopDetectionService {
return originalChunk === currentChunk; return originalChunk === currentChunk;
} }
private trimRecentHistory(recentHistory: Content[]): Content[] { private trimRecentHistory(history: Content[]): Content[] {
// A function response must be preceded by a function call. // A function response must be preceded by a function call.
// Continuously removes dangling function calls from the end of the history // Continuously removes dangling function calls from the end of the history
// until the last turn is not a function call. // until the last turn is not a function call.
while ( while (history.length > 0 && isFunctionCall(history[history.length - 1])) {
recentHistory.length > 0 && history.pop();
isFunctionCall(recentHistory[recentHistory.length - 1])
) {
recentHistory.pop();
} }
// A function response should follow a function call. // A function response should follow a function call.
// Continuously removes leading function responses from the beginning of history // Continuously removes leading function responses from the beginning of history
// until the first turn is not a function response. // until the first turn is not a function response.
while (recentHistory.length > 0 && isFunctionResponse(recentHistory[0])) { while (history.length > 0 && isFunctionResponse(history[0])) {
recentHistory.shift(); history.shift();
} }
return recentHistory; return history;
} }
private async checkForLoopWithLLM(signal: AbortSignal) { private async checkForLoopWithLLM(signal: AbortSignal): Promise<{
isLoop: boolean;
analysis?: string;
confirmedByModel?: string;
}> {
const recentHistory = this.config const recentHistory = this.config
.getGeminiClient() .getGeminiClient()
.getHistory() .getHistory()
@@ -506,13 +575,17 @@ export class LoopDetectionService {
); );
if (!flashResult) { if (!flashResult) {
return false; return { isLoop: false };
} }
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const flashConfidence =
const flashConfidence = flashResult[ typeof flashResult['unproductive_state_confidence'] === 'number'
'unproductive_state_confidence' ? flashResult['unproductive_state_confidence']
] as number; : 0;
const flashAnalysis =
typeof flashResult['unproductive_state_analysis'] === 'string'
? flashResult['unproductive_state_analysis']
: '';
const doubleCheckModelName = const doubleCheckModelName =
this.config.modelConfigService.getResolvedConfig({ this.config.modelConfigService.getResolvedConfig({
@@ -530,7 +603,7 @@ export class LoopDetectionService {
), ),
); );
this.updateCheckInterval(flashConfidence); this.updateCheckInterval(flashConfidence);
return false; return { isLoop: false };
} }
const availability = this.config.getModelAvailabilityService(); const availability = this.config.getModelAvailabilityService();
@@ -539,8 +612,11 @@ export class LoopDetectionService {
const flashModelName = this.config.modelConfigService.getResolvedConfig({ const flashModelName = this.config.modelConfigService.getResolvedConfig({
model: 'loop-detection', model: 'loop-detection',
}).model; }).model;
this.handleConfirmedLoop(flashResult, flashModelName); return {
return true; isLoop: true,
analysis: flashAnalysis,
confirmedByModel: flashModelName,
};
} }
// Double check with configured model // Double check with configured model
@@ -550,10 +626,16 @@ export class LoopDetectionService {
signal, signal,
); );
const mainModelConfidence = mainModelResult const mainModelConfidence =
? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion mainModelResult &&
(mainModelResult['unproductive_state_confidence'] as number) typeof mainModelResult['unproductive_state_confidence'] === 'number'
: 0; ? mainModelResult['unproductive_state_confidence']
: 0;
const mainModelAnalysis =
mainModelResult &&
typeof mainModelResult['unproductive_state_analysis'] === 'string'
? mainModelResult['unproductive_state_analysis']
: undefined;
logLlmLoopCheck( logLlmLoopCheck(
this.config, this.config,
@@ -567,14 +649,17 @@ export class LoopDetectionService {
if (mainModelResult) { if (mainModelResult) {
if (mainModelConfidence >= LLM_CONFIDENCE_THRESHOLD) { if (mainModelConfidence >= LLM_CONFIDENCE_THRESHOLD) {
this.handleConfirmedLoop(mainModelResult, doubleCheckModelName); return {
return true; isLoop: true,
analysis: mainModelAnalysis,
confirmedByModel: doubleCheckModelName,
};
} else { } else {
this.updateCheckInterval(mainModelConfidence); this.updateCheckInterval(mainModelConfidence);
} }
} }
return false; return { isLoop: false };
} }
private async queryLoopDetectionModel( private async queryLoopDetectionModel(
@@ -601,32 +686,16 @@ export class LoopDetectionService {
return result; return result;
} }
return null; return null;
} catch (e) { } catch (error) {
this.config.getDebugMode() ? debugLogger.warn(e) : debugLogger.debug(e); if (this.config.getDebugMode()) {
debugLogger.warn(
`Error querying loop detection model (${model}): ${String(error)}`,
);
}
return null; return null;
} }
} }
private handleConfirmedLoop(
result: Record<string, unknown>,
modelName: string,
): void {
if (
typeof result['unproductive_state_analysis'] === 'string' &&
result['unproductive_state_analysis']
) {
debugLogger.warn(result['unproductive_state_analysis']);
}
logLoopDetected(
this.config,
new LoopDetectedEvent(
LoopType.LLM_DETECTED_LOOP,
this.promptId,
modelName,
),
);
}
private updateCheckInterval(unproductive_state_confidence: number): void { private updateCheckInterval(unproductive_state_confidence: number): void {
this.llmCheckInterval = Math.round( this.llmCheckInterval = Math.round(
MIN_LLM_CHECK_INTERVAL + MIN_LLM_CHECK_INTERVAL +
@@ -645,6 +714,17 @@ export class LoopDetectionService {
this.resetContentTracking(); this.resetContentTracking();
this.resetLlmCheckTracking(); this.resetLlmCheckTracking();
this.loopDetected = false; this.loopDetected = false;
this.detectedCount = 0;
this.lastLoopDetail = undefined;
this.lastLoopType = undefined;
}
/**
* Resets the loop detected flag to allow a recovery turn to proceed.
* This preserves the detectedCount so that the next detection will be count 2.
*/
clearDetection(): void {
this.loopDetected = false;
} }
private resetToolCallCount(): void { private resetToolCallCount(): void {
@@ -33,6 +33,7 @@ import {
logFlashFallback, logFlashFallback,
logChatCompression, logChatCompression,
logMalformedJsonResponse, logMalformedJsonResponse,
logInvalidChunk,
logFileOperation, logFileOperation,
logRipgrepFallback, logRipgrepFallback,
logToolOutputTruncated, logToolOutputTruncated,
@@ -68,6 +69,7 @@ import {
EVENT_AGENT_START, EVENT_AGENT_START,
EVENT_AGENT_FINISH, EVENT_AGENT_FINISH,
EVENT_WEB_FETCH_FALLBACK_ATTEMPT, EVENT_WEB_FETCH_FALLBACK_ATTEMPT,
EVENT_INVALID_CHUNK,
ApiErrorEvent, ApiErrorEvent,
ApiRequestEvent, ApiRequestEvent,
ApiResponseEvent, ApiResponseEvent,
@@ -77,6 +79,7 @@ import {
FlashFallbackEvent, FlashFallbackEvent,
RipgrepFallbackEvent, RipgrepFallbackEvent,
MalformedJsonResponseEvent, MalformedJsonResponseEvent,
InvalidChunkEvent,
makeChatCompressionEvent, makeChatCompressionEvent,
FileOperationEvent, FileOperationEvent,
ToolOutputTruncatedEvent, ToolOutputTruncatedEvent,
@@ -1736,6 +1739,39 @@ describe('loggers', () => {
}); });
}); });
describe('logInvalidChunk', () => {
beforeEach(() => {
vi.spyOn(ClearcutLogger.prototype, 'logInvalidChunkEvent');
vi.spyOn(metrics, 'recordInvalidChunk');
});
it('logs the event to Clearcut and OTEL', () => {
const mockConfig = makeFakeConfig();
const event = new InvalidChunkEvent('Unexpected token');
logInvalidChunk(mockConfig, event);
expect(
ClearcutLogger.prototype.logInvalidChunkEvent,
).toHaveBeenCalledWith(event);
expect(mockLogger.emit).toHaveBeenCalledWith({
body: 'Invalid chunk received from stream.',
attributes: {
'session.id': 'test-session-id',
'user.email': 'test-user@example.com',
'installation.id': 'test-installation-id',
'event.name': EVENT_INVALID_CHUNK,
'event.timestamp': '2025-01-01T00:00:00.000Z',
interactive: false,
'error.message': 'Unexpected token',
},
});
expect(metrics.recordInvalidChunk).toHaveBeenCalledWith(mockConfig);
});
});
describe('logFileOperation', () => { describe('logFileOperation', () => {
const mockConfig = { const mockConfig = {
getSessionId: () => 'test-session-id', getSessionId: () => 'test-session-id',
+18
View File
@@ -29,6 +29,7 @@ import {
type ConversationFinishedEvent, type ConversationFinishedEvent,
type ChatCompressionEvent, type ChatCompressionEvent,
type MalformedJsonResponseEvent, type MalformedJsonResponseEvent,
type InvalidChunkEvent,
type ContentRetryEvent, type ContentRetryEvent,
type ContentRetryFailureEvent, type ContentRetryFailureEvent,
type RipgrepFallbackEvent, type RipgrepFallbackEvent,
@@ -75,6 +76,7 @@ import {
recordPlanExecution, recordPlanExecution,
recordKeychainAvailability, recordKeychainAvailability,
recordTokenStorageInitialization, recordTokenStorageInitialization,
recordInvalidChunk,
} from './metrics.js'; } from './metrics.js';
import { bufferTelemetryEvent } from './sdk.js'; import { bufferTelemetryEvent } from './sdk.js';
import { uiTelemetryService, type UiEvent } from './uiTelemetry.js'; import { uiTelemetryService, type UiEvent } from './uiTelemetry.js';
@@ -467,6 +469,22 @@ export function logMalformedJsonResponse(
}); });
} }
export function logInvalidChunk(
config: Config,
event: InvalidChunkEvent,
): void {
ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event);
bufferTelemetryEvent(() => {
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: event.toLogBody(),
attributes: event.toOpenTelemetryAttributes(config),
};
logger.emit(logRecord);
recordInvalidChunk(config);
});
}
export function logContentRetry( export function logContentRetry(
config: Config, config: Config,
event: ContentRetryEvent, event: ContentRetryEvent,
@@ -105,6 +105,7 @@ describe('Telemetry Metrics', () => {
let recordPlanExecutionModule: typeof import('./metrics.js').recordPlanExecution; let recordPlanExecutionModule: typeof import('./metrics.js').recordPlanExecution;
let recordKeychainAvailabilityModule: typeof import('./metrics.js').recordKeychainAvailability; let recordKeychainAvailabilityModule: typeof import('./metrics.js').recordKeychainAvailability;
let recordTokenStorageInitializationModule: typeof import('./metrics.js').recordTokenStorageInitialization; let recordTokenStorageInitializationModule: typeof import('./metrics.js').recordTokenStorageInitialization;
let recordInvalidChunkModule: typeof import('./metrics.js').recordInvalidChunk;
beforeEach(async () => { beforeEach(async () => {
vi.resetModules(); vi.resetModules();
@@ -154,6 +155,7 @@ describe('Telemetry Metrics', () => {
metricsJsModule.recordKeychainAvailability; metricsJsModule.recordKeychainAvailability;
recordTokenStorageInitializationModule = recordTokenStorageInitializationModule =
metricsJsModule.recordTokenStorageInitialization; metricsJsModule.recordTokenStorageInitialization;
recordInvalidChunkModule = metricsJsModule.recordInvalidChunk;
const otelApiModule = await import('@opentelemetry/api'); const otelApiModule = await import('@opentelemetry/api');
@@ -1555,5 +1557,27 @@ describe('Telemetry Metrics', () => {
}); });
}); });
}); });
  describe('recordInvalidChunk', () => {
    // Before initializeMetrics() runs, recording must be a silent no-op.
    it('should not record metrics if not initialized', () => {
      const config = makeFakeConfig({});
      recordInvalidChunkModule(config);
      expect(mockCounterAddFn).not.toHaveBeenCalled();
    });
    // Once metrics are initialized, each invalid chunk increments the
    // counter by 1 with the standard session/installation/user attributes.
    it('should record invalid chunk when initialized', () => {
      const config = makeFakeConfig({});
      initializeMetricsModule(config);
      // Discard counter calls made as a side effect of initialization.
      mockCounterAddFn.mockClear();
      recordInvalidChunkModule(config);
      expect(mockCounterAddFn).toHaveBeenCalledWith(1, {
        'session.id': 'test-session-id',
        'installation.id': 'test-installation-id',
        'user.email': 'test@example.com',
      });
    });
  });
}); });
}); });
+24 -2
View File
@@ -790,25 +790,36 @@ export enum LoopType {
CONSECUTIVE_IDENTICAL_TOOL_CALLS = 'consecutive_identical_tool_calls', CONSECUTIVE_IDENTICAL_TOOL_CALLS = 'consecutive_identical_tool_calls',
CHANTING_IDENTICAL_SENTENCES = 'chanting_identical_sentences', CHANTING_IDENTICAL_SENTENCES = 'chanting_identical_sentences',
LLM_DETECTED_LOOP = 'llm_detected_loop', LLM_DETECTED_LOOP = 'llm_detected_loop',
// Aliases for tests/internal use
TOOL_CALL_LOOP = CONSECUTIVE_IDENTICAL_TOOL_CALLS,
CONTENT_CHANTING_LOOP = CHANTING_IDENTICAL_SENTENCES,
} }
export class LoopDetectedEvent implements BaseTelemetryEvent { export class LoopDetectedEvent implements BaseTelemetryEvent {
'event.name': 'loop_detected'; 'event.name': 'loop_detected';
'event.timestamp': string; 'event.timestamp': string;
loop_type: LoopType; loop_type: LoopType;
prompt_id: string; prompt_id: string;
count: number;
confirmed_by_model?: string; confirmed_by_model?: string;
analysis?: string;
confidence?: number;
constructor( constructor(
loop_type: LoopType, loop_type: LoopType,
prompt_id: string, prompt_id: string,
count: number,
confirmed_by_model?: string, confirmed_by_model?: string,
analysis?: string,
confidence?: number,
) { ) {
this['event.name'] = 'loop_detected'; this['event.name'] = 'loop_detected';
this['event.timestamp'] = new Date().toISOString(); this['event.timestamp'] = new Date().toISOString();
this.loop_type = loop_type; this.loop_type = loop_type;
this.prompt_id = prompt_id; this.prompt_id = prompt_id;
this.count = count;
this.confirmed_by_model = confirmed_by_model; this.confirmed_by_model = confirmed_by_model;
this.analysis = analysis;
this.confidence = confidence;
} }
toOpenTelemetryAttributes(config: Config): LogAttributes { toOpenTelemetryAttributes(config: Config): LogAttributes {
@@ -818,17 +829,28 @@ export class LoopDetectedEvent implements BaseTelemetryEvent {
'event.timestamp': this['event.timestamp'], 'event.timestamp': this['event.timestamp'],
loop_type: this.loop_type, loop_type: this.loop_type,
prompt_id: this.prompt_id, prompt_id: this.prompt_id,
count: this.count,
}; };
if (this.confirmed_by_model) { if (this.confirmed_by_model) {
attributes['confirmed_by_model'] = this.confirmed_by_model; attributes['confirmed_by_model'] = this.confirmed_by_model;
} }
if (this.analysis) {
attributes['analysis'] = this.analysis;
}
if (this.confidence !== undefined) {
attributes['confidence'] = this.confidence;
}
return attributes; return attributes;
} }
toLogBody(): string { toLogBody(): string {
return `Loop detected. Type: ${this.loop_type}.${this.confirmed_by_model ? ` Confirmed by: ${this.confirmed_by_model}` : ''}`; const status =
this.count === 1 ? 'Attempting recovery' : 'Terminating session';
return `Loop detected (Strike ${this.count}: ${status}). Type: ${this.loop_type}.${this.confirmed_by_model ? ` Confirmed by: ${this.confirmed_by_model}` : ''}`;
} }
} }
+14
View File
@@ -413,6 +413,20 @@ export interface EditToolParams {
ai_proposed_content?: string; ai_proposed_content?: string;
} }
/**
 * Runtime type guard for {@link EditToolParams}.
 *
 * @param args Unknown value (typically deserialized tool-call arguments).
 * @returns True when `args` is a non-null object whose `file_path`,
 *   `old_string`, and `new_string` properties are all strings.
 */
export function isEditToolParams(args: unknown): args is EditToolParams {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  return (
    typeof candidate['file_path'] === 'string' &&
    typeof candidate['old_string'] === 'string' &&
    typeof candidate['new_string'] === 'string'
  );
}
interface CalculatedEdit { interface CalculatedEdit {
currentContent: string | null; currentContent: string | null;
newContent: string; newContent: string;
+14
View File
@@ -74,6 +74,20 @@ export interface WriteFileToolParams {
ai_proposed_content?: string; ai_proposed_content?: string;
} }
/**
 * Runtime type guard for {@link WriteFileToolParams}.
 *
 * @param args Unknown value (typically deserialized tool-call arguments).
 * @returns True when `args` is a non-null object whose `file_path` and
 *   `content` properties are both strings.
 */
export function isWriteFileToolParams(
  args: unknown,
): args is WriteFileToolParams {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  return (
    typeof candidate['file_path'] === 'string' &&
    typeof candidate['content'] === 'string'
  );
}
interface GetCorrectedFileContentResult { interface GetCorrectedFileContentResult {
originalContent: string; originalContent: string;
correctedContent: string; correctedContent: string;
@@ -421,6 +421,47 @@ describe('FileSearch', () => {
); );
}); });
  it('should prioritize filenames closer to the end of the path and shorter paths', async () => {
    // Fixture: the query 'hooks' matches a directory name, exact file
    // names, a file inside a subdirectory, and a prefixed directory name.
    tmpDir = await createTmpDir({
      src: {
        'hooks.ts': '',
        hooks: {
          'index.ts': '',
        },
        utils: {
          'hooks.tsx': '',
        },
        'hooks-dev': {
          'test.ts': '',
        },
      },
    });
    // Ignore rules disabled so only the ranking tiebreakers affect order.
    const fileSearch = FileSearchFactory.create({
      projectRoot: tmpDir,
      fileDiscoveryService: new FileDiscoveryService(tmpDir, {
        respectGitIgnore: false,
        respectGeminiIgnore: false,
      }),
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
      enableRecursiveFileSearch: true,
      enableFuzzySearch: true,
    });
    await fileSearch.initialize();
    const results = await fileSearch.search('hooks');
    // The order should prioritize matches closer to the end and shorter strings.
    // FZF matches right-to-left.
    expect(results[0]).toBe('src/hooks/');
    expect(results[1]).toBe('src/hooks.ts');
    expect(results[2]).toBe('src/utils/hooks.tsx');
    expect(results[3]).toBe('src/hooks-dev/');
    expect(results[4]).toBe('src/hooks/index.ts');
    expect(results[5]).toBe('src/hooks-dev/test.ts');
  });
it('should return empty array when no matches are found', async () => { it('should return empty array when no matches are found', async () => {
tmpDir = await createTmpDir({ tmpDir = await createTmpDir({
src: ['file1.js'], src: ['file1.js'],
@@ -13,6 +13,44 @@ import { AsyncFzf, type FzfResultItem } from 'fzf';
import { unescapePath } from '../paths.js'; import { unescapePath } from '../paths.js';
import type { FileDiscoveryService } from '../../services/fileDiscoveryService.js'; import type { FileDiscoveryService } from '../../services/fileDiscoveryService.js';
// Tiebreaker: Prefers shorter paths (ascending by item length).
const byLengthAsc = (a: { item: string }, b: { item: string }) => {
  return a.item.length - b.item.length;
};
// Tiebreaker: Prefers matches at the start of the filename (basename prefix).
// A negative offset means the earliest match falls before the basename
// (i.e. inside a parent directory segment).
const byBasenamePrefix = (
  a: { item: string; positions: Set<number> },
  b: { item: string; positions: Set<number> },
) => {
  const offsetIntoBasename = (entry: {
    item: string;
    positions: Set<number>;
  }) => {
    // Strip a trailing '/' so directory entries resolve to their own name.
    const path = entry.item.endsWith('/')
      ? entry.item.slice(0, -1)
      : entry.item;
    const basenameStart =
      Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\')) + 1;
    return Math.min(...entry.positions) - basenameStart;
  };
  const aOffset = offsetIntoBasename(a);
  const bOffset = offsetIntoBasename(b);
  const aInBasename = aOffset >= 0;
  const bInBasename = bOffset >= 0;
  if (aInBasename !== bInBasename) {
    // A basename match always wins over a directory-segment match.
    return aInBasename ? -1 : 1;
  }
  // Both in the basename: earlier offset wins; both outside: defer to the
  // subsequent tiebreakers.
  return aInBasename && bInBasename ? aOffset - bOffset : 0;
};
// Tiebreaker: Prefers matches closer to the end of the path. The distance is
// measured from the last matched position to the end of the item; an empty
// position set falls back to -1 (maximum distance).
const byMatchPosFromEnd = (
  a: { item: string; positions: Set<number> },
  b: { item: string; positions: Set<number> },
) => {
  const distanceFromEnd = (entry: { item: string; positions: Set<number> }) =>
    entry.item.length - Math.max(-1, ...entry.positions);
  return distanceFromEnd(a) - distanceFromEnd(b);
};
export interface FileSearchOptions { export interface FileSearchOptions {
projectRoot: string; projectRoot: string;
ignoreDirs: string[]; ignoreDirs: string[];
@@ -192,6 +230,8 @@ class RecursiveFileSearch implements FileSearch {
// files, because the v2 algorithm is just too slow in those cases. // files, because the v2 algorithm is just too slow in those cases.
this.fzf = new AsyncFzf(this.allFiles, { this.fzf = new AsyncFzf(this.allFiles, {
fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2', fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
forward: false,
tiebreakers: [byBasenamePrefix, byMatchPosFromEnd, byLengthAsc],
}); });
} }
} }
+2 -2
View File
@@ -1271,8 +1271,8 @@
"properties": { "properties": {
"sandbox": { "sandbox": {
"title": "Sandbox", "title": "Sandbox",
"description": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, or provide a string path to a sandbox profile.", "description": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").",
"markdownDescription": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, or provide a string path to a sandbox profile.\n\n- Category: `Tools`\n- Requires restart: `yes`", "markdownDescription": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").\n\n- Category: `Tools`\n- Requires restart: `yes`",
"$ref": "#/$defs/BooleanOrString" "$ref": "#/$defs/BooleanOrString"
}, },
"shell": { "shell": {
+14 -7
View File
@@ -155,9 +155,9 @@ function generateMarkdown(currentStatsByModel, history) {
const models = Object.keys(currentStatsByModel).sort(); const models = Object.keys(currentStatsByModel).sort();
for (const model of models) { const getPassRate = (statsForModel) => {
const currentStats = currentStatsByModel[model]; if (!statsForModel) return '-';
const totalStats = Object.values(currentStats).reduce( const totalStats = Object.values(statsForModel).reduce(
(acc, stats) => { (acc, stats) => {
acc.passed += stats.passed; acc.passed += stats.passed;
acc.total += stats.total; acc.total += stats.total;
@@ -165,11 +165,14 @@ function generateMarkdown(currentStatsByModel, history) {
}, },
{ passed: 0, total: 0 }, { passed: 0, total: 0 },
); );
return totalStats.total > 0
? ((totalStats.passed / totalStats.total) * 100).toFixed(1) + '%'
: '-';
};
const totalPassRate = for (const model of models) {
totalStats.total > 0 const currentStats = currentStatsByModel[model];
? ((totalStats.passed / totalStats.total) * 100).toFixed(1) + '%' const totalPassRate = getPassRate(currentStats);
: 'N/A';
console.log(`#### Model: ${model}`); console.log(`#### Model: ${model}`);
console.log(`**Total Pass Rate: ${totalPassRate}**\n`); console.log(`**Total Pass Rate: ${totalPassRate}**\n`);
@@ -177,18 +180,22 @@ function generateMarkdown(currentStatsByModel, history) {
// Header // Header
let header = '| Test Name |'; let header = '| Test Name |';
let separator = '| :--- |'; let separator = '| :--- |';
let passRateRow = '| **Overall Pass Rate** |';
for (const item of reversedHistory) { for (const item of reversedHistory) {
header += ` [${item.run.databaseId}](${item.run.url}) |`; header += ` [${item.run.databaseId}](${item.run.url}) |`;
separator += ' :---: |'; separator += ' :---: |';
passRateRow += ` **${getPassRate(item.stats[model])}** |`;
} }
// Add Current column last // Add Current column last
header += ' Current |'; header += ' Current |';
separator += ' :---: |'; separator += ' :---: |';
passRateRow += ` **${totalPassRate}** |`;
console.log(header); console.log(header);
console.log(separator); console.log(separator);
console.log(passRateRow);
// Collect all test names for this model // Collect all test names for this model
const allTestNames = new Set(Object.keys(currentStats)); const allTestNames = new Set(Object.keys(currentStats));