mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-27 13:34:15 -07:00
Merge branch 'main' into akkr/subagents-policy
This commit is contained in:
+6
-1
@@ -14,4 +14,9 @@
|
|||||||
|
|
||||||
# Docs have a dedicated approver group in addition to maintainers
|
# Docs have a dedicated approver group in addition to maintainers
|
||||||
/docs/ @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs
|
/docs/ @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs
|
||||||
/README.md @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs
|
/README.md @google-gemini/gemini-cli-maintainers @google-gemini/gemini-cli-docs
|
||||||
|
|
||||||
|
# Prompt contents, tool definitions, and evals require reviews from prompt approvers
|
||||||
|
/packages/core/src/prompts/ @google-gemini/gemini-cli-prompt-approvers
|
||||||
|
/packages/core/src/tools/ @google-gemini/gemini-cli-prompt-approvers
|
||||||
|
/evals/ @google-gemini/gemini-cli-prompt-approvers
|
||||||
|
|||||||
@@ -0,0 +1,315 @@
|
|||||||
|
name: 'Unassign Inactive Issue Assignees'
|
||||||
|
|
||||||
|
# This workflow runs daily and scans every open "help wanted" issue that has
|
||||||
|
# one or more assignees. For each assignee it checks whether they have a
|
||||||
|
# non-draft pull request (open and ready for review, or already merged) that
|
||||||
|
# is linked to the issue. Draft PRs are intentionally excluded so that
|
||||||
|
# contributors cannot reset the check by opening a no-op PR. If no
|
||||||
|
# qualifying PR is found within 7 days of assignment the assignee is
|
||||||
|
# automatically removed and a friendly comment is posted so that other
|
||||||
|
# contributors can pick up the work.
|
||||||
|
# Maintainers, org members, and collaborators (anyone with write access or
|
||||||
|
# above) are always exempted and will never be auto-unassigned.
|
||||||
|
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
- cron: '0 9 * * *' # Every day at 09:00 UTC
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
dry_run:
|
||||||
|
description: 'Run in dry-run mode (no changes will be applied)'
|
||||||
|
required: false
|
||||||
|
default: false
|
||||||
|
type: 'boolean'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: '${{ github.workflow }}'
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: 'bash'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
unassign-inactive-assignees:
|
||||||
|
if: "github.repository == 'google-gemini/gemini-cli'"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
permissions:
|
||||||
|
issues: 'write'
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: 'Generate GitHub App Token'
|
||||||
|
id: 'generate_token'
|
||||||
|
uses: 'actions/create-github-app-token@v2'
|
||||||
|
with:
|
||||||
|
app-id: '${{ secrets.APP_ID }}'
|
||||||
|
private-key: '${{ secrets.PRIVATE_KEY }}'
|
||||||
|
|
||||||
|
- name: 'Unassign inactive assignees'
|
||||||
|
uses: 'actions/github-script@v7'
|
||||||
|
env:
|
||||||
|
DRY_RUN: '${{ inputs.dry_run }}'
|
||||||
|
with:
|
||||||
|
github-token: '${{ steps.generate_token.outputs.token }}'
|
||||||
|
script: |
|
||||||
|
const dryRun = process.env.DRY_RUN === 'true';
|
||||||
|
if (dryRun) {
|
||||||
|
core.info('DRY RUN MODE ENABLED: No changes will be applied.');
|
||||||
|
}
|
||||||
|
|
||||||
|
const owner = context.repo.owner;
|
||||||
|
const repo = context.repo.repo;
|
||||||
|
const GRACE_PERIOD_DAYS = 7;
|
||||||
|
const now = new Date();
|
||||||
|
|
||||||
|
let maintainerLogins = new Set();
|
||||||
|
const teams = ['gemini-cli-maintainers', 'gemini-cli-askmode-approvers', 'gemini-cli-docs'];
|
||||||
|
|
||||||
|
for (const team_slug of teams) {
|
||||||
|
try {
|
||||||
|
const members = await github.paginate(github.rest.teams.listMembersInOrg, {
|
||||||
|
org: owner,
|
||||||
|
team_slug,
|
||||||
|
});
|
||||||
|
for (const m of members) maintainerLogins.add(m.login.toLowerCase());
|
||||||
|
core.info(`Fetched ${members.length} members from team ${team_slug}.`);
|
||||||
|
} catch (e) {
|
||||||
|
core.warning(`Could not fetch team ${team_slug}: ${e.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const isGooglerCache = new Map();
|
||||||
|
const isGoogler = async (login) => {
|
||||||
|
if (isGooglerCache.has(login)) return isGooglerCache.get(login);
|
||||||
|
try {
|
||||||
|
for (const org of ['googlers', 'google']) {
|
||||||
|
try {
|
||||||
|
await github.rest.orgs.checkMembershipForUser({ org, username: login });
|
||||||
|
isGooglerCache.set(login, true);
|
||||||
|
return true;
|
||||||
|
} catch (e) {
|
||||||
|
if (e.status !== 404) throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
core.warning(`Could not check org membership for ${login}: ${e.message}`);
|
||||||
|
}
|
||||||
|
isGooglerCache.set(login, false);
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
const permissionCache = new Map();
|
||||||
|
const isPrivilegedUser = async (login) => {
|
||||||
|
if (maintainerLogins.has(login.toLowerCase())) return true;
|
||||||
|
|
||||||
|
if (permissionCache.has(login)) return permissionCache.get(login);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
|
||||||
|
owner,
|
||||||
|
repo,
|
||||||
|
username: login,
|
||||||
|
});
|
||||||
|
const privileged = ['admin', 'maintain', 'write', 'triage'].includes(data.permission);
|
||||||
|
permissionCache.set(login, privileged);
|
||||||
|
if (privileged) {
|
||||||
|
core.info(` @${login} is a repo collaborator (${data.permission}) — exempt.`);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
if (e.status !== 404) {
|
||||||
|
core.warning(`Could not check permission for ${login}: ${e.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const googler = await isGoogler(login);
|
||||||
|
permissionCache.set(login, googler);
|
||||||
|
return googler;
|
||||||
|
};
|
||||||
|
|
||||||
|
core.info('Fetching open "help wanted" issues with assignees...');
|
||||||
|
|
||||||
|
const issues = await github.paginate(github.rest.issues.listForRepo, {
|
||||||
|
owner,
|
||||||
|
repo,
|
||||||
|
state: 'open',
|
||||||
|
labels: 'help wanted',
|
||||||
|
per_page: 100,
|
||||||
|
});
|
||||||
|
|
||||||
|
const assignedIssues = issues.filter(
|
||||||
|
(issue) => !issue.pull_request && issue.assignees && issue.assignees.length > 0
|
||||||
|
);
|
||||||
|
|
||||||
|
core.info(`Found ${assignedIssues.length} assigned "help wanted" issues.`);
|
||||||
|
|
||||||
|
let totalUnassigned = 0;
|
||||||
|
|
||||||
|
let timelineEvents = [];
|
||||||
|
try {
|
||||||
|
timelineEvents = await github.paginate(github.rest.issues.listEventsForTimeline, {
|
||||||
|
owner,
|
||||||
|
repo,
|
||||||
|
issue_number: issue.number,
|
||||||
|
per_page: 100,
|
||||||
|
mediaType: { previews: ['mockingbird'] },
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
core.warning(`Could not fetch timeline for issue #${issue.number}: ${err.message}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const assignedAtMap = new Map();
|
||||||
|
|
||||||
|
for (const event of timelineEvents) {
|
||||||
|
if (event.event === 'assigned' && event.assignee) {
|
||||||
|
const login = event.assignee.login.toLowerCase();
|
||||||
|
const at = new Date(event.created_at);
|
||||||
|
assignedAtMap.set(login, at);
|
||||||
|
} else if (event.event === 'unassigned' && event.assignee) {
|
||||||
|
assignedAtMap.delete(event.assignee.login.toLowerCase());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const linkedPRAuthorSet = new Set();
|
||||||
|
const seenPRKeys = new Set();
|
||||||
|
|
||||||
|
for (const event of timelineEvents) {
|
||||||
|
if (
|
||||||
|
event.event !== 'cross-referenced' ||
|
||||||
|
!event.source ||
|
||||||
|
event.source.type !== 'pull_request' ||
|
||||||
|
!event.source.issue ||
|
||||||
|
!event.source.issue.user ||
|
||||||
|
!event.source.issue.number ||
|
||||||
|
!event.source.issue.repository
|
||||||
|
) continue;
|
||||||
|
|
||||||
|
const prOwner = event.source.issue.repository.owner.login;
|
||||||
|
const prRepo = event.source.issue.repository.name;
|
||||||
|
const prNumber = event.source.issue.number;
|
||||||
|
const prAuthor = event.source.issue.user.login.toLowerCase();
|
||||||
|
const prKey = `${prOwner}/${prRepo}#${prNumber}`;
|
||||||
|
|
||||||
|
if (seenPRKeys.has(prKey)) continue;
|
||||||
|
seenPRKeys.add(prKey);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const { data: pr } = await github.rest.pulls.get({
|
||||||
|
owner: prOwner,
|
||||||
|
repo: prRepo,
|
||||||
|
pull_number: prNumber,
|
||||||
|
});
|
||||||
|
|
||||||
|
const isReady = (pr.state === 'open' && !pr.draft) ||
|
||||||
|
(pr.state === 'closed' && pr.merged_at !== null);
|
||||||
|
|
||||||
|
core.info(
|
||||||
|
` PR ${prKey} by @${prAuthor}: ` +
|
||||||
|
`state=${pr.state}, draft=${pr.draft}, merged=${!!pr.merged_at} → ` +
|
||||||
|
(isReady ? 'qualifies' : 'does NOT qualify (draft or closed without merge)')
|
||||||
|
);
|
||||||
|
|
||||||
|
if (isReady) linkedPRAuthorSet.add(prAuthor);
|
||||||
|
} catch (err) {
|
||||||
|
core.warning(`Could not fetch PR ${prKey}: ${err.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const assigneesToRemove = [];
|
||||||
|
|
||||||
|
for (const assignee of issue.assignees) {
|
||||||
|
const login = assignee.login.toLowerCase();
|
||||||
|
|
||||||
|
if (await isPrivilegedUser(assignee.login)) {
|
||||||
|
core.info(` @${assignee.login}: privileged user — skipping.`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const assignedAt = assignedAtMap.get(login);
|
||||||
|
|
||||||
|
if (!assignedAt) {
|
||||||
|
core.warning(
|
||||||
|
`No 'assigned' event found for @${login} on issue #${issue.number}; ` +
|
||||||
|
`falling back to issue creation date (${issue.created_at}).`
|
||||||
|
);
|
||||||
|
assignedAtMap.set(login, new Date(issue.created_at));
|
||||||
|
}
|
||||||
|
const resolvedAssignedAt = assignedAtMap.get(login);
|
||||||
|
|
||||||
|
const daysSinceAssignment = (now - resolvedAssignedAt) / (1000 * 60 * 60 * 24);
|
||||||
|
|
||||||
|
core.info(
|
||||||
|
` @${login}: assigned ${daysSinceAssignment.toFixed(1)} day(s) ago, ` +
|
||||||
|
`ready-for-review PR: ${linkedPRAuthorSet.has(login) ? 'yes' : 'no'}`
|
||||||
|
);
|
||||||
|
|
||||||
|
if (daysSinceAssignment < GRACE_PERIOD_DAYS) {
|
||||||
|
core.info(` → within grace period, skipping.`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (linkedPRAuthorSet.has(login)) {
|
||||||
|
core.info(` → ready-for-review PR found, keeping assignment.`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
core.info(` → no ready-for-review PR after ${GRACE_PERIOD_DAYS} days, will unassign.`);
|
||||||
|
assigneesToRemove.push(assignee.login);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assigneesToRemove.length === 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!dryRun) {
|
||||||
|
try {
|
||||||
|
await github.rest.issues.removeAssignees({
|
||||||
|
owner,
|
||||||
|
repo,
|
||||||
|
issue_number: issue.number,
|
||||||
|
assignees: assigneesToRemove,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
core.warning(
|
||||||
|
`Failed to unassign ${assigneesToRemove.join(', ')} from issue #${issue.number}: ${err.message}`
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const mentionList = assigneesToRemove.map((l) => `@${l}`).join(', ');
|
||||||
|
const commentBody =
|
||||||
|
`👋 ${mentionList} — it has been more than ${GRACE_PERIOD_DAYS} days since ` +
|
||||||
|
`you were assigned to this issue and we could not find a pull request ` +
|
||||||
|
`ready for review.\n\n` +
|
||||||
|
`To keep the backlog moving and ensure issues stay accessible to all ` +
|
||||||
|
`contributors, we require a PR that is open and ready for review (not a ` +
|
||||||
|
`draft) within ${GRACE_PERIOD_DAYS} days of assignment.\n\n` +
|
||||||
|
`We are automatically unassigning you so that other contributors can pick ` +
|
||||||
|
`this up. If you are still actively working on this, please:\n` +
|
||||||
|
`1. Re-assign yourself by commenting \`/assign\`.\n` +
|
||||||
|
`2. Open a PR (not a draft) linked to this issue (e.g. \`Fixes #${issue.number}\`) ` +
|
||||||
|
`within ${GRACE_PERIOD_DAYS} days so the automation knows real progress is being made.\n\n` +
|
||||||
|
`Thank you for your contribution — we hope to see a PR from you soon! 🙏`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
await github.rest.issues.createComment({
|
||||||
|
owner,
|
||||||
|
repo,
|
||||||
|
issue_number: issue.number,
|
||||||
|
body: commentBody,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
core.warning(
|
||||||
|
`Failed to post comment on issue #${issue.number}: ${err.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
totalUnassigned += assigneesToRemove.length;
|
||||||
|
core.info(
|
||||||
|
` ${dryRun ? '[DRY RUN] Would have unassigned' : 'Unassigned'}: ${assigneesToRemove.join(', ')}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
core.info(`\nDone. Total assignees ${dryRun ? 'that would be' : ''} unassigned: ${totalUnassigned}`);
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
# Latest stable release: v0.32.0
|
# Latest stable release: v0.32.1
|
||||||
|
|
||||||
Released: March 03, 2026
|
Released: March 4, 2026
|
||||||
|
|
||||||
For most users, our latest stable release is the recommended release. Install
|
For most users, our latest stable release is the recommended release. Install
|
||||||
the latest stable version with:
|
the latest stable version with:
|
||||||
@@ -29,6 +29,9 @@ npm install -g @google/gemini-cli
|
|||||||
|
|
||||||
## What's Changed
|
## What's Changed
|
||||||
|
|
||||||
|
- fix(patch): cherry-pick 0659ad1 to release/v0.32.0-pr-21042 to patch version
|
||||||
|
v0.32.0 and create version 0.32.1 by @gemini-cli-robot in
|
||||||
|
[#21048](https://github.com/google-gemini/gemini-cli/pull/21048)
|
||||||
- feat(plan): add integration tests for plan mode by @Adib234 in
|
- feat(plan): add integration tests for plan mode by @Adib234 in
|
||||||
[#20214](https://github.com/google-gemini/gemini-cli/pull/20214)
|
[#20214](https://github.com/google-gemini/gemini-cli/pull/20214)
|
||||||
- fix(acp): update auth handshake to spec by @skeshive in
|
- fix(acp): update auth handshake to spec by @skeshive in
|
||||||
@@ -202,4 +205,4 @@ npm install -g @google/gemini-cli
|
|||||||
[#19781](https://github.com/google-gemini/gemini-cli/pull/19781)
|
[#19781](https://github.com/google-gemini/gemini-cli/pull/19781)
|
||||||
|
|
||||||
**Full Changelog**:
|
**Full Changelog**:
|
||||||
https://github.com/google-gemini/gemini-cli/compare/v0.31.0...v0.32.0
|
https://github.com/google-gemini/gemini-cli/compare/v0.31.0...v0.32.1
|
||||||
|
|||||||
+46
-1
@@ -50,6 +50,50 @@ Cross-platform sandboxing with complete process isolation.
|
|||||||
**Note**: Requires building the sandbox image locally or using a published image
|
**Note**: Requires building the sandbox image locally or using a published image
|
||||||
from your organization's registry.
|
from your organization's registry.
|
||||||
|
|
||||||
|
### 3. LXC/LXD (Linux only, experimental)
|
||||||
|
|
||||||
|
Full-system container sandboxing using LXC/LXD. Unlike Docker/Podman, LXC
|
||||||
|
containers run a complete Linux system with `systemd`, `snapd`, and other system
|
||||||
|
services. This is ideal for tools that don't work in standard Docker containers,
|
||||||
|
such as Snapcraft and Rockcraft.
|
||||||
|
|
||||||
|
**Prerequisites**:
|
||||||
|
|
||||||
|
- Linux only.
|
||||||
|
- LXC/LXD must be installed (`snap install lxd` or `apt install lxd`).
|
||||||
|
- A container must be created and running before starting Gemini CLI. Gemini
|
||||||
|
does **not** create the container automatically.
|
||||||
|
|
||||||
|
**Quick setup**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Initialize LXD (first time only)
|
||||||
|
lxd init --auto
|
||||||
|
|
||||||
|
# Create and start an Ubuntu container
|
||||||
|
lxc launch ubuntu:24.04 gemini-sandbox
|
||||||
|
|
||||||
|
# Enable LXC sandboxing
|
||||||
|
export GEMINI_SANDBOX=lxc
|
||||||
|
gemini -p "build the project"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Custom container name**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export GEMINI_SANDBOX=lxc
|
||||||
|
export GEMINI_SANDBOX_IMAGE=my-snapcraft-container
|
||||||
|
gemini -p "build the snap"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Limitations**:
|
||||||
|
|
||||||
|
- Linux only (LXC is not available on macOS or Windows).
|
||||||
|
- The container must already exist and be running.
|
||||||
|
- The workspace directory is bind-mounted into the container at the same
|
||||||
|
absolute path — the path must be writable inside the container.
|
||||||
|
- Used with tools like Snapcraft or Rockcraft that require a full system.
|
||||||
|
|
||||||
## Quickstart
|
## Quickstart
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -88,7 +132,8 @@ gemini -p "run the test suite"
|
|||||||
### Enable sandboxing (in order of precedence)
|
### Enable sandboxing (in order of precedence)
|
||||||
|
|
||||||
1. **Command flag**: `-s` or `--sandbox`
|
1. **Command flag**: `-s` or `--sandbox`
|
||||||
2. **Environment variable**: `GEMINI_SANDBOX=true|docker|podman|sandbox-exec`
|
2. **Environment variable**:
|
||||||
|
`GEMINI_SANDBOX=true|docker|podman|sandbox-exec|lxc`
|
||||||
3. **Settings file**: `"sandbox": true` in the `tools` object of your
|
3. **Settings file**: `"sandbox": true` in the `tools` object of your
|
||||||
`settings.json` file (e.g., `{"tools": {"sandbox": true}}`).
|
`settings.json` file (e.g., `{"tools": {"sandbox": true}}`).
|
||||||
|
|
||||||
|
|||||||
@@ -113,7 +113,45 @@ process.
|
|||||||
ensure every issue is eventually categorized, even if the initial triage
|
ensure every issue is eventually categorized, even if the initial triage
|
||||||
fails.
|
fails.
|
||||||
|
|
||||||
### 5. Release automation
|
### 5. Automatic unassignment of inactive contributors: `Unassign Inactive Issue Assignees`
|
||||||
|
|
||||||
|
To keep the list of open `help wanted` issues accessible to all contributors,
|
||||||
|
this workflow automatically removes **external contributors** who have not
|
||||||
|
opened a linked pull request within **7 days** of being assigned. Maintainers,
|
||||||
|
org members, and repo collaborators with write access or above are always exempt
|
||||||
|
and will never be auto-unassigned.
|
||||||
|
|
||||||
|
- **Workflow File**: `.github/workflows/unassign-inactive-assignees.yml`
|
||||||
|
- **When it runs**: Every day at 09:00 UTC, and can be triggered manually with
|
||||||
|
an optional `dry_run` mode.
|
||||||
|
- **What it does**:
|
||||||
|
1. Finds every open issue labeled `help wanted` that has at least one
|
||||||
|
assignee.
|
||||||
|
2. Identifies privileged users (team members, repo collaborators with write+
|
||||||
|
access, maintainers) and skips them entirely.
|
||||||
|
3. For each remaining (external) assignee it reads the issue's timeline to
|
||||||
|
determine:
|
||||||
|
- The exact date they were assigned (using `assigned` timeline events).
|
||||||
|
- Whether they have opened a PR that is already linked/cross-referenced to
|
||||||
|
the issue.
|
||||||
|
4. Each cross-referenced PR is fetched to verify it is **ready for review**:
|
||||||
|
open and non-draft, or already merged. Draft PRs do not count.
|
||||||
|
5. If an assignee has been assigned for **more than 7 days** and no qualifying
|
||||||
|
PR is found, they are automatically unassigned and a comment is posted
|
||||||
|
explaining the reason and how to re-claim the issue.
|
||||||
|
6. Assignees who have a non-draft, open or merged PR linked to the issue are
|
||||||
|
**never** unassigned by this workflow.
|
||||||
|
- **What you should do**:
|
||||||
|
- **Open a real PR, not a draft**: Within 7 days of being assigned, open a PR
|
||||||
|
that is ready for review and include `Fixes #<issue-number>` in the
|
||||||
|
description. Draft PRs do not satisfy the requirement and will not prevent
|
||||||
|
auto-unassignment.
|
||||||
|
- **Re-assign if unassigned by mistake**: Comment `/assign` on the issue to
|
||||||
|
assign yourself again.
|
||||||
|
- **Unassign yourself** if you can no longer work on the issue by commenting
|
||||||
|
`/unassign`, so other contributors can pick it up right away.
|
||||||
|
|
||||||
|
### 6. Release automation
|
||||||
|
|
||||||
This workflow handles the process of packaging and publishing new versions of
|
This workflow handles the process of packaging and publishing new versions of
|
||||||
the Gemini CLI.
|
the Gemini CLI.
|
||||||
|
|||||||
@@ -747,7 +747,8 @@ their corresponding top-level category object in your `settings.json` file.
|
|||||||
|
|
||||||
- **`tools.sandbox`** (boolean | string):
|
- **`tools.sandbox`** (boolean | string):
|
||||||
- **Description:** Sandbox execution environment. Set to a boolean to enable
|
- **Description:** Sandbox execution environment. Set to a boolean to enable
|
||||||
or disable the sandbox, or provide a string path to a sandbox profile.
|
or disable the sandbox, provide a string path to a sandbox profile, or
|
||||||
|
specify an explicit sandbox command (e.g., "docker", "podman", "lxc").
|
||||||
- **Default:** `undefined`
|
- **Default:** `undefined`
|
||||||
- **Requires restart:** Yes
|
- **Requires restart:** Yes
|
||||||
|
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ describe('loadSandboxConfig', () => {
|
|||||||
it('should throw if GEMINI_SANDBOX is an invalid command', async () => {
|
it('should throw if GEMINI_SANDBOX is an invalid command', async () => {
|
||||||
process.env['GEMINI_SANDBOX'] = 'invalid-command';
|
process.env['GEMINI_SANDBOX'] = 'invalid-command';
|
||||||
await expect(loadSandboxConfig({}, {})).rejects.toThrow(
|
await expect(loadSandboxConfig({}, {})).rejects.toThrow(
|
||||||
"Invalid sandbox command 'invalid-command'. Must be one of docker, podman, sandbox-exec",
|
"Invalid sandbox command 'invalid-command'. Must be one of docker, podman, sandbox-exec, lxc",
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -108,6 +108,22 @@ describe('loadSandboxConfig', () => {
|
|||||||
"Missing sandbox command 'docker' (from GEMINI_SANDBOX)",
|
"Missing sandbox command 'docker' (from GEMINI_SANDBOX)",
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should use lxc if GEMINI_SANDBOX=lxc and it exists', async () => {
|
||||||
|
process.env['GEMINI_SANDBOX'] = 'lxc';
|
||||||
|
mockedCommandExistsSync.mockReturnValue(true);
|
||||||
|
const config = await loadSandboxConfig({}, {});
|
||||||
|
expect(config).toEqual({ command: 'lxc', image: 'default/image' });
|
||||||
|
expect(mockedCommandExistsSync).toHaveBeenCalledWith('lxc');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should throw if GEMINI_SANDBOX=lxc but lxc command does not exist', async () => {
|
||||||
|
process.env['GEMINI_SANDBOX'] = 'lxc';
|
||||||
|
mockedCommandExistsSync.mockReturnValue(false);
|
||||||
|
await expect(loadSandboxConfig({}, {})).rejects.toThrow(
|
||||||
|
"Missing sandbox command 'lxc' (from GEMINI_SANDBOX)",
|
||||||
|
);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('with sandbox: true', () => {
|
describe('with sandbox: true', () => {
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ const VALID_SANDBOX_COMMANDS: ReadonlyArray<SandboxConfig['command']> = [
|
|||||||
'docker',
|
'docker',
|
||||||
'podman',
|
'podman',
|
||||||
'sandbox-exec',
|
'sandbox-exec',
|
||||||
|
'lxc',
|
||||||
];
|
];
|
||||||
|
|
||||||
function isSandboxCommand(value: string): value is SandboxConfig['command'] {
|
function isSandboxCommand(value: string): value is SandboxConfig['command'] {
|
||||||
@@ -91,6 +92,9 @@ function getSandboxCommand(
|
|||||||
}
|
}
|
||||||
|
|
||||||
return '';
|
return '';
|
||||||
|
// Note: 'lxc' is intentionally not auto-detected because it requires a
|
||||||
|
// pre-existing, running container managed by the user. Use
|
||||||
|
// GEMINI_SANDBOX=lxc or sandbox: "lxc" in settings to enable it.
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function loadSandboxConfig(
|
export async function loadSandboxConfig(
|
||||||
|
|||||||
@@ -1236,7 +1236,8 @@ const SETTINGS_SCHEMA = {
|
|||||||
ref: 'BooleanOrString',
|
ref: 'BooleanOrString',
|
||||||
description: oneLine`
|
description: oneLine`
|
||||||
Sandbox execution environment.
|
Sandbox execution environment.
|
||||||
Set to a boolean to enable or disable the sandbox, or provide a string path to a sandbox profile.
|
Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile,
|
||||||
|
or specify an explicit sandbox command (e.g., "docker", "podman", "lxc").
|
||||||
`,
|
`,
|
||||||
showInDialog: false,
|
showInDialog: false,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import { performInitialAuth } from './auth.js';
|
|||||||
import {
|
import {
|
||||||
type Config,
|
type Config,
|
||||||
ValidationRequiredError,
|
ValidationRequiredError,
|
||||||
|
ProjectIdRequiredError,
|
||||||
AuthType,
|
AuthType,
|
||||||
} from '@google/gemini-cli-core';
|
} from '@google/gemini-cli-core';
|
||||||
|
|
||||||
@@ -116,4 +117,22 @@ describe('auth', () => {
|
|||||||
AuthType.LOGIN_WITH_GOOGLE,
|
AuthType.LOGIN_WITH_GOOGLE,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should return ProjectIdRequiredError message without "Failed to login" prefix', async () => {
|
||||||
|
const projectIdError = new ProjectIdRequiredError();
|
||||||
|
vi.mocked(mockConfig.refreshAuth).mockRejectedValue(projectIdError);
|
||||||
|
const result = await performInitialAuth(
|
||||||
|
mockConfig,
|
||||||
|
AuthType.LOGIN_WITH_GOOGLE,
|
||||||
|
);
|
||||||
|
expect(result).toEqual({
|
||||||
|
authError:
|
||||||
|
'This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID env var. See https://goo.gle/gemini-cli-auth-docs#workspace-gca',
|
||||||
|
accountSuspensionInfo: null,
|
||||||
|
});
|
||||||
|
expect(result.authError).not.toContain('Failed to login');
|
||||||
|
expect(mockConfig.refreshAuth).toHaveBeenCalledWith(
|
||||||
|
AuthType.LOGIN_WITH_GOOGLE,
|
||||||
|
);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import {
|
|||||||
getErrorMessage,
|
getErrorMessage,
|
||||||
ValidationRequiredError,
|
ValidationRequiredError,
|
||||||
isAccountSuspendedError,
|
isAccountSuspendedError,
|
||||||
|
ProjectIdRequiredError,
|
||||||
} from '@google/gemini-cli-core';
|
} from '@google/gemini-cli-core';
|
||||||
|
|
||||||
import type { AccountSuspensionInfo } from '../ui/contexts/UIStateContext.js';
|
import type { AccountSuspensionInfo } from '../ui/contexts/UIStateContext.js';
|
||||||
@@ -54,6 +55,14 @@ export async function performInitialAuth(
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
if (e instanceof ProjectIdRequiredError) {
|
||||||
|
// OAuth succeeded but account setup requires project ID
|
||||||
|
// Show the error message directly without "Failed to login" prefix
|
||||||
|
return {
|
||||||
|
authError: getErrorMessage(e),
|
||||||
|
accountSuspensionInfo: null,
|
||||||
|
};
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
authError: `Failed to login. Message: ${getErrorMessage(e)}`,
|
authError: `Failed to login. Message: ${getErrorMessage(e)}`,
|
||||||
accountSuspensionInfo: null,
|
accountSuspensionInfo: null,
|
||||||
|
|||||||
@@ -80,6 +80,7 @@ import {
|
|||||||
type ConsentRequestPayload,
|
type ConsentRequestPayload,
|
||||||
type AgentsDiscoveredPayload,
|
type AgentsDiscoveredPayload,
|
||||||
ChangeAuthRequestedError,
|
ChangeAuthRequestedError,
|
||||||
|
ProjectIdRequiredError,
|
||||||
CoreToolCallStatus,
|
CoreToolCallStatus,
|
||||||
generateSteeringAckMessage,
|
generateSteeringAckMessage,
|
||||||
buildUserSteeringHintPrompt,
|
buildUserSteeringHintPrompt,
|
||||||
@@ -771,6 +772,12 @@ export const AppContainer = (props: AppContainerProps) => {
|
|||||||
if (e instanceof ChangeAuthRequestedError) {
|
if (e instanceof ChangeAuthRequestedError) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (e instanceof ProjectIdRequiredError) {
|
||||||
|
// OAuth succeeded but account setup requires project ID
|
||||||
|
// Show the error message directly without "Failed to authenticate" prefix
|
||||||
|
onAuthError(getErrorMessage(e));
|
||||||
|
return;
|
||||||
|
}
|
||||||
onAuthError(
|
onAuthError(
|
||||||
`Failed to authenticate: ${e instanceof Error ? e.message : String(e)}`,
|
`Failed to authenticate: ${e instanceof Error ? e.message : String(e)}`,
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -15,7 +15,11 @@ import {
|
|||||||
} from 'vitest';
|
} from 'vitest';
|
||||||
import { renderHook } from '../../test-utils/render.js';
|
import { renderHook } from '../../test-utils/render.js';
|
||||||
import { useAuthCommand, validateAuthMethodWithSettings } from './useAuth.js';
|
import { useAuthCommand, validateAuthMethodWithSettings } from './useAuth.js';
|
||||||
import { AuthType, type Config } from '@google/gemini-cli-core';
|
import {
|
||||||
|
AuthType,
|
||||||
|
type Config,
|
||||||
|
ProjectIdRequiredError,
|
||||||
|
} from '@google/gemini-cli-core';
|
||||||
import { AuthState } from '../types.js';
|
import { AuthState } from '../types.js';
|
||||||
import type { LoadedSettings } from '../../config/settings.js';
|
import type { LoadedSettings } from '../../config/settings.js';
|
||||||
import { waitFor } from '../../test-utils/async.js';
|
import { waitFor } from '../../test-utils/async.js';
|
||||||
@@ -288,5 +292,21 @@ describe('useAuth', () => {
|
|||||||
expect(result.current.authState).toBe(AuthState.Updating);
|
expect(result.current.authState).toBe(AuthState.Updating);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should handle ProjectIdRequiredError without "Failed to login" prefix', async () => {
|
||||||
|
const projectIdError = new ProjectIdRequiredError();
|
||||||
|
(mockConfig.refreshAuth as Mock).mockRejectedValue(projectIdError);
|
||||||
|
const { result } = renderHook(() =>
|
||||||
|
useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig),
|
||||||
|
);
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(result.current.authError).toBe(
|
||||||
|
'This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID env var. See https://goo.gle/gemini-cli-auth-docs#workspace-gca',
|
||||||
|
);
|
||||||
|
expect(result.current.authError).not.toContain('Failed to login');
|
||||||
|
expect(result.current.authState).toBe(AuthState.Updating);
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import {
|
|||||||
loadApiKey,
|
loadApiKey,
|
||||||
debugLogger,
|
debugLogger,
|
||||||
isAccountSuspendedError,
|
isAccountSuspendedError,
|
||||||
|
ProjectIdRequiredError,
|
||||||
} from '@google/gemini-cli-core';
|
} from '@google/gemini-cli-core';
|
||||||
import { getErrorMessage } from '@google/gemini-cli-core';
|
import { getErrorMessage } from '@google/gemini-cli-core';
|
||||||
import { AuthState } from '../types.js';
|
import { AuthState } from '../types.js';
|
||||||
@@ -143,6 +144,10 @@ export const useAuthCommand = (
|
|||||||
appealUrl: suspendedError.appealUrl,
|
appealUrl: suspendedError.appealUrl,
|
||||||
appealLinkText: suspendedError.appealLinkText,
|
appealLinkText: suspendedError.appealLinkText,
|
||||||
});
|
});
|
||||||
|
} else if (e instanceof ProjectIdRequiredError) {
|
||||||
|
// OAuth succeeded but account setup requires project ID
|
||||||
|
// Show the error message directly without "Failed to login" prefix
|
||||||
|
onAuthError(getErrorMessage(e));
|
||||||
} else {
|
} else {
|
||||||
onAuthError(`Failed to login. Message: ${getErrorMessage(e)}`);
|
onAuthError(`Failed to login. Message: ${getErrorMessage(e)}`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -120,8 +120,8 @@ describe('useAtCompletion', () => {
|
|||||||
|
|
||||||
expect(result.current.suggestions.map((s) => s.value)).toEqual([
|
expect(result.current.suggestions.map((s) => s.value)).toEqual([
|
||||||
'src/',
|
'src/',
|
||||||
'src/components/',
|
|
||||||
'src/index.js',
|
'src/index.js',
|
||||||
|
'src/components/',
|
||||||
'src/components/Button.tsx',
|
'src/components/Button.tsx',
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
|
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||||
import { spawn, exec, execSync } from 'node:child_process';
|
import { spawn, exec, execFile, execSync } from 'node:child_process';
|
||||||
import os from 'node:os';
|
import os from 'node:os';
|
||||||
import fs from 'node:fs';
|
import fs from 'node:fs';
|
||||||
import { start_sandbox } from './sandbox.js';
|
import { start_sandbox } from './sandbox.js';
|
||||||
@@ -50,6 +50,26 @@ vi.mock('node:util', async (importOriginal) => {
|
|||||||
return { stdout: '', stderr: '' };
|
return { stdout: '', stderr: '' };
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
if (fn === execFile) {
|
||||||
|
return async (file: string, args: string[]) => {
|
||||||
|
if (file === 'lxc' && args[0] === 'list') {
|
||||||
|
const output = process.env['TEST_LXC_LIST_OUTPUT'];
|
||||||
|
if (output === 'throw') {
|
||||||
|
throw new Error('lxc command not found');
|
||||||
|
}
|
||||||
|
return { stdout: output ?? '[]', stderr: '' };
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
file === 'lxc' &&
|
||||||
|
args[0] === 'config' &&
|
||||||
|
args[1] === 'device' &&
|
||||||
|
args[2] === 'add'
|
||||||
|
) {
|
||||||
|
return { stdout: '', stderr: '' };
|
||||||
|
}
|
||||||
|
return { stdout: '', stderr: '' };
|
||||||
|
};
|
||||||
|
}
|
||||||
return actual.promisify(fn);
|
return actual.promisify(fn);
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@@ -473,5 +493,84 @@ describe('sandbox', () => {
|
|||||||
expect(entrypointCmd).toContain('useradd');
|
expect(entrypointCmd).toContain('useradd');
|
||||||
expect(entrypointCmd).toContain('su -p gemini');
|
expect(entrypointCmd).toContain('su -p gemini');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('LXC sandbox', () => {
  // Canned `lxc list --format=json` payloads. The shared execFile mock
  // returns whatever is stored in process.env['TEST_LXC_LIST_OUTPUT'].
  const LXC_RUNNING = JSON.stringify([
    { name: 'gemini-sandbox', status: 'Running' },
  ]);
  const LXC_STOPPED = JSON.stringify([
    { name: 'gemini-sandbox', status: 'Stopped' },
  ]);

  // Fresh config per test so no test can observe another test's object.
  const lxcConfig = (): SandboxConfig => ({
    command: 'lxc',
    image: 'gemini-sandbox',
  });

  beforeEach(() => {
    delete process.env['TEST_LXC_LIST_OUTPUT'];
  });

  // Also clear the variable after each test: without this, the value set by
  // the last test in this suite would stay in process.env and be read by the
  // execFile mock in any test that runs afterwards.
  afterEach(() => {
    delete process.env['TEST_LXC_LIST_OUTPUT'];
  });

  it('should run lxc exec with correct args for a running container', async () => {
    process.env['TEST_LXC_LIST_OUTPUT'] = LXC_RUNNING;
    const config = lxcConfig();

    // Fake child process that reports a clean exit shortly after a 'close'
    // listener is attached.
    const mockSpawnProcess = new EventEmitter() as unknown as ReturnType<
      typeof spawn
    >;
    mockSpawnProcess.on = vi.fn().mockImplementation((event, cb) => {
      if (event === 'close') {
        setTimeout(() => cb(0), 10);
      }
      return mockSpawnProcess;
    });

    vi.mocked(spawn).mockImplementation((cmd) => {
      if (cmd === 'lxc') {
        return mockSpawnProcess;
      }
      return new EventEmitter() as unknown as ReturnType<typeof spawn>;
    });

    const promise = start_sandbox(config, [], undefined, ['arg1']);
    await expect(promise).resolves.toBe(0);

    expect(spawn).toHaveBeenCalledWith(
      'lxc',
      expect.arrayContaining(['exec', 'gemini-sandbox', '--cwd']),
      expect.objectContaining({ stdio: 'inherit' }),
    );
  });

  it('should throw FatalSandboxError if lxc list fails', async () => {
    // The sentinel value 'throw' makes the execFile mock reject.
    process.env['TEST_LXC_LIST_OUTPUT'] = 'throw';

    await expect(start_sandbox(lxcConfig())).rejects.toThrow(
      /Failed to query LXC container/,
    );
  });

  it('should throw FatalSandboxError if container is not running', async () => {
    process.env['TEST_LXC_LIST_OUTPUT'] = LXC_STOPPED;

    await expect(start_sandbox(lxcConfig())).rejects.toThrow(/is not running/);
  });

  it('should throw FatalSandboxError if container is not found in list', async () => {
    process.env['TEST_LXC_LIST_OUTPUT'] = '[]';

    await expect(start_sandbox(lxcConfig())).rejects.toThrow(/not found/);
  });
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -4,7 +4,14 @@
|
|||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { exec, execSync, spawn, type ChildProcess } from 'node:child_process';
|
import {
|
||||||
|
exec,
|
||||||
|
execFile,
|
||||||
|
execFileSync,
|
||||||
|
execSync,
|
||||||
|
spawn,
|
||||||
|
type ChildProcess,
|
||||||
|
} from 'node:child_process';
|
||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
import fs from 'node:fs';
|
import fs from 'node:fs';
|
||||||
import os from 'node:os';
|
import os from 'node:os';
|
||||||
@@ -34,6 +41,7 @@ import {
|
|||||||
} from './sandboxUtils.js';
|
} from './sandboxUtils.js';
|
||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
export async function start_sandbox(
|
export async function start_sandbox(
|
||||||
config: SandboxConfig,
|
config: SandboxConfig,
|
||||||
@@ -203,6 +211,10 @@ export async function start_sandbox(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (config.command === 'lxc') {
|
||||||
|
return await start_lxc_sandbox(config, nodeArgs, cliArgs);
|
||||||
|
}
|
||||||
|
|
||||||
debugLogger.log(`hopping into sandbox (command: ${config.command}) ...`);
|
debugLogger.log(`hopping into sandbox (command: ${config.command}) ...`);
|
||||||
|
|
||||||
// determine full path for gemini-cli to distinguish linked vs installed setting
|
// determine full path for gemini-cli to distinguish linked vs installed setting
|
||||||
@@ -722,6 +734,208 @@ export async function start_sandbox(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Starts a sandbox using LXC/LXD.
 *
 * Unlike Docker/Podman, LXC does not launch a transient container from an
 * image. The user creates and manages their own LXC container; Gemini runs
 * inside it via `lxc exec`. The container name is stored in config.image
 * (default: "gemini-sandbox"). The workspace is bind-mounted into the
 * container at the same absolute path.
 *
 * @param config Sandbox configuration; `config.image` is used as the
 *   container name.
 * @param nodeArgs Extra Node options, appended to NODE_OPTIONS inside the
 *   container.
 * @param cliArgs CLI arguments passed on to `entrypoint()`.
 * @returns The exit code of the `lxc exec` process (1 when it closed without
 *   an exit code).
 * @throws FatalSandboxError when the container cannot be queried, is missing,
 *   is not running, the workspace cannot be mounted, or SANDBOX_ENV is
 *   malformed.
 */
async function start_lxc_sandbox(
  config: SandboxConfig,
  nodeArgs: string[] = [],
  cliArgs: string[] = [],
): Promise<number> {
  const containerName = config.image || 'gemini-sandbox';
  const workdir = path.resolve(process.cwd());

  debugLogger.log(
    `starting lxc sandbox (container: ${containerName}, workdir: ${workdir}) ...`,
  );

  // Verify the container exists and is running.
  let listOutput: string;
  try {
    const { stdout } = await execFileAsync('lxc', [
      'list',
      containerName,
      '--format=json',
    ]);
    listOutput = stdout.trim();
  } catch (err) {
    throw new FatalSandboxError(
      `Failed to query LXC container '${containerName}': ${err instanceof Error ? err.message : String(err)}. ` +
        `Make sure LXC/LXD is installed and '${containerName}' container exists. ` +
        `Create one with: lxc launch ubuntu:24.04 ${containerName}`,
    );
  }

  // Keep only entries that carry both `name` and `status`, normalised to
  // strings so the comparisons below are safe.
  let containers: Array<{ name: string; status: string }> = [];
  try {
    const parsed: unknown = JSON.parse(listOutput);
    if (Array.isArray(parsed)) {
      containers = parsed
        .filter(
          (item): item is Record<string, unknown> =>
            item !== null &&
            typeof item === 'object' &&
            'name' in item &&
            'status' in item,
        )
        .map((item) => ({
          name: String(item['name']),
          status: String(item['status']),
        }));
    }
  } catch (err) {
    // Unparseable output is treated as "no containers" (reported below as
    // "not found"); leave a debug trace so the real cause can be diagnosed.
    debugLogger.log(
      `could not parse 'lxc list' output as JSON: ${err instanceof Error ? err.message : String(err)}`,
    );
    containers = [];
  }

  const container = containers.find((c) => c.name === containerName);
  if (!container) {
    throw new FatalSandboxError(
      `LXC container '${containerName}' not found. ` +
        `Create one with: lxc launch ubuntu:24.04 ${containerName}`,
    );
  }
  if (container.status.toLowerCase() !== 'running') {
    throw new FatalSandboxError(
      `LXC container '${containerName}' is not running (current status: ${container.status}). ` +
        `Start it with: lxc start ${containerName}`,
    );
  }

  // Bind-mount the working directory into the container at the same path.
  // The device name gets a random suffix on every invocation, so the cleanup
  // below removes exactly the device that this call added.
  const deviceName = `gemini-workspace-${randomBytes(4).toString('hex')}`;
  try {
    await execFileAsync('lxc', [
      'config',
      'device',
      'add',
      containerName,
      deviceName,
      'disk',
      `source=${workdir}`,
      `path=${workdir}`,
    ]);
    debugLogger.log(
      `mounted workspace '${workdir}' into container as device '${deviceName}'`,
    );
  } catch (err) {
    throw new FatalSandboxError(
      `Failed to mount workspace into LXC container '${containerName}': ${err instanceof Error ? err.message : String(err)}`,
    );
  }

  // Remove the workspace device from the container when the process exits.
  // Only the 'exit' event is needed — the CLI's cleanup.ts already handles
  // SIGINT and SIGTERM by calling process.exit(), which fires 'exit'.
  // Synchronous on purpose: 'exit' handlers cannot wait for async work.
  const removeDevice = () => {
    try {
      execFileSync(
        'lxc',
        ['config', 'device', 'remove', containerName, deviceName],
        { timeout: 2000 },
      );
    } catch {
      // Best-effort cleanup; ignore errors on exit.
    }
  };
  process.on('exit', removeDevice);

  // Build the environment variable arguments for `lxc exec`.
  const envArgs: string[] = [];
  const envVarsToForward: Record<string, string | undefined> = {
    GEMINI_API_KEY: process.env['GEMINI_API_KEY'],
    GOOGLE_API_KEY: process.env['GOOGLE_API_KEY'],
    GOOGLE_GEMINI_BASE_URL: process.env['GOOGLE_GEMINI_BASE_URL'],
    GOOGLE_VERTEX_BASE_URL: process.env['GOOGLE_VERTEX_BASE_URL'],
    GOOGLE_GENAI_USE_VERTEXAI: process.env['GOOGLE_GENAI_USE_VERTEXAI'],
    GOOGLE_GENAI_USE_GCA: process.env['GOOGLE_GENAI_USE_GCA'],
    GOOGLE_CLOUD_PROJECT: process.env['GOOGLE_CLOUD_PROJECT'],
    GOOGLE_CLOUD_LOCATION: process.env['GOOGLE_CLOUD_LOCATION'],
    GEMINI_MODEL: process.env['GEMINI_MODEL'],
    TERM: process.env['TERM'],
    COLORTERM: process.env['COLORTERM'],
    GEMINI_CLI_IDE_SERVER_PORT: process.env['GEMINI_CLI_IDE_SERVER_PORT'],
    GEMINI_CLI_IDE_WORKSPACE_PATH: process.env['GEMINI_CLI_IDE_WORKSPACE_PATH'],
    TERM_PROGRAM: process.env['TERM_PROGRAM'],
  };
  for (const [key, value] of Object.entries(envVarsToForward)) {
    // Unset and empty variables are not forwarded.
    if (value) {
      envArgs.push('--env', `${key}=${value}`);
    }
  }

  // Forward SANDBOX_ENV key=value pairs
  if (process.env['SANDBOX_ENV']) {
    for (let env of process.env['SANDBOX_ENV'].split(',')) {
      if ((env = env.trim())) {
        if (env.includes('=')) {
          envArgs.push('--env', env);
        } else {
          throw new FatalSandboxError(
            'SANDBOX_ENV must be a comma-separated list of key=value pairs',
          );
        }
      }
    }
  }

  // Forward NODE_OPTIONS (e.g. from --inspect flags)
  const existingNodeOptions = process.env['NODE_OPTIONS'] || '';
  const allNodeOptions = [
    ...(existingNodeOptions ? [existingNodeOptions] : []),
    ...nodeArgs,
  ].join(' ');
  if (allNodeOptions.length > 0) {
    envArgs.push('--env', `NODE_OPTIONS=${allNodeOptions}`);
  }

  // Mark that we're running inside an LXC sandbox.
  envArgs.push('--env', `SANDBOX=${containerName}`);

  // Build the command entrypoint (same logic as Docker path).
  const finalEntrypoint = entrypoint(workdir, cliArgs);

  // Build the full lxc exec command args.
  const args = [
    'exec',
    containerName,
    '--cwd',
    workdir,
    ...envArgs,
    '--',
    ...finalEntrypoint,
  ];

  // Log a redacted copy of the arguments. The forwarded environment contains
  // credentials (GEMINI_API_KEY, GOOGLE_API_KEY, arbitrary SANDBOX_ENV
  // values) that must not be written to debug logs in plain text, so only the
  // variable names are kept. Every non-flag entry in envArgs is `KEY=VALUE`.
  const redactedEnvArgs = envArgs.map((arg) =>
    arg === '--env' ? arg : `${arg.slice(0, arg.indexOf('=') + 1)}<redacted>`,
  );
  const loggedArgs = [
    'exec',
    containerName,
    '--cwd',
    workdir,
    ...redactedEnvArgs,
    '--',
    ...finalEntrypoint,
  ];
  debugLogger.log(`lxc exec args: ${loggedArgs.join(' ')}`);

  // Hand the terminal over to the child while it runs.
  process.stdin.pause();
  const sandboxProcess = spawn('lxc', args, {
    stdio: 'inherit',
  });

  return new Promise<number>((resolve, reject) => {
    sandboxProcess.on('error', (err) => {
      coreEvents.emitFeedback('error', 'LXC sandbox process error', err);
      reject(err);
    });

    sandboxProcess.on('close', (code, signal) => {
      process.stdin.resume();
      // Clean up now and unregister the exit hook so the device is not
      // removed a second time when the process exits.
      process.off('exit', removeDevice);
      removeDevice();
      if (code !== 0 && code !== null) {
        debugLogger.log(
          `LXC sandbox process exited with code: ${code}, signal: ${signal}`,
        );
      }
      resolve(code ?? 1);
    });
  });
}
|
||||||
|
|
||||||
// Helper functions to ensure sandbox image is present
|
// Helper functions to ensure sandbox image is present
|
||||||
async function imageExists(sandbox: string, image: string): Promise<boolean> {
|
async function imageExists(sandbox: string, image: string): Promise<boolean> {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
|
|||||||
@@ -10,8 +10,14 @@ import { OAuth2Client } from 'google-auth-library';
|
|||||||
import { UserTierId, ActionStatus } from './types.js';
|
import { UserTierId, ActionStatus } from './types.js';
|
||||||
import { FinishReason } from '@google/genai';
|
import { FinishReason } from '@google/genai';
|
||||||
import { LlmRole } from '../telemetry/types.js';
|
import { LlmRole } from '../telemetry/types.js';
|
||||||
|
import { logInvalidChunk } from '../telemetry/loggers.js';
|
||||||
|
import { makeFakeConfig } from '../test-utils/config.js';
|
||||||
|
|
||||||
vi.mock('google-auth-library');
|
vi.mock('google-auth-library');
|
||||||
|
vi.mock('../telemetry/loggers.js', () => ({
|
||||||
|
logBillingEvent: vi.fn(),
|
||||||
|
logInvalidChunk: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
function createTestServer(headers: Record<string, string> = {}) {
|
function createTestServer(headers: Record<string, string> = {}) {
|
||||||
const mockRequest = vi.fn();
|
const mockRequest = vi.fn();
|
||||||
@@ -116,7 +122,7 @@ describe('CodeAssistServer', () => {
|
|||||||
role: 'model',
|
role: 'model',
|
||||||
parts: [
|
parts: [
|
||||||
{ text: 'response' },
|
{ text: 'response' },
|
||||||
{ functionCall: { name: 'test', args: {} } },
|
{ functionCall: { name: 'replace', args: {} } },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
finishReason: FinishReason.SAFETY,
|
finishReason: FinishReason.SAFETY,
|
||||||
@@ -160,7 +166,7 @@ describe('CodeAssistServer', () => {
|
|||||||
role: 'model',
|
role: 'model',
|
||||||
parts: [
|
parts: [
|
||||||
{ text: 'response' },
|
{ text: 'response' },
|
||||||
{ functionCall: { name: 'test', args: {} } },
|
{ functionCall: { name: 'replace', args: {} } },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
finishReason: FinishReason.STOP,
|
finishReason: FinishReason.STOP,
|
||||||
@@ -233,7 +239,7 @@ describe('CodeAssistServer', () => {
|
|||||||
content: {
|
content: {
|
||||||
parts: [
|
parts: [
|
||||||
{ text: 'chunk' },
|
{ text: 'chunk' },
|
||||||
{ functionCall: { name: 'test', args: {} } },
|
{ functionCall: { name: 'replace', args: {} } },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -671,4 +677,242 @@ describe('CodeAssistServer', () => {
|
|||||||
expect(requestPostSpy).toHaveBeenCalledWith('retrieveUserQuota', req);
|
expect(requestPostSpy).toHaveBeenCalledWith('retrieveUserQuota', req);
|
||||||
expect(response).toEqual(mockResponse);
|
expect(response).toEqual(mockResponse);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('robustness testing', () => {
  /**
   * Creates a Readable that pushes every given chunk on its first read and
   * then signals end-of-stream.
   */
  async function streamFromChunks(chunks: string[]) {
    const { Readable } = await import('node:stream');
    return new Readable({
      read() {
        chunks.forEach((piece) => this.push(piece));
        this.push(null);
      },
    });
  }

  /** Consumes an async iterable to completion and returns what it yielded. */
  async function collect<T>(source: AsyncIterable<T>): Promise<T[]> {
    const seen: T[] = [];
    for await (const item of source) {
      seen.push(item);
    }
    return seen;
  }

  /**
   * Makes requestPost reject once with each value in turn and, whenever
   * loadCodeAssist rejects, checks that the rejection is that same value.
   */
  async function expectRejectionsToPassThrough(
    server: CodeAssistServer,
    rejections: unknown[],
  ) {
    for (const injected of rejections) {
      vi.spyOn(server, 'requestPost').mockRejectedValueOnce(injected);
      try {
        await server.loadCodeAssist({ metadata: {} });
      } catch (thrown) {
        expect(thrown).toBe(injected);
      }
    }
  }

  it('should not crash on random error objects in loadCodeAssist (isVpcScAffectedUser)', async () => {
    const { server } = createTestServer();

    await expectRejectionsToPassThrough(server, [
      null,
      undefined,
      'string error',
      123,
      { some: 'object' },
      new Error('standard error'),
      { response: {} },
      { response: { data: {} } },
    ]);
  });

  it('should handle randomly fragmented SSE streams gracefully', async () => {
    const { server, mockRequest } = createTestServer();

    // Each case splits the same kind of SSE payload at different offsets.
    const fragmentedCases = [
      {
        chunks: ['d', 'ata: {"foo":', ' "bar"}\n\n'],
        expected: [{ foo: 'bar' }],
      },
      {
        chunks: ['data: {"foo": "bar"}\n', '\n'],
        expected: [{ foo: 'bar' }],
      },
      {
        chunks: ['data: ', '{"foo": "bar"}', '\n\n'],
        expected: [{ foo: 'bar' }],
      },
      {
        chunks: ['data: {"foo": "bar"}\n\n', 'data: {"baz": 1}\n\n'],
        expected: [{ foo: 'bar' }, { baz: 1 }],
      },
    ];

    for (const testCase of fragmentedCases) {
      const mockStream = await streamFromChunks(testCase.chunks);
      mockRequest.mockResolvedValueOnce({ data: mockStream });

      const stream = await server.requestStreamingPost('testStream', {});
      const results = await collect(stream);

      expect(results).toEqual(testCase.expected);
    }
  });

  it('should correctly parse valid JSON split across multiple data lines', async () => {
    const { server, mockRequest } = createTestServer();
    const jsonObj = {
      complex: { structure: [1, 2, 3] },
      bool: true,
      str: 'value',
    };

    // Pretty-print so the JSON spans several lines, then emit one `data:`
    // field per line followed by the blank line that ends the event.
    const dataLines = JSON.stringify(jsonObj, null, 2)
      .split('\n')
      .map((line) => `data: ${line}\n`);
    const ssePayload = dataLines.join('') + '\n';

    const mockStream = await streamFromChunks([ssePayload]);
    mockRequest.mockResolvedValueOnce({ data: mockStream });

    const stream = await server.requestStreamingPost('testStream', {});
    const results = await collect(stream);

    expect(results).toHaveLength(1);
    expect(results[0]).toEqual(jsonObj);
  });

  it('should not crash on objects partially matching VPC SC error structure', async () => {
    const { server } = createTestServer();

    await expectRejectionsToPassThrough(server, [
      { response: { data: { error: { details: [{ reason: 'OTHER' }] } } } },
      { response: { data: { error: { details: [] } } } },
      { response: { data: { error: {} } } },
      { response: { data: {} } },
    ]);
  });

  it('should correctly ignore arbitrary SSE comments and ID lines and empty lines before data', async () => {
    const { server, mockRequest } = createTestServer();
    const jsonObj = { foo: 'bar' };
    const jsonString = JSON.stringify(jsonObj);

    // Non-data fields and a blank line precede the single data event.
    const ssePayload = `id: 123
:comment
retry: 100

data: ${jsonString}

`;

    const mockStream = await streamFromChunks([ssePayload]);
    mockRequest.mockResolvedValueOnce({ data: mockStream });

    const stream = await server.requestStreamingPost('testStream', {});
    const results = await collect(stream);

    expect(results).toHaveLength(1);
    expect(results[0]).toEqual(jsonObj);
  });

  it('should log InvalidChunkEvent when SSE chunk is not valid JSON', async () => {
    const config = makeFakeConfig();
    const mockRequest = vi.fn();
    const client = { request: mockRequest } as unknown as OAuth2Client;
    const server = new CodeAssistServer(
      client,
      'test-project',
      {},
      'test-session',
      UserTierId.FREE,
      undefined,
      undefined,
      config,
    );

    // This stream is fed manually below, so read() does nothing.
    const { Readable } = await import('node:stream');
    const mockStream = new Readable({
      read() {},
    });

    mockRequest.mockResolvedValue({ data: mockStream });

    const stream = await server.requestStreamingPost('testStream', {});

    // Deliver the malformed event after the consumer has started iterating.
    setTimeout(() => {
      mockStream.push('data: { "invalid": json }\n\n');
      mockStream.push(null);
    }, 0);

    const results = await collect(stream);

    expect(results).toHaveLength(0);
    expect(logInvalidChunk).toHaveBeenCalledWith(
      config,
      expect.objectContaining({
        error_message: 'Malformed JSON chunk',
      }),
    );
  });

  it('should safely process random response streams in generateContentStream (consumed/remaining credits)', async () => {
    const { mockRequest, client } = createTestServer();
    const testServer = new CodeAssistServer(
      client,
      'test-project',
      {},
      'test-session',
      UserTierId.FREE,
      undefined,
      { id: 'test-tier', name: 'tier', availableCredits: [] },
    );

    // Responses with present, partial, absent and null/undefined credit
    // fields.
    const streamResponses = [
      {
        traceId: '1',
        consumedCredits: [{ creditType: 'A', creditAmount: '10' }],
      },
      { traceId: '2', remainingCredits: [{ creditType: 'B' }] },
      { traceId: '3' },
      { traceId: '4', consumedCredits: null, remainingCredits: undefined },
    ];

    const mockStream = await streamFromChunks(
      streamResponses.map((resp) => `data: ${JSON.stringify(resp)}\n\n`),
    );
    mockRequest.mockResolvedValueOnce({ data: mockStream });
    vi.spyOn(testServer, 'recordCodeAssistMetrics').mockResolvedValue(
      undefined,
    );

    const stream = await testServer.generateContentStream(
      { model: 'test-model', contents: [] },
      'user-prompt-id',
      LlmRole.MAIN,
    );

    // Drain stream; the test passes as long as iteration does not throw.
    await collect(stream);
  });
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ import {
|
|||||||
isOverageEligibleModel,
|
isOverageEligibleModel,
|
||||||
shouldAutoUseCredits,
|
shouldAutoUseCredits,
|
||||||
} from '../billing/billing.js';
|
} from '../billing/billing.js';
|
||||||
import { logBillingEvent } from '../telemetry/loggers.js';
|
import { logBillingEvent, logInvalidChunk } from '../telemetry/loggers.js';
|
||||||
import { CreditsUsedEvent } from '../telemetry/billingEvents.js';
|
import { CreditsUsedEvent } from '../telemetry/billingEvents.js';
|
||||||
import {
|
import {
|
||||||
fromCountTokenResponse,
|
fromCountTokenResponse,
|
||||||
@@ -62,7 +62,7 @@ import {
|
|||||||
recordConversationOffered,
|
recordConversationOffered,
|
||||||
} from './telemetry.js';
|
} from './telemetry.js';
|
||||||
import { getClientMetadata } from './experiments/client_metadata.js';
|
import { getClientMetadata } from './experiments/client_metadata.js';
|
||||||
import type { LlmRole } from '../telemetry/types.js';
|
import { InvalidChunkEvent, type LlmRole } from '../telemetry/types.js';
|
||||||
/** HTTP options to be used in each of the requests. */
|
/** HTTP options to be used in each of the requests. */
|
||||||
export interface HttpOptions {
|
export interface HttpOptions {
|
||||||
/** Additional HTTP headers to be sent with the request. */
|
/** Additional HTTP headers to be sent with the request. */
|
||||||
@@ -466,7 +466,7 @@ export class CodeAssistServer implements ContentGenerator {
|
|||||||
retry: false,
|
retry: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
return (async function* (): AsyncGenerator<T> {
|
return (async function* (server: CodeAssistServer): AsyncGenerator<T> {
|
||||||
const rl = readline.createInterface({
|
const rl = readline.createInterface({
|
||||||
input: Readable.from(res.data),
|
input: Readable.from(res.data),
|
||||||
crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks
|
crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks
|
||||||
@@ -480,12 +480,23 @@ export class CodeAssistServer implements ContentGenerator {
|
|||||||
if (bufferedLines.length === 0) {
|
if (bufferedLines.length === 0) {
|
||||||
continue; // no data to yield
|
continue; // no data to yield
|
||||||
}
|
}
|
||||||
yield JSON.parse(bufferedLines.join('\n'));
|
const chunk = bufferedLines.join('\n');
|
||||||
|
try {
|
||||||
|
yield JSON.parse(chunk);
|
||||||
|
} catch (_e) {
|
||||||
|
if (server.config) {
|
||||||
|
logInvalidChunk(
|
||||||
|
server.config,
|
||||||
|
// Don't include the chunk content in the log for security/privacy reasons.
|
||||||
|
new InvalidChunkEvent('Malformed JSON chunk'),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
bufferedLines = []; // Reset the buffer after yielding
|
bufferedLines = []; // Reset the buffer after yielding
|
||||||
}
|
}
|
||||||
// Ignore other lines like comments or id fields
|
// Ignore other lines like comments or id fields
|
||||||
}
|
}
|
||||||
})();
|
})(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
private getBaseUrl(): string {
|
private getBaseUrl(): string {
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ describe('telemetry', () => {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
true,
|
true,
|
||||||
[{ name: 'someTool', args: {} }],
|
[{ name: 'replace', args: {} }],
|
||||||
);
|
);
|
||||||
const traceId = 'test-trace-id';
|
const traceId = 'test-trace-id';
|
||||||
const streamingLatency: StreamingLatency = { totalLatency: '1s' };
|
const streamingLatency: StreamingLatency = { totalLatency: '1s' };
|
||||||
@@ -130,7 +130,7 @@ describe('telemetry', () => {
|
|||||||
|
|
||||||
it('should set status to CANCELLED if signal is aborted', () => {
|
it('should set status to CANCELLED if signal is aborted', () => {
|
||||||
const response = createMockResponse([], true, [
|
const response = createMockResponse([], true, [
|
||||||
{ name: 'tool', args: {} },
|
{ name: 'replace', args: {} },
|
||||||
]);
|
]);
|
||||||
const signal = new AbortController().signal;
|
const signal = new AbortController().signal;
|
||||||
vi.spyOn(signal, 'aborted', 'get').mockReturnValue(true);
|
vi.spyOn(signal, 'aborted', 'get').mockReturnValue(true);
|
||||||
@@ -147,7 +147,7 @@ describe('telemetry', () => {
|
|||||||
|
|
||||||
it('should set status to ERROR_UNKNOWN if response has error (non-OK SDK response)', () => {
|
it('should set status to ERROR_UNKNOWN if response has error (non-OK SDK response)', () => {
|
||||||
const response = createMockResponse([], false, [
|
const response = createMockResponse([], false, [
|
||||||
{ name: 'tool', args: {} },
|
{ name: 'replace', args: {} },
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const result = createConversationOffered(
|
const result = createConversationOffered(
|
||||||
@@ -169,7 +169,7 @@ describe('telemetry', () => {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
true,
|
true,
|
||||||
[{ name: 'tool', args: {} }],
|
[{ name: 'replace', args: {} }],
|
||||||
);
|
);
|
||||||
|
|
||||||
const result = createConversationOffered(
|
const result = createConversationOffered(
|
||||||
@@ -186,7 +186,7 @@ describe('telemetry', () => {
|
|||||||
// We force functionCalls to be present to bypass the guard,
|
// We force functionCalls to be present to bypass the guard,
|
||||||
// simulating a state where we want to test the candidates check.
|
// simulating a state where we want to test the candidates check.
|
||||||
const response = createMockResponse([], true, [
|
const response = createMockResponse([], true, [
|
||||||
{ name: 'tool', args: {} },
|
{ name: 'replace', args: {} },
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const result = createConversationOffered(
|
const result = createConversationOffered(
|
||||||
@@ -212,7 +212,7 @@ describe('telemetry', () => {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
true,
|
true,
|
||||||
[{ name: 'tool', args: {} }],
|
[{ name: 'replace', args: {} }],
|
||||||
);
|
);
|
||||||
const result = createConversationOffered(response, 'id', undefined, {});
|
const result = createConversationOffered(response, 'id', undefined, {});
|
||||||
expect(result?.includedCode).toBe(true);
|
expect(result?.includedCode).toBe(true);
|
||||||
@@ -229,7 +229,7 @@ describe('telemetry', () => {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
true,
|
true,
|
||||||
[{ name: 'tool', args: {} }],
|
[{ name: 'replace', args: {} }],
|
||||||
);
|
);
|
||||||
const result = createConversationOffered(response, 'id', undefined, {});
|
const result = createConversationOffered(response, 'id', undefined, {});
|
||||||
expect(result?.includedCode).toBe(false);
|
expect(result?.includedCode).toBe(false);
|
||||||
@@ -250,7 +250,7 @@ describe('telemetry', () => {
|
|||||||
} as unknown as CodeAssistServer;
|
} as unknown as CodeAssistServer;
|
||||||
|
|
||||||
const response = createMockResponse([], true, [
|
const response = createMockResponse([], true, [
|
||||||
{ name: 'tool', args: {} },
|
{ name: 'replace', args: {} },
|
||||||
]);
|
]);
|
||||||
const streamingLatency = {};
|
const streamingLatency = {};
|
||||||
|
|
||||||
@@ -274,7 +274,7 @@ describe('telemetry', () => {
|
|||||||
recordConversationOffered: vi.fn(),
|
recordConversationOffered: vi.fn(),
|
||||||
} as unknown as CodeAssistServer;
|
} as unknown as CodeAssistServer;
|
||||||
const response = createMockResponse([], true, [
|
const response = createMockResponse([], true, [
|
||||||
{ name: 'tool', args: {} },
|
{ name: 'replace', args: {} },
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await recordConversationOffered(
|
await recordConversationOffered(
|
||||||
@@ -331,17 +331,89 @@ describe('telemetry', () => {
|
|||||||
|
|
||||||
await recordToolCallInteractions({} as Config, toolCalls);
|
await recordToolCallInteractions({} as Config, toolCalls);
|
||||||
|
|
||||||
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith({
|
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
|
||||||
traceId: 'trace-1',
|
expect.objectContaining({
|
||||||
status: ActionStatus.ACTION_STATUS_NO_ERROR,
|
traceId: 'trace-1',
|
||||||
interaction: ConversationInteractionInteraction.ACCEPT_FILE,
|
status: ActionStatus.ACTION_STATUS_NO_ERROR,
|
||||||
acceptedLines: '5',
|
interaction: ConversationInteractionInteraction.ACCEPT_FILE,
|
||||||
removedLines: '3',
|
acceptedLines: '8',
|
||||||
isAgentic: true,
|
removedLines: '3',
|
||||||
});
|
isAgentic: true,
|
||||||
|
}),
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should record UNKNOWN interaction for other accepted tools', async () => {
|
it('should include language in interaction if file_path is present', async () => {
|
||||||
|
const toolCalls: CompletedToolCall[] = [
|
||||||
|
{
|
||||||
|
request: {
|
||||||
|
name: 'replace',
|
||||||
|
args: {
|
||||||
|
file_path: 'test.ts',
|
||||||
|
old_string: 'old',
|
||||||
|
new_string: 'new',
|
||||||
|
},
|
||||||
|
callId: 'call-1',
|
||||||
|
isClientInitiated: false,
|
||||||
|
prompt_id: 'p1',
|
||||||
|
traceId: 'trace-1',
|
||||||
|
},
|
||||||
|
response: {
|
||||||
|
resultDisplay: {
|
||||||
|
diffStat: {
|
||||||
|
model_added_lines: 5,
|
||||||
|
model_removed_lines: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
outcome: ToolConfirmationOutcome.ProceedOnce,
|
||||||
|
status: 'success',
|
||||||
|
} as unknown as CompletedToolCall,
|
||||||
|
];
|
||||||
|
|
||||||
|
await recordToolCallInteractions({} as Config, toolCalls);
|
||||||
|
|
||||||
|
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
language: 'TypeScript',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should include language in interaction if write_file is used', async () => {
|
||||||
|
const toolCalls: CompletedToolCall[] = [
|
||||||
|
{
|
||||||
|
request: {
|
||||||
|
name: 'write_file',
|
||||||
|
args: { file_path: 'test.py', content: 'test' },
|
||||||
|
callId: 'call-1',
|
||||||
|
isClientInitiated: false,
|
||||||
|
prompt_id: 'p1',
|
||||||
|
traceId: 'trace-1',
|
||||||
|
},
|
||||||
|
response: {
|
||||||
|
resultDisplay: {
|
||||||
|
diffStat: {
|
||||||
|
model_added_lines: 5,
|
||||||
|
model_removed_lines: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
outcome: ToolConfirmationOutcome.ProceedOnce,
|
||||||
|
status: 'success',
|
||||||
|
} as unknown as CompletedToolCall,
|
||||||
|
];
|
||||||
|
|
||||||
|
await recordToolCallInteractions({} as Config, toolCalls);
|
||||||
|
|
||||||
|
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
language: 'Python',
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not record interaction for other accepted tools', async () => {
|
||||||
const toolCalls: CompletedToolCall[] = [
|
const toolCalls: CompletedToolCall[] = [
|
||||||
{
|
{
|
||||||
request: {
|
request: {
|
||||||
@@ -359,19 +431,14 @@ describe('telemetry', () => {
|
|||||||
|
|
||||||
await recordToolCallInteractions({} as Config, toolCalls);
|
await recordToolCallInteractions({} as Config, toolCalls);
|
||||||
|
|
||||||
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith({
|
expect(mockServer.recordConversationInteraction).not.toHaveBeenCalled();
|
||||||
traceId: 'trace-2',
|
|
||||||
status: ActionStatus.ACTION_STATUS_NO_ERROR,
|
|
||||||
interaction: ConversationInteractionInteraction.UNKNOWN,
|
|
||||||
isAgentic: true,
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not record interaction for cancelled status', async () => {
|
it('should not record interaction for cancelled status', async () => {
|
||||||
const toolCalls: CompletedToolCall[] = [
|
const toolCalls: CompletedToolCall[] = [
|
||||||
{
|
{
|
||||||
request: {
|
request: {
|
||||||
name: 'tool',
|
name: 'replace',
|
||||||
args: {},
|
args: {},
|
||||||
callId: 'call-3',
|
callId: 'call-3',
|
||||||
isClientInitiated: false,
|
isClientInitiated: false,
|
||||||
@@ -394,7 +461,7 @@ describe('telemetry', () => {
|
|||||||
const toolCalls: CompletedToolCall[] = [
|
const toolCalls: CompletedToolCall[] = [
|
||||||
{
|
{
|
||||||
request: {
|
request: {
|
||||||
name: 'tool',
|
name: 'replace',
|
||||||
args: {},
|
args: {},
|
||||||
callId: 'call-4',
|
callId: 'call-4',
|
||||||
isClientInitiated: false,
|
isClientInitiated: false,
|
||||||
|
|||||||
@@ -22,10 +22,13 @@ import { EDIT_TOOL_NAMES } from '../tools/tool-names.js';
|
|||||||
import { getErrorMessage } from '../utils/errors.js';
|
import { getErrorMessage } from '../utils/errors.js';
|
||||||
import type { CodeAssistServer } from './server.js';
|
import type { CodeAssistServer } from './server.js';
|
||||||
import { ToolConfirmationOutcome } from '../tools/tools.js';
|
import { ToolConfirmationOutcome } from '../tools/tools.js';
|
||||||
|
import { getLanguageFromFilePath } from '../utils/language-detection.js';
|
||||||
import {
|
import {
|
||||||
computeModelAddedAndRemovedLines,
|
computeModelAddedAndRemovedLines,
|
||||||
getFileDiffFromResultDisplay,
|
getFileDiffFromResultDisplay,
|
||||||
} from '../utils/fileDiffUtils.js';
|
} from '../utils/fileDiffUtils.js';
|
||||||
|
import { isEditToolParams } from '../tools/edit.js';
|
||||||
|
import { isWriteFileToolParams } from '../tools/write-file.js';
|
||||||
|
|
||||||
export async function recordConversationOffered(
|
export async function recordConversationOffered(
|
||||||
server: CodeAssistServer,
|
server: CodeAssistServer,
|
||||||
@@ -85,10 +88,12 @@ export function createConversationOffered(
|
|||||||
signal: AbortSignal | undefined,
|
signal: AbortSignal | undefined,
|
||||||
streamingLatency: StreamingLatency,
|
streamingLatency: StreamingLatency,
|
||||||
): ConversationOffered | undefined {
|
): ConversationOffered | undefined {
|
||||||
// Only send conversation offered events for responses that contain function
|
// Only send conversation offered events for responses that contain edit
|
||||||
// calls. Non-function call events don't represent user actionable
|
// function calls. Non-edit function calls don't represent file modifications.
|
||||||
// 'suggestions'.
|
if (
|
||||||
if ((response.functionCalls?.length || 0) === 0) {
|
!response.functionCalls ||
|
||||||
|
!response.functionCalls.some((call) => EDIT_TOOL_NAMES.has(call.name || ''))
|
||||||
|
) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,6 +121,7 @@ function summarizeToolCalls(
|
|||||||
let isEdit = false;
|
let isEdit = false;
|
||||||
let acceptedLines = 0;
|
let acceptedLines = 0;
|
||||||
let removedLines = 0;
|
let removedLines = 0;
|
||||||
|
let language = undefined;
|
||||||
|
|
||||||
// Iterate the tool calls and summarize them into a single conversation
|
// Iterate the tool calls and summarize them into a single conversation
|
||||||
// interaction so that the ConversationOffered and ConversationInteraction
|
// interaction so that the ConversationOffered and ConversationInteraction
|
||||||
@@ -144,13 +150,23 @@ function summarizeToolCalls(
|
|||||||
if (EDIT_TOOL_NAMES.has(toolCall.request.name)) {
|
if (EDIT_TOOL_NAMES.has(toolCall.request.name)) {
|
||||||
isEdit = true;
|
isEdit = true;
|
||||||
|
|
||||||
|
if (
|
||||||
|
!language &&
|
||||||
|
(isEditToolParams(toolCall.request.args) ||
|
||||||
|
isWriteFileToolParams(toolCall.request.args))
|
||||||
|
) {
|
||||||
|
language = getLanguageFromFilePath(toolCall.request.args.file_path);
|
||||||
|
}
|
||||||
|
|
||||||
if (toolCall.status === 'success') {
|
if (toolCall.status === 'success') {
|
||||||
const fileDiff = getFileDiffFromResultDisplay(
|
const fileDiff = getFileDiffFromResultDisplay(
|
||||||
toolCall.response.resultDisplay,
|
toolCall.response.resultDisplay,
|
||||||
);
|
);
|
||||||
if (fileDiff?.diffStat) {
|
if (fileDiff?.diffStat) {
|
||||||
const lines = computeModelAddedAndRemovedLines(fileDiff.diffStat);
|
const lines = computeModelAddedAndRemovedLines(fileDiff.diffStat);
|
||||||
acceptedLines += lines.addedLines;
|
|
||||||
|
// The API expects acceptedLines to be addedLines + removedLines.
|
||||||
|
acceptedLines += lines.addedLines + lines.removedLines;
|
||||||
removedLines += lines.removedLines;
|
removedLines += lines.removedLines;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -158,16 +174,16 @@ function summarizeToolCalls(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only file interaction telemetry if 100% of the tool calls were accepted.
|
// Only file interaction telemetry if 100% of the tool calls were accepted
|
||||||
return traceId && acceptedToolCalls / toolCalls.length >= 1
|
// and at least one of them was an edit.
|
||||||
|
return traceId && acceptedToolCalls / toolCalls.length >= 1 && isEdit
|
||||||
? createConversationInteraction(
|
? createConversationInteraction(
|
||||||
traceId,
|
traceId,
|
||||||
actionStatus || ActionStatus.ACTION_STATUS_NO_ERROR,
|
actionStatus || ActionStatus.ACTION_STATUS_NO_ERROR,
|
||||||
isEdit
|
ConversationInteractionInteraction.ACCEPT_FILE,
|
||||||
? ConversationInteractionInteraction.ACCEPT_FILE
|
String(acceptedLines),
|
||||||
: ConversationInteractionInteraction.UNKNOWN,
|
String(removedLines),
|
||||||
isEdit ? String(acceptedLines) : undefined,
|
language,
|
||||||
isEdit ? String(removedLines) : undefined,
|
|
||||||
)
|
)
|
||||||
: undefined;
|
: undefined;
|
||||||
}
|
}
|
||||||
@@ -178,6 +194,7 @@ function createConversationInteraction(
|
|||||||
interaction: ConversationInteractionInteraction,
|
interaction: ConversationInteractionInteraction,
|
||||||
acceptedLines?: string,
|
acceptedLines?: string,
|
||||||
removedLines?: string,
|
removedLines?: string,
|
||||||
|
language?: string,
|
||||||
): ConversationInteraction {
|
): ConversationInteraction {
|
||||||
return {
|
return {
|
||||||
traceId,
|
traceId,
|
||||||
@@ -185,9 +202,11 @@ function createConversationInteraction(
|
|||||||
interaction,
|
interaction,
|
||||||
acceptedLines,
|
acceptedLines,
|
||||||
removedLines,
|
removedLines,
|
||||||
|
language,
|
||||||
isAgentic: true,
|
isAgentic: true,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function includesCode(resp: GenerateContentResponse): boolean {
|
function includesCode(resp: GenerateContentResponse): boolean {
|
||||||
if (!resp.candidates) {
|
if (!resp.candidates) {
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -447,7 +447,7 @@ export enum AuthProviderType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface SandboxConfig {
|
export interface SandboxConfig {
|
||||||
command: 'docker' | 'podman' | 'sandbox-exec';
|
command: 'docker' | 'podman' | 'sandbox-exec' | 'lxc';
|
||||||
image: string;
|
image: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ import type {
|
|||||||
} from '../services/modelConfigService.js';
|
} from '../services/modelConfigService.js';
|
||||||
import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';
|
import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';
|
||||||
import * as policyCatalog from '../availability/policyCatalog.js';
|
import * as policyCatalog from '../availability/policyCatalog.js';
|
||||||
import { LlmRole } from '../telemetry/types.js';
|
import { LlmRole, LoopType } from '../telemetry/types.js';
|
||||||
import { partToString } from '../utils/partUtils.js';
|
import { partToString } from '../utils/partUtils.js';
|
||||||
import { coreEvents } from '../utils/events.js';
|
import { coreEvents } from '../utils/events.js';
|
||||||
|
|
||||||
@@ -2915,45 +2915,257 @@ ${JSON.stringify(
|
|||||||
expect(mockCheckNextSpeaker).not.toHaveBeenCalled();
|
expect(mockCheckNextSpeaker).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should abort linked signal when loop is detected', async () => {
|
describe('Loop Recovery (Two-Strike)', () => {
|
||||||
// Arrange
|
beforeEach(() => {
|
||||||
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue(false);
|
const mockChat: Partial<GeminiChat> = {
|
||||||
vi.spyOn(client['loopDetector'], 'addAndCheck')
|
addHistory: vi.fn(),
|
||||||
.mockReturnValueOnce(false)
|
setTools: vi.fn(),
|
||||||
.mockReturnValueOnce(true);
|
getHistory: vi.fn().mockReturnValue([]),
|
||||||
|
getLastPromptTokenCount: vi.fn(),
|
||||||
let capturedSignal: AbortSignal;
|
};
|
||||||
mockTurnRunFn.mockImplementation((_modelConfigKey, _request, signal) => {
|
client['chat'] = mockChat as GeminiChat;
|
||||||
capturedSignal = signal;
|
vi.spyOn(client['loopDetector'], 'clearDetection');
|
||||||
return (async function* () {
|
vi.spyOn(client['loopDetector'], 'reset');
|
||||||
yield { type: 'content', value: 'First event' };
|
|
||||||
yield { type: 'content', value: 'Second event' };
|
|
||||||
})();
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const mockChat: Partial<GeminiChat> = {
|
it('should trigger recovery (Strike 1) and continue', async () => {
|
||||||
addHistory: vi.fn(),
|
// Arrange
|
||||||
setTools: vi.fn(),
|
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
|
||||||
getHistory: vi.fn().mockReturnValue([]),
|
count: 0,
|
||||||
getLastPromptTokenCount: vi.fn(),
|
});
|
||||||
};
|
vi.spyOn(client['loopDetector'], 'addAndCheck')
|
||||||
client['chat'] = mockChat as GeminiChat;
|
.mockReturnValueOnce({ count: 0 })
|
||||||
|
.mockReturnValueOnce({ count: 1, detail: 'Repetitive tool call' });
|
||||||
|
|
||||||
// Act
|
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
|
||||||
const stream = client.sendMessageStream(
|
|
||||||
[{ text: 'Hi' }],
|
|
||||||
new AbortController().signal,
|
|
||||||
'prompt-id-loop',
|
|
||||||
);
|
|
||||||
|
|
||||||
const events = [];
|
mockTurnRunFn.mockImplementation(() =>
|
||||||
for await (const event of stream) {
|
(async function* () {
|
||||||
events.push(event);
|
yield { type: GeminiEventType.Content, value: 'First event' };
|
||||||
}
|
yield { type: GeminiEventType.Content, value: 'Second event' };
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
// Assert
|
// Act
|
||||||
expect(events).toContainEqual({ type: GeminiEventType.LoopDetected });
|
const stream = client.sendMessageStream(
|
||||||
expect(capturedSignal!.aborted).toBe(true);
|
[{ text: 'Hi' }],
|
||||||
|
new AbortController().signal,
|
||||||
|
'prompt-id-loop-1',
|
||||||
|
);
|
||||||
|
|
||||||
|
const events = [];
|
||||||
|
for await (const event of stream) {
|
||||||
|
events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
// sendMessageStream should be called twice (original + recovery)
|
||||||
|
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2);
|
||||||
|
|
||||||
|
// Verify recovery call parameters
|
||||||
|
const recoveryCall = sendMessageStreamSpy.mock.calls[1];
|
||||||
|
expect((recoveryCall[0] as Part[])[0].text).toContain(
|
||||||
|
'System: Potential loop detected',
|
||||||
|
);
|
||||||
|
expect((recoveryCall[0] as Part[])[0].text).toContain(
|
||||||
|
'Repetitive tool call',
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify loopDetector.clearDetection was called
|
||||||
|
expect(client['loopDetector'].clearDetection).toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should terminate (Strike 2) after recovery fails', async () => {
|
||||||
|
// Arrange
|
||||||
|
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
// First call triggers Strike 1, Second call triggers Strike 2
|
||||||
|
vi.spyOn(client['loopDetector'], 'addAndCheck')
|
||||||
|
.mockReturnValueOnce({ count: 0 })
|
||||||
|
.mockReturnValueOnce({ count: 1, detail: 'Strike 1' }) // Triggers recovery in turn 1
|
||||||
|
.mockReturnValueOnce({ count: 2, detail: 'Strike 2' }); // Triggers termination in turn 2 (recovery turn)
|
||||||
|
|
||||||
|
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
|
||||||
|
|
||||||
|
mockTurnRunFn.mockImplementation(() =>
|
||||||
|
(async function* () {
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event' };
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event' };
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
const stream = client.sendMessageStream(
|
||||||
|
[{ text: 'Hi' }],
|
||||||
|
new AbortController().signal,
|
||||||
|
'prompt-id-loop-2',
|
||||||
|
);
|
||||||
|
|
||||||
|
const events = [];
|
||||||
|
for await (const event of stream) {
|
||||||
|
events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
expect(events).toContainEqual({ type: GeminiEventType.LoopDetected });
|
||||||
|
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2); // One original, one recovery
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should respect boundedTurns during recovery', async () => {
|
||||||
|
// Arrange
|
||||||
|
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
vi.spyOn(client['loopDetector'], 'addAndCheck').mockReturnValue({
|
||||||
|
count: 1,
|
||||||
|
detail: 'Loop',
|
||||||
|
});
|
||||||
|
|
||||||
|
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
|
||||||
|
|
||||||
|
mockTurnRunFn.mockImplementation(() =>
|
||||||
|
(async function* () {
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event' };
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
const stream = client.sendMessageStream(
|
||||||
|
[{ text: 'Hi' }],
|
||||||
|
new AbortController().signal,
|
||||||
|
'prompt-id-loop-3',
|
||||||
|
1, // Only 1 turn allowed
|
||||||
|
);
|
||||||
|
|
||||||
|
const events = [];
|
||||||
|
for await (const event of stream) {
|
||||||
|
events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
// Should NOT trigger recovery because boundedTurns would reach 0
|
||||||
|
expect(events).toContainEqual({
|
||||||
|
type: GeminiEventType.MaxSessionTurns,
|
||||||
|
});
|
||||||
|
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should suppress LoopDetected event on Strike 1', async () => {
|
||||||
|
// Arrange
|
||||||
|
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
vi.spyOn(client['loopDetector'], 'addAndCheck')
|
||||||
|
.mockReturnValueOnce({ count: 0 })
|
||||||
|
.mockReturnValueOnce({ count: 1, detail: 'Strike 1' });
|
||||||
|
|
||||||
|
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
|
||||||
|
|
||||||
|
mockTurnRunFn.mockImplementation(() =>
|
||||||
|
(async function* () {
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event' };
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event 2' };
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
const stream = client.sendMessageStream(
|
||||||
|
[{ text: 'Hi' }],
|
||||||
|
new AbortController().signal,
|
||||||
|
'prompt-telemetry',
|
||||||
|
);
|
||||||
|
|
||||||
|
const events = [];
|
||||||
|
for await (const event of stream) {
|
||||||
|
events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
// Strike 1 should trigger recovery call but NOT emit LoopDetected event
|
||||||
|
expect(events).not.toContainEqual({
|
||||||
|
type: GeminiEventType.LoopDetected,
|
||||||
|
});
|
||||||
|
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should escalate Strike 2 even if loop type changes', async () => {
|
||||||
|
// Arrange
|
||||||
|
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Strike 1: Tool Call Loop, Strike 2: LLM Detected Loop
|
||||||
|
vi.spyOn(client['loopDetector'], 'addAndCheck')
|
||||||
|
.mockReturnValueOnce({ count: 0 })
|
||||||
|
.mockReturnValueOnce({
|
||||||
|
count: 1,
|
||||||
|
type: LoopType.TOOL_CALL_LOOP,
|
||||||
|
detail: 'Repetitive tool',
|
||||||
|
})
|
||||||
|
.mockReturnValueOnce({
|
||||||
|
count: 2,
|
||||||
|
type: LoopType.LLM_DETECTED_LOOP,
|
||||||
|
detail: 'LLM loop',
|
||||||
|
});
|
||||||
|
|
||||||
|
const sendMessageStreamSpy = vi.spyOn(client, 'sendMessageStream');
|
||||||
|
|
||||||
|
mockTurnRunFn.mockImplementation(() =>
|
||||||
|
(async function* () {
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event' };
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event 2' };
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
const stream = client.sendMessageStream(
|
||||||
|
[{ text: 'Hi' }],
|
||||||
|
new AbortController().signal,
|
||||||
|
'prompt-escalate',
|
||||||
|
);
|
||||||
|
|
||||||
|
const events = [];
|
||||||
|
for await (const event of stream) {
|
||||||
|
events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
expect(events).toContainEqual({ type: GeminiEventType.LoopDetected });
|
||||||
|
expect(sendMessageStreamSpy).toHaveBeenCalledTimes(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should reset loop detector on new prompt', async () => {
|
||||||
|
// Arrange
|
||||||
|
vi.spyOn(client['loopDetector'], 'turnStarted').mockResolvedValue({
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
vi.spyOn(client['loopDetector'], 'addAndCheck').mockReturnValue({
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
mockTurnRunFn.mockImplementation(() =>
|
||||||
|
(async function* () {
|
||||||
|
yield { type: GeminiEventType.Content, value: 'Event' };
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
const stream = client.sendMessageStream(
|
||||||
|
[{ text: 'Hi' }],
|
||||||
|
new AbortController().signal,
|
||||||
|
'prompt-id-new',
|
||||||
|
);
|
||||||
|
for await (const _ of stream) {
|
||||||
|
// Consume stream
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
expect(client['loopDetector'].reset).toHaveBeenCalledWith(
|
||||||
|
'prompt-id-new',
|
||||||
|
'Hi',
|
||||||
|
);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -642,10 +642,23 @@ export class GeminiClient {
|
|||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const linkedSignal = AbortSignal.any([signal, controller.signal]);
|
const linkedSignal = AbortSignal.any([signal, controller.signal]);
|
||||||
|
|
||||||
const loopDetected = await this.loopDetector.turnStarted(signal);
|
const loopResult = await this.loopDetector.turnStarted(signal);
|
||||||
if (loopDetected) {
|
if (loopResult.count > 1) {
|
||||||
yield { type: GeminiEventType.LoopDetected };
|
yield { type: GeminiEventType.LoopDetected };
|
||||||
return turn;
|
return turn;
|
||||||
|
} else if (loopResult.count === 1) {
|
||||||
|
if (boundedTurns <= 1) {
|
||||||
|
yield { type: GeminiEventType.MaxSessionTurns };
|
||||||
|
return turn;
|
||||||
|
}
|
||||||
|
return yield* this._recoverFromLoop(
|
||||||
|
loopResult,
|
||||||
|
signal,
|
||||||
|
prompt_id,
|
||||||
|
boundedTurns,
|
||||||
|
isInvalidStreamRetry,
|
||||||
|
displayContent,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const routingContext: RoutingContext = {
|
const routingContext: RoutingContext = {
|
||||||
@@ -696,10 +709,26 @@ export class GeminiClient {
|
|||||||
let isInvalidStream = false;
|
let isInvalidStream = false;
|
||||||
|
|
||||||
for await (const event of resultStream) {
|
for await (const event of resultStream) {
|
||||||
if (this.loopDetector.addAndCheck(event)) {
|
const loopResult = this.loopDetector.addAndCheck(event);
|
||||||
|
if (loopResult.count > 1) {
|
||||||
yield { type: GeminiEventType.LoopDetected };
|
yield { type: GeminiEventType.LoopDetected };
|
||||||
controller.abort();
|
controller.abort();
|
||||||
return turn;
|
return turn;
|
||||||
|
} else if (loopResult.count === 1) {
|
||||||
|
if (boundedTurns <= 1) {
|
||||||
|
yield { type: GeminiEventType.MaxSessionTurns };
|
||||||
|
controller.abort();
|
||||||
|
return turn;
|
||||||
|
}
|
||||||
|
return yield* this._recoverFromLoop(
|
||||||
|
loopResult,
|
||||||
|
signal,
|
||||||
|
prompt_id,
|
||||||
|
boundedTurns,
|
||||||
|
isInvalidStreamRetry,
|
||||||
|
displayContent,
|
||||||
|
controller,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
yield event;
|
yield event;
|
||||||
|
|
||||||
@@ -1128,4 +1157,42 @@ export class GeminiClient {
|
|||||||
this.getChat().setHistory(result.newHistory);
|
this.getChat().setHistory(result.newHistory);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handles loop recovery by providing feedback to the model and initiating a new turn.
|
||||||
|
*/
|
||||||
|
private _recoverFromLoop(
|
||||||
|
loopResult: { detail?: string },
|
||||||
|
signal: AbortSignal,
|
||||||
|
prompt_id: string,
|
||||||
|
boundedTurns: number,
|
||||||
|
isInvalidStreamRetry: boolean,
|
||||||
|
displayContent?: PartListUnion,
|
||||||
|
controllerToAbort?: AbortController,
|
||||||
|
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
|
||||||
|
controllerToAbort?.abort();
|
||||||
|
|
||||||
|
// Clear the detection flag so the recursive turn can proceed, but the count remains 1.
|
||||||
|
this.loopDetector.clearDetection();
|
||||||
|
|
||||||
|
const feedbackText = `System: Potential loop detected. Details: ${loopResult.detail || 'Repetitive patterns identified'}. Please take a step back and confirm you're making forward progress. If not, take a step back, analyze your previous actions and rethink how you're approaching the problem. Avoid repeating the same tool calls or responses without new results.`;
|
||||||
|
|
||||||
|
if (this.config.getDebugMode()) {
|
||||||
|
debugLogger.warn(
|
||||||
|
'Iterative Loop Recovery: Injecting feedback message to model.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const feedback = [{ text: feedbackText }];
|
||||||
|
|
||||||
|
// Recursive call with feedback
|
||||||
|
return this.sendMessageStream(
|
||||||
|
feedback,
|
||||||
|
signal,
|
||||||
|
prompt_id,
|
||||||
|
boundedTurns - 1,
|
||||||
|
isInvalidStreamRetry,
|
||||||
|
displayContent,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ describe('LoopDetectionService', () => {
|
|||||||
it(`should not detect a loop for fewer than TOOL_CALL_LOOP_THRESHOLD identical calls`, () => {
|
it(`should not detect a loop for fewer than TOOL_CALL_LOOP_THRESHOLD identical calls`, () => {
|
||||||
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
|
||||||
expect(service.addAndCheck(event)).toBe(false);
|
expect(service.addAndCheck(event).count).toBe(0);
|
||||||
}
|
}
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
@@ -89,7 +89,7 @@ describe('LoopDetectionService', () => {
|
|||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
|
||||||
service.addAndCheck(event);
|
service.addAndCheck(event);
|
||||||
}
|
}
|
||||||
expect(service.addAndCheck(event)).toBe(true);
|
expect(service.addAndCheck(event).count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -98,7 +98,7 @@ describe('LoopDetectionService', () => {
|
|||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
||||||
service.addAndCheck(event);
|
service.addAndCheck(event);
|
||||||
}
|
}
|
||||||
expect(service.addAndCheck(event)).toBe(true);
|
expect(service.addAndCheck(event).count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -114,9 +114,9 @@ describe('LoopDetectionService', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 2; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 2; i++) {
|
||||||
expect(service.addAndCheck(event1)).toBe(false);
|
expect(service.addAndCheck(event1).count).toBe(0);
|
||||||
expect(service.addAndCheck(event2)).toBe(false);
|
expect(service.addAndCheck(event2).count).toBe(0);
|
||||||
expect(service.addAndCheck(event3)).toBe(false);
|
expect(service.addAndCheck(event3).count).toBe(0);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -130,14 +130,14 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Send events just below the threshold
|
// Send events just below the threshold
|
||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD - 1; i++) {
|
||||||
expect(service.addAndCheck(toolCallEvent)).toBe(false);
|
expect(service.addAndCheck(toolCallEvent).count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send a different event type
|
// Send a different event type
|
||||||
expect(service.addAndCheck(otherEvent)).toBe(false);
|
expect(service.addAndCheck(otherEvent).count).toBe(0);
|
||||||
|
|
||||||
// Send the tool call event again, which should now trigger the loop
|
// Send the tool call event again, which should now trigger the loop
|
||||||
expect(service.addAndCheck(toolCallEvent)).toBe(true);
|
expect(service.addAndCheck(toolCallEvent).count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -146,7 +146,7 @@ describe('LoopDetectionService', () => {
|
|||||||
expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1);
|
||||||
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
||||||
expect(service.addAndCheck(event)).toBe(false);
|
expect(service.addAndCheck(event).count).toBe(0);
|
||||||
}
|
}
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
@@ -156,19 +156,19 @@ describe('LoopDetectionService', () => {
|
|||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
||||||
service.addAndCheck(event);
|
service.addAndCheck(event);
|
||||||
}
|
}
|
||||||
expect(service.addAndCheck(event)).toBe(true);
|
expect(service.addAndCheck(event).count).toBe(1);
|
||||||
|
|
||||||
service.disableForSession();
|
service.disableForSession();
|
||||||
|
|
||||||
// Should now return false even though a loop was previously detected
|
// Should now return 0 even though a loop was previously detected
|
||||||
expect(service.addAndCheck(event)).toBe(false);
|
expect(service.addAndCheck(event).count).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should skip loop detection if disabled in config', () => {
|
it('should skip loop detection if disabled in config', () => {
|
||||||
vi.spyOn(mockConfig, 'getDisableLoopDetection').mockReturnValue(true);
|
vi.spyOn(mockConfig, 'getDisableLoopDetection').mockReturnValue(true);
|
||||||
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
||||||
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD + 2; i++) {
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD + 2; i++) {
|
||||||
expect(service.addAndCheck(event)).toBe(false);
|
expect(service.addAndCheck(event).count).toBe(0);
|
||||||
}
|
}
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
@@ -192,8 +192,8 @@ describe('LoopDetectionService', () => {
|
|||||||
service.reset('');
|
service.reset('');
|
||||||
for (let i = 0; i < 1000; i++) {
|
for (let i = 0; i < 1000; i++) {
|
||||||
const content = generateRandomString(10);
|
const content = generateRandomString(10);
|
||||||
const isLoop = service.addAndCheck(createContentEvent(content));
|
const result = service.addAndCheck(createContentEvent(content));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
@@ -202,17 +202,17 @@ describe('LoopDetectionService', () => {
|
|||||||
service.reset('');
|
service.reset('');
|
||||||
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
|
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not detect a loop for a list with a long shared prefix', () => {
|
it('should not detect a loop for a list with a long shared prefix', () => {
|
||||||
service.reset('');
|
service.reset('');
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
const longPrefix =
|
const longPrefix =
|
||||||
'projects/my-google-cloud-project-12345/locations/us-central1/services/';
|
'projects/my-google-cloud-project-12345/locations/us-central1/services/';
|
||||||
|
|
||||||
@@ -223,9 +223,9 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Simulate receiving the list in a single large chunk or a few chunks
|
// Simulate receiving the list in a single large chunk or a few chunks
|
||||||
// This is the specific case where the issue occurs, as list boundaries might not reset tracking properly
|
// This is the specific case where the issue occurs, as list boundaries might not reset tracking properly
|
||||||
isLoop = service.addAndCheck(createContentEvent(listContent));
|
result = service.addAndCheck(createContentEvent(listContent));
|
||||||
|
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -234,12 +234,12 @@ describe('LoopDetectionService', () => {
|
|||||||
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
|
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
|
||||||
const fillerContent = generateRandomString(500);
|
const fillerContent = generateRandomString(500);
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
isLoop = service.addAndCheck(createContentEvent(fillerContent));
|
result = service.addAndCheck(createContentEvent(fillerContent));
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -248,12 +248,12 @@ describe('LoopDetectionService', () => {
|
|||||||
const longPattern = createRepetitiveContent(1, 150);
|
const longPattern = createRepetitiveContent(1, 150);
|
||||||
expect(longPattern.length).toBe(150);
|
expect(longPattern.length).toBe(150);
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 2; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 2; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(longPattern));
|
result = service.addAndCheck(createContentEvent(longPattern));
|
||||||
if (isLoop) break;
|
if (result.count > 0) break;
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -266,13 +266,13 @@ describe('LoopDetectionService', () => {
|
|||||||
I will wait for the user's next command.
|
I will wait for the user's next command.
|
||||||
`;
|
`;
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
// Loop enough times to trigger the threshold
|
// Loop enough times to trigger the threshold
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(userPattern));
|
result = service.addAndCheck(createContentEvent(userPattern));
|
||||||
if (isLoop) break;
|
if (result.count > 0) break;
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -281,12 +281,12 @@ describe('LoopDetectionService', () => {
|
|||||||
const userPattern =
|
const userPattern =
|
||||||
'I have added all the requested logs and verified the test file. I will now mark the task as complete.\n ';
|
'I have added all the requested logs and verified the test file. I will now mark the task as complete.\n ';
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(userPattern));
|
result = service.addAndCheck(createContentEvent(userPattern));
|
||||||
if (isLoop) break;
|
if (result.count > 0) break;
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -294,14 +294,14 @@ describe('LoopDetectionService', () => {
|
|||||||
service.reset('');
|
service.reset('');
|
||||||
const alternatingPattern = 'Thinking... Done. ';
|
const alternatingPattern = 'Thinking... Done. ';
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
// Needs more iterations because the pattern is short relative to chunk size,
|
// Needs more iterations because the pattern is short relative to chunk size,
|
||||||
// so it takes a few slides of the window to find the exact alignment.
|
// so it takes a few slides of the window to find the exact alignment.
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD * 3; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD * 3; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(alternatingPattern));
|
result = service.addAndCheck(createContentEvent(alternatingPattern));
|
||||||
if (isLoop) break;
|
if (result.count > 0) break;
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -310,12 +310,12 @@ describe('LoopDetectionService', () => {
|
|||||||
const thoughtPattern =
|
const thoughtPattern =
|
||||||
'I need to check the file. The file does not exist. I will create the file. ';
|
'I need to check the file. The file does not exist. I will create the file. ';
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(thoughtPattern));
|
result = service.addAndCheck(createContentEvent(thoughtPattern));
|
||||||
if (isLoop) break;
|
if (result.count > 0) break;
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -328,12 +328,12 @@ describe('LoopDetectionService', () => {
|
|||||||
service.addAndCheck(createContentEvent('```\n'));
|
service.addAndCheck(createContentEvent('```\n'));
|
||||||
|
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
const result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const isLoop = service.addAndCheck(createContentEvent('\n```'));
|
const result = service.addAndCheck(createContentEvent('\n```'));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -349,15 +349,15 @@ describe('LoopDetectionService', () => {
|
|||||||
// Now transition into a code block - this should prevent loop detection
|
// Now transition into a code block - this should prevent loop detection
|
||||||
// even though we were already close to the threshold
|
// even though we were already close to the threshold
|
||||||
const codeBlockStart = '```javascript\n';
|
const codeBlockStart = '```javascript\n';
|
||||||
const isLoop = service.addAndCheck(createContentEvent(codeBlockStart));
|
const result = service.addAndCheck(createContentEvent(codeBlockStart));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
|
|
||||||
// Continue adding repetitive content inside the code block - should not trigger loop
|
// Continue adding repetitive content inside the code block - should not trigger loop
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
const isLoopInside = service.addAndCheck(
|
const resultInside = service.addAndCheck(
|
||||||
createContentEvent(repeatedContent),
|
createContentEvent(repeatedContent),
|
||||||
);
|
);
|
||||||
expect(isLoopInside).toBe(false);
|
expect(resultInside.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -372,8 +372,8 @@ describe('LoopDetectionService', () => {
|
|||||||
// Verify we are now inside a code block and any content should be ignored for loop detection
|
// Verify we are now inside a code block and any content should be ignored for loop detection
|
||||||
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
|
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
const result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -388,25 +388,25 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Enter code block (1 fence) - should stop tracking
|
// Enter code block (1 fence) - should stop tracking
|
||||||
const enterResult = service.addAndCheck(createContentEvent('```\n'));
|
const enterResult = service.addAndCheck(createContentEvent('```\n'));
|
||||||
expect(enterResult).toBe(false);
|
expect(enterResult.count).toBe(0);
|
||||||
|
|
||||||
// Inside code block - should not track loops
|
// Inside code block - should not track loops
|
||||||
for (let i = 0; i < 5; i++) {
|
for (let i = 0; i < 5; i++) {
|
||||||
const insideResult = service.addAndCheck(
|
const insideResult = service.addAndCheck(
|
||||||
createContentEvent(repeatedContent),
|
createContentEvent(repeatedContent),
|
||||||
);
|
);
|
||||||
expect(insideResult).toBe(false);
|
expect(insideResult.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exit code block (2nd fence) - should reset tracking but still return false
|
// Exit code block (2nd fence) - should reset tracking but still return a count of 0
|
||||||
const exitResult = service.addAndCheck(createContentEvent('```\n'));
|
const exitResult = service.addAndCheck(createContentEvent('```\n'));
|
||||||
expect(exitResult).toBe(false);
|
expect(exitResult.count).toBe(0);
|
||||||
|
|
||||||
// Enter code block again (3rd fence) - should stop tracking again
|
// Enter code block again (3rd fence) - should stop tracking again
|
||||||
const reenterResult = service.addAndCheck(
|
const reenterResult = service.addAndCheck(
|
||||||
createContentEvent('```python\n'),
|
createContentEvent('```python\n'),
|
||||||
);
|
);
|
||||||
expect(reenterResult).toBe(false);
|
expect(reenterResult.count).toBe(0);
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
@@ -419,11 +419,11 @@ describe('LoopDetectionService', () => {
|
|||||||
service.addAndCheck(createContentEvent('\nsome code\n'));
|
service.addAndCheck(createContentEvent('\nsome code\n'));
|
||||||
service.addAndCheck(createContentEvent('```'));
|
service.addAndCheck(createContentEvent('```'));
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
}
|
}
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -431,9 +431,9 @@ describe('LoopDetectionService', () => {
|
|||||||
service.reset('');
|
service.reset('');
|
||||||
service.addAndCheck(createContentEvent('```\ncode1\n```'));
|
service.addAndCheck(createContentEvent('```\ncode1\n```'));
|
||||||
service.addAndCheck(createContentEvent('\nsome text\n'));
|
service.addAndCheck(createContentEvent('\nsome text\n'));
|
||||||
const isLoop = service.addAndCheck(createContentEvent('```\ncode2\n```'));
|
const result = service.addAndCheck(createContentEvent('```\ncode2\n```'));
|
||||||
|
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -445,12 +445,12 @@ describe('LoopDetectionService', () => {
|
|||||||
service.addAndCheck(createContentEvent('\ncode1\n'));
|
service.addAndCheck(createContentEvent('\ncode1\n'));
|
||||||
service.addAndCheck(createContentEvent('```'));
|
service.addAndCheck(createContentEvent('```'));
|
||||||
|
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(isLoop).toBe(true);
|
expect(result.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -462,12 +462,12 @@ describe('LoopDetectionService', () => {
|
|||||||
service.addAndCheck(createContentEvent('```\n'));
|
service.addAndCheck(createContentEvent('```\n'));
|
||||||
|
|
||||||
for (let i = 0; i < 20; i++) {
|
for (let i = 0; i < 20; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatingTokens));
|
const result = service.addAndCheck(createContentEvent(repeatingTokens));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const isLoop = service.addAndCheck(createContentEvent('\n```'));
|
const result = service.addAndCheck(createContentEvent('\n```'));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -484,10 +484,10 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// We are now in a code block, so loop detection should be off.
|
// We are now in a code block, so loop detection should be off.
|
||||||
// Let's add the repeated content again, it should not trigger a loop.
|
// Let's add the repeated content again, it should not trigger a loop.
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -505,8 +505,8 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Add more repeated content after table - should not trigger loop
|
// Add more repeated content after table - should not trigger loop
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
const result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -525,8 +525,8 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Add more repeated content after list - should not trigger loop
|
// Add more repeated content after list - should not trigger loop
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
const result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -545,8 +545,8 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Add more repeated content after heading - should not trigger loop
|
// Add more repeated content after heading - should not trigger loop
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
const result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -565,8 +565,8 @@ describe('LoopDetectionService', () => {
|
|||||||
|
|
||||||
// Add more repeated content after blockquote - should not trigger loop
|
// Add more repeated content after blockquote - should not trigger loop
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(createContentEvent(repeatedContent));
|
const result = service.addAndCheck(createContentEvent(repeatedContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
@@ -601,10 +601,10 @@ describe('LoopDetectionService', () => {
|
|||||||
CONTENT_CHUNK_SIZE,
|
CONTENT_CHUNK_SIZE,
|
||||||
);
|
);
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(
|
const result = service.addAndCheck(
|
||||||
createContentEvent(newRepeatedContent),
|
createContentEvent(newRepeatedContent),
|
||||||
);
|
);
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -638,10 +638,10 @@ describe('LoopDetectionService', () => {
|
|||||||
CONTENT_CHUNK_SIZE,
|
CONTENT_CHUNK_SIZE,
|
||||||
);
|
);
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(
|
const result = service.addAndCheck(
|
||||||
createContentEvent(newRepeatedContent),
|
createContentEvent(newRepeatedContent),
|
||||||
);
|
);
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -677,10 +677,10 @@ describe('LoopDetectionService', () => {
|
|||||||
CONTENT_CHUNK_SIZE,
|
CONTENT_CHUNK_SIZE,
|
||||||
);
|
);
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD - 1; i++) {
|
||||||
const isLoop = service.addAndCheck(
|
const result = service.addAndCheck(
|
||||||
createContentEvent(newRepeatedContent),
|
createContentEvent(newRepeatedContent),
|
||||||
);
|
);
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -691,7 +691,7 @@ describe('LoopDetectionService', () => {
|
|||||||
describe('Edge Cases', () => {
|
describe('Edge Cases', () => {
|
||||||
it('should handle empty content', () => {
|
it('should handle empty content', () => {
|
||||||
const event = createContentEvent('');
|
const event = createContentEvent('');
|
||||||
expect(service.addAndCheck(event)).toBe(false);
|
expect(service.addAndCheck(event).count).toBe(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -699,10 +699,10 @@ describe('LoopDetectionService', () => {
|
|||||||
it('should not detect a loop for repeating divider-like content', () => {
|
it('should not detect a loop for repeating divider-like content', () => {
|
||||||
service.reset('');
|
service.reset('');
|
||||||
const dividerContent = '-'.repeat(CONTENT_CHUNK_SIZE);
|
const dividerContent = '-'.repeat(CONTENT_CHUNK_SIZE);
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(dividerContent));
|
result = service.addAndCheck(createContentEvent(dividerContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
@@ -710,15 +710,52 @@ describe('LoopDetectionService', () => {
|
|||||||
it('should not detect a loop for repeating complex box-drawing dividers', () => {
|
it('should not detect a loop for repeating complex box-drawing dividers', () => {
|
||||||
service.reset('');
|
service.reset('');
|
||||||
const dividerContent = '╭─'.repeat(CONTENT_CHUNK_SIZE / 2);
|
const dividerContent = '╭─'.repeat(CONTENT_CHUNK_SIZE / 2);
|
||||||
let isLoop = false;
|
let result = { count: 0 };
|
||||||
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
for (let i = 0; i < CONTENT_LOOP_THRESHOLD + 5; i++) {
|
||||||
isLoop = service.addAndCheck(createContentEvent(dividerContent));
|
result = service.addAndCheck(createContentEvent(dividerContent));
|
||||||
expect(isLoop).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
}
|
}
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Strike Management', () => {
|
||||||
|
it('should increment strike count for repeated detections', () => {
|
||||||
|
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
||||||
|
|
||||||
|
// First strike
|
||||||
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
||||||
|
service.addAndCheck(event);
|
||||||
|
}
|
||||||
|
expect(service.addAndCheck(event).count).toBe(1);
|
||||||
|
|
||||||
|
// Recovery simulated by caller calling clearDetection()
|
||||||
|
service.clearDetection();
|
||||||
|
|
||||||
|
// Second strike
|
||||||
|
expect(service.addAndCheck(event).count).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow recovery turn to proceed after clearDetection', () => {
|
||||||
|
const event = createToolCallRequestEvent('testTool', { param: 'value' });
|
||||||
|
|
||||||
|
// Trigger loop
|
||||||
|
for (let i = 0; i < TOOL_CALL_LOOP_THRESHOLD; i++) {
|
||||||
|
service.addAndCheck(event);
|
||||||
|
}
|
||||||
|
expect(service.addAndCheck(event).count).toBe(1);
|
||||||
|
|
||||||
|
// Caller clears detection to allow recovery
|
||||||
|
service.clearDetection();
|
||||||
|
|
||||||
|
// A subsequent call in the same turn (or in the next turn, before the loop repeats) should return a count of 0.
|
||||||
|
// In reality, addAndCheck is called per event.
|
||||||
|
// If the model sends a NEW event, it should not immediately trigger.
|
||||||
|
const newEvent = createContentEvent('Recovery text');
|
||||||
|
expect(service.addAndCheck(newEvent).count).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('Reset Functionality', () => {
|
describe('Reset Functionality', () => {
|
||||||
it('tool call should reset content count', () => {
|
it('tool call should reset content count', () => {
|
||||||
const contentEvent = createContentEvent('Some content.');
|
const contentEvent = createContentEvent('Some content.');
|
||||||
@@ -732,19 +769,19 @@ describe('LoopDetectionService', () => {
|
|||||||
service.addAndCheck(toolEvent);
|
service.addAndCheck(toolEvent);
|
||||||
|
|
||||||
// Should start fresh
|
// Should start fresh
|
||||||
expect(service.addAndCheck(createContentEvent('Fresh content.'))).toBe(
|
expect(
|
||||||
false,
|
service.addAndCheck(createContentEvent('Fresh content.')).count,
|
||||||
);
|
).toBe(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('General Behavior', () => {
|
describe('General Behavior', () => {
|
||||||
it('should return false for unhandled event types', () => {
|
it('should return 0 count for unhandled event types', () => {
|
||||||
const otherEvent = {
|
const otherEvent = {
|
||||||
type: 'unhandled_event',
|
type: 'unhandled_event',
|
||||||
} as unknown as ServerGeminiStreamEvent;
|
} as unknown as ServerGeminiStreamEvent;
|
||||||
expect(service.addAndCheck(otherEvent)).toBe(false);
|
expect(service.addAndCheck(otherEvent).count).toBe(0);
|
||||||
expect(service.addAndCheck(otherEvent)).toBe(false);
|
expect(service.addAndCheck(otherEvent).count).toBe(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -805,16 +842,16 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
it('should not trigger LLM check before LLM_CHECK_AFTER_TURNS', async () => {
|
it('should not trigger LLM check before LLM_CHECK_AFTER_TURNS (30)', async () => {
|
||||||
await advanceTurns(39);
|
await advanceTurns(29);
|
||||||
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
|
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should trigger LLM check on the 40th turn', async () => {
|
it('should trigger LLM check on the 30th turn', async () => {
|
||||||
mockBaseLlmClient.generateJson = vi
|
mockBaseLlmClient.generateJson = vi
|
||||||
.fn()
|
.fn()
|
||||||
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
@@ -828,12 +865,12 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should detect a cognitive loop when confidence is high', async () => {
|
it('should detect a cognitive loop when confidence is high', async () => {
|
||||||
// First check at turn 40
|
// First check at turn 30
|
||||||
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
|
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
|
||||||
unproductive_state_confidence: 0.85,
|
unproductive_state_confidence: 0.85,
|
||||||
unproductive_state_analysis: 'Repetitive actions',
|
unproductive_state_analysis: 'Repetitive actions',
|
||||||
});
|
});
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
@@ -842,16 +879,16 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
);
|
);
|
||||||
|
|
||||||
// The confidence of 0.85 will result in a low interval.
|
// The confidence of 0.85 will result in a low interval.
|
||||||
// The interval will be: 7 + (15 - 7) * (1 - 0.85) = 7 + 8 * 0.15 = 8.2 -> rounded to 8
|
// The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7
|
||||||
await advanceTurns(7); // advance to turn 47
|
await advanceTurns(6); // advance to turn 36
|
||||||
|
|
||||||
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
|
mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
|
||||||
unproductive_state_confidence: 0.95,
|
unproductive_state_confidence: 0.95,
|
||||||
unproductive_state_analysis: 'Repetitive actions',
|
unproductive_state_analysis: 'Repetitive actions',
|
||||||
});
|
});
|
||||||
const finalResult = await service.turnStarted(abortController.signal); // This is turn 48
|
const finalResult = await service.turnStarted(abortController.signal); // This is turn 37
|
||||||
|
|
||||||
expect(finalResult).toBe(true);
|
expect(finalResult.count).toBe(1);
|
||||||
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
|
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
|
||||||
mockConfig,
|
mockConfig,
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
@@ -867,25 +904,25 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
unproductive_state_confidence: 0.5,
|
unproductive_state_confidence: 0.5,
|
||||||
unproductive_state_analysis: 'Looks okay',
|
unproductive_state_analysis: 'Looks okay',
|
||||||
});
|
});
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
const result = await service.turnStarted(abortController.signal);
|
const result = await service.turnStarted(abortController.signal);
|
||||||
expect(result).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should adjust the check interval based on confidence', async () => {
|
it('should adjust the check interval based on confidence', async () => {
|
||||||
// Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15)
|
// Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15)
|
||||||
// Interval = 7 + (15 - 7) * (1 - 0.0) = 15
|
// Interval = 5 + (15 - 5) * (1 - 0.0) = 15
|
||||||
mockBaseLlmClient.generateJson = vi
|
mockBaseLlmClient.generateJson = vi
|
||||||
.fn()
|
.fn()
|
||||||
.mockResolvedValue({ unproductive_state_confidence: 0.0 });
|
.mockResolvedValue({ unproductive_state_confidence: 0.0 });
|
||||||
await advanceTurns(40); // First check at turn 40
|
await advanceTurns(30); // First check at turn 30
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
await advanceTurns(14); // Advance to turn 54
|
await advanceTurns(14); // Advance to turn 44
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
await service.turnStarted(abortController.signal); // Turn 55
|
await service.turnStarted(abortController.signal); // Turn 45
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -893,18 +930,18 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
mockBaseLlmClient.generateJson = vi
|
mockBaseLlmClient.generateJson = vi
|
||||||
.fn()
|
.fn()
|
||||||
.mockRejectedValue(new Error('API error'));
|
.mockRejectedValue(new Error('API error'));
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
const result = await service.turnStarted(abortController.signal);
|
const result = await service.turnStarted(abortController.signal);
|
||||||
expect(result).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not trigger LLM check when disabled for session', async () => {
|
it('should not trigger LLM check when disabled for session', async () => {
|
||||||
service.disableForSession();
|
service.disableForSession();
|
||||||
expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1);
|
expect(loggers.logLoopDetectionDisabled).toHaveBeenCalledTimes(1);
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
const result = await service.turnStarted(abortController.signal);
|
const result = await service.turnStarted(abortController.signal);
|
||||||
expect(result).toBe(false);
|
expect(result.count).toBe(0);
|
||||||
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
|
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -925,7 +962,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
.fn()
|
.fn()
|
||||||
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
||||||
|
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
|
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
|
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
|
||||||
@@ -950,7 +987,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
unproductive_state_analysis: 'Main says loop',
|
unproductive_state_analysis: 'Main says loop',
|
||||||
});
|
});
|
||||||
|
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
|
|
||||||
// It should have called generateJson twice
|
// It should have called generateJson twice
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
||||||
@@ -990,7 +1027,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
unproductive_state_analysis: 'Main says no loop',
|
unproductive_state_analysis: 'Main says no loop',
|
||||||
});
|
});
|
||||||
|
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
|
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(2);
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
|
expect(mockBaseLlmClient.generateJson).toHaveBeenNthCalledWith(
|
||||||
@@ -1010,12 +1047,12 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
|
||||||
|
|
||||||
// But should have updated the interval based on the main model's confidence (0.89)
|
// But should have updated the interval based on the main model's confidence (0.89)
|
||||||
// Interval = 7 + (15-7) * (1 - 0.89) = 7 + 8 * 0.11 = 7 + 0.88 = 7.88 -> 8
|
// Interval = 5 + (15-5) * (1 - 0.89) = 5 + 10 * 0.11 = 5 + 1.1 = 6.1 -> 6
|
||||||
|
|
||||||
// Advance by 7 turns
|
// Advance by 5 turns
|
||||||
await advanceTurns(7);
|
await advanceTurns(5);
|
||||||
|
|
||||||
// Next turn (48) should trigger another check
|
// Next turn (36) should trigger another check
|
||||||
await service.turnStarted(abortController.signal);
|
await service.turnStarted(abortController.signal);
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3);
|
||||||
});
|
});
|
||||||
@@ -1033,7 +1070,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
unproductive_state_analysis: 'Flash says loop',
|
unproductive_state_analysis: 'Flash says loop',
|
||||||
});
|
});
|
||||||
|
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
|
|
||||||
// It should have called generateJson only once
|
// It should have called generateJson only once
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
@@ -1047,8 +1084,6 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
|
expect(loggers.logLoopDetected).toHaveBeenCalledWith(
|
||||||
mockConfig,
|
mockConfig,
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
'event.name': 'loop_detected',
|
|
||||||
loop_type: LoopType.LLM_DETECTED_LOOP,
|
|
||||||
confirmed_by_model: 'gemini-2.5-flash',
|
confirmed_by_model: 'gemini-2.5-flash',
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
@@ -1061,7 +1096,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
.fn()
|
.fn()
|
||||||
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
||||||
|
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
|
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
|
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
|
||||||
@@ -1091,7 +1126,7 @@ describe('LoopDetectionService LLM Checks', () => {
|
|||||||
.fn()
|
.fn()
|
||||||
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
.mockResolvedValue({ unproductive_state_confidence: 0.1 });
|
||||||
|
|
||||||
await advanceTurns(40);
|
await advanceTurns(30);
|
||||||
|
|
||||||
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
|
||||||
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
|
const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ const LLM_LOOP_CHECK_HISTORY_COUNT = 20;
|
|||||||
/**
|
/**
|
||||||
* The number of turns that must pass in a single prompt before the LLM-based loop check is activated.
|
* The number of turns that must pass in a single prompt before the LLM-based loop check is activated.
|
||||||
*/
|
*/
|
||||||
const LLM_CHECK_AFTER_TURNS = 40;
|
const LLM_CHECK_AFTER_TURNS = 30;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The default interval, in number of turns, at which the LLM-based loop check is performed.
|
* The default interval, in number of turns, at which the LLM-based loop check is performed.
|
||||||
@@ -51,7 +51,7 @@ const DEFAULT_LLM_CHECK_INTERVAL = 10;
|
|||||||
* The minimum interval for LLM-based loop checks.
|
* The minimum interval for LLM-based loop checks.
|
||||||
* This is used when the confidence of a loop is high, to check more frequently.
|
* This is used when the confidence of a loop is high, to check more frequently.
|
||||||
*/
|
*/
|
||||||
const MIN_LLM_CHECK_INTERVAL = 7;
|
const MIN_LLM_CHECK_INTERVAL = 5;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The maximum interval for LLM-based loop checks.
|
* The maximum interval for LLM-based loop checks.
|
||||||
@@ -117,6 +117,15 @@ const LOOP_DETECTION_SCHEMA: Record<string, unknown> = {
|
|||||||
required: ['unproductive_state_analysis', 'unproductive_state_confidence'],
|
required: ['unproductive_state_analysis', 'unproductive_state_confidence'],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of a loop detection check.
|
||||||
|
*/
|
||||||
|
export interface LoopDetectionResult {
|
||||||
|
count: number;
|
||||||
|
type?: LoopType;
|
||||||
|
detail?: string;
|
||||||
|
confirmedByModel?: string;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Service for detecting and preventing infinite loops in AI responses.
|
* Service for detecting and preventing infinite loops in AI responses.
|
||||||
* Monitors tool call repetitions and content sentence repetitions.
|
* Monitors tool call repetitions and content sentence repetitions.
|
||||||
@@ -135,8 +144,11 @@ export class LoopDetectionService {
|
|||||||
private contentStats = new Map<string, number[]>();
|
private contentStats = new Map<string, number[]>();
|
||||||
private lastContentIndex = 0;
|
private lastContentIndex = 0;
|
||||||
private loopDetected = false;
|
private loopDetected = false;
|
||||||
|
private detectedCount = 0;
|
||||||
|
private lastLoopDetail?: string;
|
||||||
private inCodeBlock = false;
|
private inCodeBlock = false;
|
||||||
|
|
||||||
|
private lastLoopType?: LoopType;
|
||||||
// LLM loop track tracking
|
// LLM loop track tracking
|
||||||
private turnsInCurrentPrompt = 0;
|
private turnsInCurrentPrompt = 0;
|
||||||
private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL;
|
private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL;
|
||||||
@@ -169,31 +181,68 @@ export class LoopDetectionService {
|
|||||||
/**
|
/**
|
||||||
* Processes a stream event and checks for loop conditions.
|
* Processes a stream event and checks for loop conditions.
|
||||||
* @param event - The stream event to process
|
* @param event - The stream event to process
|
||||||
* @returns true if a loop is detected, false otherwise
|
* @returns A LoopDetectionResult
|
||||||
*/
|
*/
|
||||||
addAndCheck(event: ServerGeminiStreamEvent): boolean {
|
addAndCheck(event: ServerGeminiStreamEvent): LoopDetectionResult {
|
||||||
if (this.disabledForSession || this.config.getDisableLoopDetection()) {
|
if (this.disabledForSession || this.config.getDisableLoopDetection()) {
|
||||||
return false;
|
return { count: 0 };
|
||||||
|
}
|
||||||
|
if (this.loopDetected) {
|
||||||
|
return {
|
||||||
|
count: this.detectedCount,
|
||||||
|
type: this.lastLoopType,
|
||||||
|
detail: this.lastLoopDetail,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.loopDetected) {
|
let isLoop = false;
|
||||||
return this.loopDetected;
|
let detail: string | undefined;
|
||||||
}
|
|
||||||
|
|
||||||
switch (event.type) {
|
switch (event.type) {
|
||||||
case GeminiEventType.ToolCallRequest:
|
case GeminiEventType.ToolCallRequest:
|
||||||
// content chanting only happens in one single stream, reset if there
|
// content chanting only happens in one single stream, reset if there
|
||||||
// is a tool call in between
|
// is a tool call in between
|
||||||
this.resetContentTracking();
|
this.resetContentTracking();
|
||||||
this.loopDetected = this.checkToolCallLoop(event.value);
|
isLoop = this.checkToolCallLoop(event.value);
|
||||||
|
if (isLoop) {
|
||||||
|
detail = `Repeated tool call: ${event.value.name} with arguments ${JSON.stringify(event.value.args)}`;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case GeminiEventType.Content:
|
case GeminiEventType.Content:
|
||||||
this.loopDetected = this.checkContentLoop(event.value);
|
isLoop = this.checkContentLoop(event.value);
|
||||||
|
if (isLoop) {
|
||||||
|
detail = `Repeating content detected: "${this.streamContentHistory.substring(Math.max(0, this.lastContentIndex - 20), this.lastContentIndex + CONTENT_CHUNK_SIZE).trim()}..."`;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return this.loopDetected;
|
|
||||||
|
if (isLoop) {
|
||||||
|
this.loopDetected = true;
|
||||||
|
this.detectedCount++;
|
||||||
|
this.lastLoopDetail = detail;
|
||||||
|
this.lastLoopType =
|
||||||
|
event.type === GeminiEventType.ToolCallRequest
|
||||||
|
? LoopType.CONSECUTIVE_IDENTICAL_TOOL_CALLS
|
||||||
|
: LoopType.CONTENT_CHANTING_LOOP;
|
||||||
|
|
||||||
|
logLoopDetected(
|
||||||
|
this.config,
|
||||||
|
new LoopDetectedEvent(
|
||||||
|
this.lastLoopType,
|
||||||
|
this.promptId,
|
||||||
|
this.detectedCount,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return isLoop
|
||||||
|
? {
|
||||||
|
count: this.detectedCount,
|
||||||
|
type: this.lastLoopType,
|
||||||
|
detail: this.lastLoopDetail,
|
||||||
|
}
|
||||||
|
: { count: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -204,12 +253,20 @@ export class LoopDetectionService {
|
|||||||
* is performed periodically based on the `llmCheckInterval`.
|
* is performed periodically based on the `llmCheckInterval`.
|
||||||
*
|
*
|
||||||
* @param signal - An AbortSignal to allow for cancellation of the asynchronous LLM check.
|
* @param signal - An AbortSignal to allow for cancellation of the asynchronous LLM check.
|
||||||
* @returns A promise that resolves to `true` if a loop is detected, and `false` otherwise.
|
* @returns A promise that resolves to a LoopDetectionResult.
|
||||||
*/
|
*/
|
||||||
async turnStarted(signal: AbortSignal) {
|
async turnStarted(signal: AbortSignal): Promise<LoopDetectionResult> {
|
||||||
if (this.disabledForSession || this.config.getDisableLoopDetection()) {
|
if (this.disabledForSession || this.config.getDisableLoopDetection()) {
|
||||||
return false;
|
return { count: 0 };
|
||||||
}
|
}
|
||||||
|
if (this.loopDetected) {
|
||||||
|
return {
|
||||||
|
count: this.detectedCount,
|
||||||
|
type: this.lastLoopType,
|
||||||
|
detail: this.lastLoopDetail,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
this.turnsInCurrentPrompt++;
|
this.turnsInCurrentPrompt++;
|
||||||
|
|
||||||
if (
|
if (
|
||||||
@@ -217,10 +274,35 @@ export class LoopDetectionService {
|
|||||||
this.turnsInCurrentPrompt - this.lastCheckTurn >= this.llmCheckInterval
|
this.turnsInCurrentPrompt - this.lastCheckTurn >= this.llmCheckInterval
|
||||||
) {
|
) {
|
||||||
this.lastCheckTurn = this.turnsInCurrentPrompt;
|
this.lastCheckTurn = this.turnsInCurrentPrompt;
|
||||||
return this.checkForLoopWithLLM(signal);
|
const { isLoop, analysis, confirmedByModel } =
|
||||||
}
|
await this.checkForLoopWithLLM(signal);
|
||||||
|
if (isLoop) {
|
||||||
|
this.loopDetected = true;
|
||||||
|
this.detectedCount++;
|
||||||
|
this.lastLoopDetail = analysis;
|
||||||
|
this.lastLoopType = LoopType.LLM_DETECTED_LOOP;
|
||||||
|
|
||||||
return false;
|
logLoopDetected(
|
||||||
|
this.config,
|
||||||
|
new LoopDetectedEvent(
|
||||||
|
this.lastLoopType,
|
||||||
|
this.promptId,
|
||||||
|
this.detectedCount,
|
||||||
|
confirmedByModel,
|
||||||
|
analysis,
|
||||||
|
LLM_CONFIDENCE_THRESHOLD,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
count: this.detectedCount,
|
||||||
|
type: this.lastLoopType,
|
||||||
|
detail: this.lastLoopDetail,
|
||||||
|
confirmedByModel,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return { count: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
private checkToolCallLoop(toolCall: { name: string; args: object }): boolean {
|
private checkToolCallLoop(toolCall: { name: string; args: object }): boolean {
|
||||||
@@ -232,13 +314,6 @@ export class LoopDetectionService {
|
|||||||
this.toolCallRepetitionCount = 1;
|
this.toolCallRepetitionCount = 1;
|
||||||
}
|
}
|
||||||
if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) {
|
if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) {
|
||||||
logLoopDetected(
|
|
||||||
this.config,
|
|
||||||
new LoopDetectedEvent(
|
|
||||||
LoopType.CONSECUTIVE_IDENTICAL_TOOL_CALLS,
|
|
||||||
this.promptId,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@@ -345,13 +420,6 @@ export class LoopDetectionService {
|
|||||||
const chunkHash = createHash('sha256').update(currentChunk).digest('hex');
|
const chunkHash = createHash('sha256').update(currentChunk).digest('hex');
|
||||||
|
|
||||||
if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) {
|
if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) {
|
||||||
logLoopDetected(
|
|
||||||
this.config,
|
|
||||||
new LoopDetectedEvent(
|
|
||||||
LoopType.CHANTING_IDENTICAL_SENTENCES,
|
|
||||||
this.promptId,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -445,28 +513,29 @@ export class LoopDetectionService {
|
|||||||
return originalChunk === currentChunk;
|
return originalChunk === currentChunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
private trimRecentHistory(recentHistory: Content[]): Content[] {
|
private trimRecentHistory(history: Content[]): Content[] {
|
||||||
// A function response must be preceded by a function call.
|
// A function response must be preceded by a function call.
|
||||||
// Continuously removes dangling function calls from the end of the history
|
// Continuously removes dangling function calls from the end of the history
|
||||||
// until the last turn is not a function call.
|
// until the last turn is not a function call.
|
||||||
while (
|
while (history.length > 0 && isFunctionCall(history[history.length - 1])) {
|
||||||
recentHistory.length > 0 &&
|
history.pop();
|
||||||
isFunctionCall(recentHistory[recentHistory.length - 1])
|
|
||||||
) {
|
|
||||||
recentHistory.pop();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// A function response should follow a function call.
|
// A function response should follow a function call.
|
||||||
// Continuously removes leading function responses from the beginning of history
|
// Continuously removes leading function responses from the beginning of history
|
||||||
// until the first turn is not a function response.
|
// until the first turn is not a function response.
|
||||||
while (recentHistory.length > 0 && isFunctionResponse(recentHistory[0])) {
|
while (history.length > 0 && isFunctionResponse(history[0])) {
|
||||||
recentHistory.shift();
|
history.shift();
|
||||||
}
|
}
|
||||||
|
|
||||||
return recentHistory;
|
return history;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async checkForLoopWithLLM(signal: AbortSignal) {
|
private async checkForLoopWithLLM(signal: AbortSignal): Promise<{
|
||||||
|
isLoop: boolean;
|
||||||
|
analysis?: string;
|
||||||
|
confirmedByModel?: string;
|
||||||
|
}> {
|
||||||
const recentHistory = this.config
|
const recentHistory = this.config
|
||||||
.getGeminiClient()
|
.getGeminiClient()
|
||||||
.getHistory()
|
.getHistory()
|
||||||
@@ -506,13 +575,17 @@ export class LoopDetectionService {
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (!flashResult) {
|
if (!flashResult) {
|
||||||
return false;
|
return { isLoop: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
const flashConfidence =
|
||||||
const flashConfidence = flashResult[
|
typeof flashResult['unproductive_state_confidence'] === 'number'
|
||||||
'unproductive_state_confidence'
|
? flashResult['unproductive_state_confidence']
|
||||||
] as number;
|
: 0;
|
||||||
|
const flashAnalysis =
|
||||||
|
typeof flashResult['unproductive_state_analysis'] === 'string'
|
||||||
|
? flashResult['unproductive_state_analysis']
|
||||||
|
: '';
|
||||||
|
|
||||||
const doubleCheckModelName =
|
const doubleCheckModelName =
|
||||||
this.config.modelConfigService.getResolvedConfig({
|
this.config.modelConfigService.getResolvedConfig({
|
||||||
@@ -530,7 +603,7 @@ export class LoopDetectionService {
|
|||||||
),
|
),
|
||||||
);
|
);
|
||||||
this.updateCheckInterval(flashConfidence);
|
this.updateCheckInterval(flashConfidence);
|
||||||
return false;
|
return { isLoop: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
const availability = this.config.getModelAvailabilityService();
|
const availability = this.config.getModelAvailabilityService();
|
||||||
@@ -539,8 +612,11 @@ export class LoopDetectionService {
|
|||||||
const flashModelName = this.config.modelConfigService.getResolvedConfig({
|
const flashModelName = this.config.modelConfigService.getResolvedConfig({
|
||||||
model: 'loop-detection',
|
model: 'loop-detection',
|
||||||
}).model;
|
}).model;
|
||||||
this.handleConfirmedLoop(flashResult, flashModelName);
|
return {
|
||||||
return true;
|
isLoop: true,
|
||||||
|
analysis: flashAnalysis,
|
||||||
|
confirmedByModel: flashModelName,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Double check with configured model
|
// Double check with configured model
|
||||||
@@ -550,10 +626,16 @@ export class LoopDetectionService {
|
|||||||
signal,
|
signal,
|
||||||
);
|
);
|
||||||
|
|
||||||
const mainModelConfidence = mainModelResult
|
const mainModelConfidence =
|
||||||
? // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
mainModelResult &&
|
||||||
(mainModelResult['unproductive_state_confidence'] as number)
|
typeof mainModelResult['unproductive_state_confidence'] === 'number'
|
||||||
: 0;
|
? mainModelResult['unproductive_state_confidence']
|
||||||
|
: 0;
|
||||||
|
const mainModelAnalysis =
|
||||||
|
mainModelResult &&
|
||||||
|
typeof mainModelResult['unproductive_state_analysis'] === 'string'
|
||||||
|
? mainModelResult['unproductive_state_analysis']
|
||||||
|
: undefined;
|
||||||
|
|
||||||
logLlmLoopCheck(
|
logLlmLoopCheck(
|
||||||
this.config,
|
this.config,
|
||||||
@@ -567,14 +649,17 @@ export class LoopDetectionService {
|
|||||||
|
|
||||||
if (mainModelResult) {
|
if (mainModelResult) {
|
||||||
if (mainModelConfidence >= LLM_CONFIDENCE_THRESHOLD) {
|
if (mainModelConfidence >= LLM_CONFIDENCE_THRESHOLD) {
|
||||||
this.handleConfirmedLoop(mainModelResult, doubleCheckModelName);
|
return {
|
||||||
return true;
|
isLoop: true,
|
||||||
|
analysis: mainModelAnalysis,
|
||||||
|
confirmedByModel: doubleCheckModelName,
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
this.updateCheckInterval(mainModelConfidence);
|
this.updateCheckInterval(mainModelConfidence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return { isLoop: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
private async queryLoopDetectionModel(
|
private async queryLoopDetectionModel(
|
||||||
@@ -601,32 +686,16 @@ export class LoopDetectionService {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
} catch (e) {
|
} catch (error) {
|
||||||
this.config.getDebugMode() ? debugLogger.warn(e) : debugLogger.debug(e);
|
if (this.config.getDebugMode()) {
|
||||||
|
debugLogger.warn(
|
||||||
|
`Error querying loop detection model (${model}): ${String(error)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private handleConfirmedLoop(
|
|
||||||
result: Record<string, unknown>,
|
|
||||||
modelName: string,
|
|
||||||
): void {
|
|
||||||
if (
|
|
||||||
typeof result['unproductive_state_analysis'] === 'string' &&
|
|
||||||
result['unproductive_state_analysis']
|
|
||||||
) {
|
|
||||||
debugLogger.warn(result['unproductive_state_analysis']);
|
|
||||||
}
|
|
||||||
logLoopDetected(
|
|
||||||
this.config,
|
|
||||||
new LoopDetectedEvent(
|
|
||||||
LoopType.LLM_DETECTED_LOOP,
|
|
||||||
this.promptId,
|
|
||||||
modelName,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
private updateCheckInterval(unproductive_state_confidence: number): void {
|
private updateCheckInterval(unproductive_state_confidence: number): void {
|
||||||
this.llmCheckInterval = Math.round(
|
this.llmCheckInterval = Math.round(
|
||||||
MIN_LLM_CHECK_INTERVAL +
|
MIN_LLM_CHECK_INTERVAL +
|
||||||
@@ -645,6 +714,17 @@ export class LoopDetectionService {
|
|||||||
this.resetContentTracking();
|
this.resetContentTracking();
|
||||||
this.resetLlmCheckTracking();
|
this.resetLlmCheckTracking();
|
||||||
this.loopDetected = false;
|
this.loopDetected = false;
|
||||||
|
this.detectedCount = 0;
|
||||||
|
this.lastLoopDetail = undefined;
|
||||||
|
this.lastLoopType = undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resets the loop detected flag to allow a recovery turn to proceed.
|
||||||
|
* This preserves the detectedCount so that the next detection will be count 2.
|
||||||
|
*/
|
||||||
|
clearDetection(): void {
|
||||||
|
this.loopDetected = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private resetToolCallCount(): void {
|
private resetToolCallCount(): void {
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ import {
|
|||||||
logFlashFallback,
|
logFlashFallback,
|
||||||
logChatCompression,
|
logChatCompression,
|
||||||
logMalformedJsonResponse,
|
logMalformedJsonResponse,
|
||||||
|
logInvalidChunk,
|
||||||
logFileOperation,
|
logFileOperation,
|
||||||
logRipgrepFallback,
|
logRipgrepFallback,
|
||||||
logToolOutputTruncated,
|
logToolOutputTruncated,
|
||||||
@@ -68,6 +69,7 @@ import {
|
|||||||
EVENT_AGENT_START,
|
EVENT_AGENT_START,
|
||||||
EVENT_AGENT_FINISH,
|
EVENT_AGENT_FINISH,
|
||||||
EVENT_WEB_FETCH_FALLBACK_ATTEMPT,
|
EVENT_WEB_FETCH_FALLBACK_ATTEMPT,
|
||||||
|
EVENT_INVALID_CHUNK,
|
||||||
ApiErrorEvent,
|
ApiErrorEvent,
|
||||||
ApiRequestEvent,
|
ApiRequestEvent,
|
||||||
ApiResponseEvent,
|
ApiResponseEvent,
|
||||||
@@ -77,6 +79,7 @@ import {
|
|||||||
FlashFallbackEvent,
|
FlashFallbackEvent,
|
||||||
RipgrepFallbackEvent,
|
RipgrepFallbackEvent,
|
||||||
MalformedJsonResponseEvent,
|
MalformedJsonResponseEvent,
|
||||||
|
InvalidChunkEvent,
|
||||||
makeChatCompressionEvent,
|
makeChatCompressionEvent,
|
||||||
FileOperationEvent,
|
FileOperationEvent,
|
||||||
ToolOutputTruncatedEvent,
|
ToolOutputTruncatedEvent,
|
||||||
@@ -1736,6 +1739,39 @@ describe('loggers', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('logInvalidChunk', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.spyOn(ClearcutLogger.prototype, 'logInvalidChunkEvent');
|
||||||
|
vi.spyOn(metrics, 'recordInvalidChunk');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('logs the event to Clearcut and OTEL', () => {
|
||||||
|
const mockConfig = makeFakeConfig();
|
||||||
|
const event = new InvalidChunkEvent('Unexpected token');
|
||||||
|
|
||||||
|
logInvalidChunk(mockConfig, event);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
ClearcutLogger.prototype.logInvalidChunkEvent,
|
||||||
|
).toHaveBeenCalledWith(event);
|
||||||
|
|
||||||
|
expect(mockLogger.emit).toHaveBeenCalledWith({
|
||||||
|
body: 'Invalid chunk received from stream.',
|
||||||
|
attributes: {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'user.email': 'test-user@example.com',
|
||||||
|
'installation.id': 'test-installation-id',
|
||||||
|
'event.name': EVENT_INVALID_CHUNK,
|
||||||
|
'event.timestamp': '2025-01-01T00:00:00.000Z',
|
||||||
|
interactive: false,
|
||||||
|
'error.message': 'Unexpected token',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(metrics.recordInvalidChunk).toHaveBeenCalledWith(mockConfig);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('logFileOperation', () => {
|
describe('logFileOperation', () => {
|
||||||
const mockConfig = {
|
const mockConfig = {
|
||||||
getSessionId: () => 'test-session-id',
|
getSessionId: () => 'test-session-id',
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ import {
|
|||||||
type ConversationFinishedEvent,
|
type ConversationFinishedEvent,
|
||||||
type ChatCompressionEvent,
|
type ChatCompressionEvent,
|
||||||
type MalformedJsonResponseEvent,
|
type MalformedJsonResponseEvent,
|
||||||
|
type InvalidChunkEvent,
|
||||||
type ContentRetryEvent,
|
type ContentRetryEvent,
|
||||||
type ContentRetryFailureEvent,
|
type ContentRetryFailureEvent,
|
||||||
type RipgrepFallbackEvent,
|
type RipgrepFallbackEvent,
|
||||||
@@ -75,6 +76,7 @@ import {
|
|||||||
recordPlanExecution,
|
recordPlanExecution,
|
||||||
recordKeychainAvailability,
|
recordKeychainAvailability,
|
||||||
recordTokenStorageInitialization,
|
recordTokenStorageInitialization,
|
||||||
|
recordInvalidChunk,
|
||||||
} from './metrics.js';
|
} from './metrics.js';
|
||||||
import { bufferTelemetryEvent } from './sdk.js';
|
import { bufferTelemetryEvent } from './sdk.js';
|
||||||
import { uiTelemetryService, type UiEvent } from './uiTelemetry.js';
|
import { uiTelemetryService, type UiEvent } from './uiTelemetry.js';
|
||||||
@@ -467,6 +469,22 @@ export function logMalformedJsonResponse(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function logInvalidChunk(
|
||||||
|
config: Config,
|
||||||
|
event: InvalidChunkEvent,
|
||||||
|
): void {
|
||||||
|
ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event);
|
||||||
|
bufferTelemetryEvent(() => {
|
||||||
|
const logger = logs.getLogger(SERVICE_NAME);
|
||||||
|
const logRecord: LogRecord = {
|
||||||
|
body: event.toLogBody(),
|
||||||
|
attributes: event.toOpenTelemetryAttributes(config),
|
||||||
|
};
|
||||||
|
logger.emit(logRecord);
|
||||||
|
recordInvalidChunk(config);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
export function logContentRetry(
|
export function logContentRetry(
|
||||||
config: Config,
|
config: Config,
|
||||||
event: ContentRetryEvent,
|
event: ContentRetryEvent,
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ describe('Telemetry Metrics', () => {
|
|||||||
let recordPlanExecutionModule: typeof import('./metrics.js').recordPlanExecution;
|
let recordPlanExecutionModule: typeof import('./metrics.js').recordPlanExecution;
|
||||||
let recordKeychainAvailabilityModule: typeof import('./metrics.js').recordKeychainAvailability;
|
let recordKeychainAvailabilityModule: typeof import('./metrics.js').recordKeychainAvailability;
|
||||||
let recordTokenStorageInitializationModule: typeof import('./metrics.js').recordTokenStorageInitialization;
|
let recordTokenStorageInitializationModule: typeof import('./metrics.js').recordTokenStorageInitialization;
|
||||||
|
let recordInvalidChunkModule: typeof import('./metrics.js').recordInvalidChunk;
|
||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
vi.resetModules();
|
vi.resetModules();
|
||||||
@@ -154,6 +155,7 @@ describe('Telemetry Metrics', () => {
|
|||||||
metricsJsModule.recordKeychainAvailability;
|
metricsJsModule.recordKeychainAvailability;
|
||||||
recordTokenStorageInitializationModule =
|
recordTokenStorageInitializationModule =
|
||||||
metricsJsModule.recordTokenStorageInitialization;
|
metricsJsModule.recordTokenStorageInitialization;
|
||||||
|
recordInvalidChunkModule = metricsJsModule.recordInvalidChunk;
|
||||||
|
|
||||||
const otelApiModule = await import('@opentelemetry/api');
|
const otelApiModule = await import('@opentelemetry/api');
|
||||||
|
|
||||||
@@ -1555,5 +1557,27 @@ describe('Telemetry Metrics', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('recordInvalidChunk', () => {
|
||||||
|
it('should not record metrics if not initialized', () => {
|
||||||
|
const config = makeFakeConfig({});
|
||||||
|
recordInvalidChunkModule(config);
|
||||||
|
expect(mockCounterAddFn).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record invalid chunk when initialized', () => {
|
||||||
|
const config = makeFakeConfig({});
|
||||||
|
initializeMetricsModule(config);
|
||||||
|
mockCounterAddFn.mockClear();
|
||||||
|
|
||||||
|
recordInvalidChunkModule(config);
|
||||||
|
|
||||||
|
expect(mockCounterAddFn).toHaveBeenCalledWith(1, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'installation.id': 'test-installation-id',
|
||||||
|
'user.email': 'test@example.com',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -790,25 +790,36 @@ export enum LoopType {
|
|||||||
CONSECUTIVE_IDENTICAL_TOOL_CALLS = 'consecutive_identical_tool_calls',
|
CONSECUTIVE_IDENTICAL_TOOL_CALLS = 'consecutive_identical_tool_calls',
|
||||||
CHANTING_IDENTICAL_SENTENCES = 'chanting_identical_sentences',
|
CHANTING_IDENTICAL_SENTENCES = 'chanting_identical_sentences',
|
||||||
LLM_DETECTED_LOOP = 'llm_detected_loop',
|
LLM_DETECTED_LOOP = 'llm_detected_loop',
|
||||||
|
// Aliases for tests/internal use
|
||||||
|
TOOL_CALL_LOOP = CONSECUTIVE_IDENTICAL_TOOL_CALLS,
|
||||||
|
CONTENT_CHANTING_LOOP = CHANTING_IDENTICAL_SENTENCES,
|
||||||
}
|
}
|
||||||
|
|
||||||
export class LoopDetectedEvent implements BaseTelemetryEvent {
|
export class LoopDetectedEvent implements BaseTelemetryEvent {
|
||||||
'event.name': 'loop_detected';
|
'event.name': 'loop_detected';
|
||||||
'event.timestamp': string;
|
'event.timestamp': string;
|
||||||
loop_type: LoopType;
|
loop_type: LoopType;
|
||||||
prompt_id: string;
|
prompt_id: string;
|
||||||
|
count: number;
|
||||||
confirmed_by_model?: string;
|
confirmed_by_model?: string;
|
||||||
|
analysis?: string;
|
||||||
|
confidence?: number;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
loop_type: LoopType,
|
loop_type: LoopType,
|
||||||
prompt_id: string,
|
prompt_id: string,
|
||||||
|
count: number,
|
||||||
confirmed_by_model?: string,
|
confirmed_by_model?: string,
|
||||||
|
analysis?: string,
|
||||||
|
confidence?: number,
|
||||||
) {
|
) {
|
||||||
this['event.name'] = 'loop_detected';
|
this['event.name'] = 'loop_detected';
|
||||||
this['event.timestamp'] = new Date().toISOString();
|
this['event.timestamp'] = new Date().toISOString();
|
||||||
this.loop_type = loop_type;
|
this.loop_type = loop_type;
|
||||||
this.prompt_id = prompt_id;
|
this.prompt_id = prompt_id;
|
||||||
|
this.count = count;
|
||||||
this.confirmed_by_model = confirmed_by_model;
|
this.confirmed_by_model = confirmed_by_model;
|
||||||
|
this.analysis = analysis;
|
||||||
|
this.confidence = confidence;
|
||||||
}
|
}
|
||||||
|
|
||||||
toOpenTelemetryAttributes(config: Config): LogAttributes {
|
toOpenTelemetryAttributes(config: Config): LogAttributes {
|
||||||
@@ -818,17 +829,28 @@ export class LoopDetectedEvent implements BaseTelemetryEvent {
|
|||||||
'event.timestamp': this['event.timestamp'],
|
'event.timestamp': this['event.timestamp'],
|
||||||
loop_type: this.loop_type,
|
loop_type: this.loop_type,
|
||||||
prompt_id: this.prompt_id,
|
prompt_id: this.prompt_id,
|
||||||
|
count: this.count,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (this.confirmed_by_model) {
|
if (this.confirmed_by_model) {
|
||||||
attributes['confirmed_by_model'] = this.confirmed_by_model;
|
attributes['confirmed_by_model'] = this.confirmed_by_model;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this.analysis) {
|
||||||
|
attributes['analysis'] = this.analysis;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.confidence !== undefined) {
|
||||||
|
attributes['confidence'] = this.confidence;
|
||||||
|
}
|
||||||
|
|
||||||
return attributes;
|
return attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
toLogBody(): string {
|
toLogBody(): string {
|
||||||
return `Loop detected. Type: ${this.loop_type}.${this.confirmed_by_model ? ` Confirmed by: ${this.confirmed_by_model}` : ''}`;
|
const status =
|
||||||
|
this.count === 1 ? 'Attempting recovery' : 'Terminating session';
|
||||||
|
return `Loop detected (Strike ${this.count}: ${status}). Type: ${this.loop_type}.${this.confirmed_by_model ? ` Confirmed by: ${this.confirmed_by_model}` : ''}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -413,6 +413,20 @@ export interface EditToolParams {
|
|||||||
ai_proposed_content?: string;
|
ai_proposed_content?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function isEditToolParams(args: unknown): args is EditToolParams {
|
||||||
|
if (typeof args !== 'object' || args === null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
'file_path' in args &&
|
||||||
|
typeof args.file_path === 'string' &&
|
||||||
|
'old_string' in args &&
|
||||||
|
typeof args.old_string === 'string' &&
|
||||||
|
'new_string' in args &&
|
||||||
|
typeof args.new_string === 'string'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
interface CalculatedEdit {
|
interface CalculatedEdit {
|
||||||
currentContent: string | null;
|
currentContent: string | null;
|
||||||
newContent: string;
|
newContent: string;
|
||||||
|
|||||||
@@ -74,6 +74,20 @@ export interface WriteFileToolParams {
|
|||||||
ai_proposed_content?: string;
|
ai_proposed_content?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function isWriteFileToolParams(
|
||||||
|
args: unknown,
|
||||||
|
): args is WriteFileToolParams {
|
||||||
|
if (typeof args !== 'object' || args === null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
'file_path' in args &&
|
||||||
|
typeof args.file_path === 'string' &&
|
||||||
|
'content' in args &&
|
||||||
|
typeof args.content === 'string'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
interface GetCorrectedFileContentResult {
|
interface GetCorrectedFileContentResult {
|
||||||
originalContent: string;
|
originalContent: string;
|
||||||
correctedContent: string;
|
correctedContent: string;
|
||||||
|
|||||||
@@ -421,6 +421,47 @@ describe('FileSearch', () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should prioritize filenames closer to the end of the path and shorter paths', async () => {
|
||||||
|
tmpDir = await createTmpDir({
|
||||||
|
src: {
|
||||||
|
'hooks.ts': '',
|
||||||
|
hooks: {
|
||||||
|
'index.ts': '',
|
||||||
|
},
|
||||||
|
utils: {
|
||||||
|
'hooks.tsx': '',
|
||||||
|
},
|
||||||
|
'hooks-dev': {
|
||||||
|
'test.ts': '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const fileSearch = FileSearchFactory.create({
|
||||||
|
projectRoot: tmpDir,
|
||||||
|
fileDiscoveryService: new FileDiscoveryService(tmpDir, {
|
||||||
|
respectGitIgnore: false,
|
||||||
|
respectGeminiIgnore: false,
|
||||||
|
}),
|
||||||
|
ignoreDirs: [],
|
||||||
|
cache: false,
|
||||||
|
cacheTtl: 0,
|
||||||
|
enableRecursiveFileSearch: true,
|
||||||
|
enableFuzzySearch: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
await fileSearch.initialize();
|
||||||
|
const results = await fileSearch.search('hooks');
|
||||||
|
|
||||||
|
// The order should prioritize matches closer to the end and shorter strings.
|
||||||
|
// FZF matches right-to-left.
|
||||||
|
expect(results[0]).toBe('src/hooks/');
|
||||||
|
expect(results[1]).toBe('src/hooks.ts');
|
||||||
|
expect(results[2]).toBe('src/utils/hooks.tsx');
|
||||||
|
expect(results[3]).toBe('src/hooks-dev/');
|
||||||
|
expect(results[4]).toBe('src/hooks/index.ts');
|
||||||
|
expect(results[5]).toBe('src/hooks-dev/test.ts');
|
||||||
|
});
|
||||||
it('should return empty array when no matches are found', async () => {
|
it('should return empty array when no matches are found', async () => {
|
||||||
tmpDir = await createTmpDir({
|
tmpDir = await createTmpDir({
|
||||||
src: ['file1.js'],
|
src: ['file1.js'],
|
||||||
|
|||||||
@@ -13,6 +13,44 @@ import { AsyncFzf, type FzfResultItem } from 'fzf';
|
|||||||
import { unescapePath } from '../paths.js';
|
import { unescapePath } from '../paths.js';
|
||||||
import type { FileDiscoveryService } from '../../services/fileDiscoveryService.js';
|
import type { FileDiscoveryService } from '../../services/fileDiscoveryService.js';
|
||||||
|
|
||||||
|
// Tiebreaker: Prefers shorter paths.
|
||||||
|
const byLengthAsc = (a: { item: string }, b: { item: string }) =>
|
||||||
|
a.item.length - b.item.length;
|
||||||
|
|
||||||
|
// Tiebreaker: Prefers matches at the start of the filename (basename prefix).
|
||||||
|
const byBasenamePrefix = (
|
||||||
|
a: { item: string; positions: Set<number> },
|
||||||
|
b: { item: string; positions: Set<number> },
|
||||||
|
) => {
|
||||||
|
const getBasenameStart = (p: string) => {
|
||||||
|
const trimmed = p.endsWith('/') ? p.slice(0, -1) : p;
|
||||||
|
return Math.max(trimmed.lastIndexOf('/'), trimmed.lastIndexOf('\\')) + 1;
|
||||||
|
};
|
||||||
|
const aDiff = Math.min(...a.positions) - getBasenameStart(a.item);
|
||||||
|
const bDiff = Math.min(...b.positions) - getBasenameStart(b.item);
|
||||||
|
|
||||||
|
const aIsFilenameMatch = aDiff >= 0;
|
||||||
|
const bIsFilenameMatch = bDiff >= 0;
|
||||||
|
|
||||||
|
if (aIsFilenameMatch && !bIsFilenameMatch) return -1;
|
||||||
|
if (!aIsFilenameMatch && bIsFilenameMatch) return 1;
|
||||||
|
if (aIsFilenameMatch && bIsFilenameMatch) return aDiff - bDiff;
|
||||||
|
|
||||||
|
return 0; // Both are directory matches, let subsequent tiebreakers decide.
|
||||||
|
};
|
||||||
|
|
||||||
|
// Tiebreaker: Prefers matches closer to the end of the path.
|
||||||
|
const byMatchPosFromEnd = (
|
||||||
|
a: { item: string; positions: Set<number> },
|
||||||
|
b: { item: string; positions: Set<number> },
|
||||||
|
) => {
|
||||||
|
const maxPosA = Math.max(-1, ...a.positions);
|
||||||
|
const maxPosB = Math.max(-1, ...b.positions);
|
||||||
|
const distA = a.item.length - maxPosA;
|
||||||
|
const distB = b.item.length - maxPosB;
|
||||||
|
return distA - distB;
|
||||||
|
};
|
||||||
|
|
||||||
export interface FileSearchOptions {
|
export interface FileSearchOptions {
|
||||||
projectRoot: string;
|
projectRoot: string;
|
||||||
ignoreDirs: string[];
|
ignoreDirs: string[];
|
||||||
@@ -192,6 +230,8 @@ class RecursiveFileSearch implements FileSearch {
|
|||||||
// files, because the v2 algorithm is just too slow in those cases.
|
// files, because the v2 algorithm is just too slow in those cases.
|
||||||
this.fzf = new AsyncFzf(this.allFiles, {
|
this.fzf = new AsyncFzf(this.allFiles, {
|
||||||
fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
|
fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
|
||||||
|
forward: false,
|
||||||
|
tiebreakers: [byBasenamePrefix, byMatchPosFromEnd, byLengthAsc],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1271,8 +1271,8 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
"sandbox": {
|
"sandbox": {
|
||||||
"title": "Sandbox",
|
"title": "Sandbox",
|
||||||
"description": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, or provide a string path to a sandbox profile.",
|
"description": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").",
|
||||||
"markdownDescription": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, or provide a string path to a sandbox profile.\n\n- Category: `Tools`\n- Requires restart: `yes`",
|
"markdownDescription": "Sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., \"docker\", \"podman\", \"lxc\").\n\n- Category: `Tools`\n- Requires restart: `yes`",
|
||||||
"$ref": "#/$defs/BooleanOrString"
|
"$ref": "#/$defs/BooleanOrString"
|
||||||
},
|
},
|
||||||
"shell": {
|
"shell": {
|
||||||
|
|||||||
@@ -155,9 +155,9 @@ function generateMarkdown(currentStatsByModel, history) {
|
|||||||
|
|
||||||
const models = Object.keys(currentStatsByModel).sort();
|
const models = Object.keys(currentStatsByModel).sort();
|
||||||
|
|
||||||
for (const model of models) {
|
const getPassRate = (statsForModel) => {
|
||||||
const currentStats = currentStatsByModel[model];
|
if (!statsForModel) return '-';
|
||||||
const totalStats = Object.values(currentStats).reduce(
|
const totalStats = Object.values(statsForModel).reduce(
|
||||||
(acc, stats) => {
|
(acc, stats) => {
|
||||||
acc.passed += stats.passed;
|
acc.passed += stats.passed;
|
||||||
acc.total += stats.total;
|
acc.total += stats.total;
|
||||||
@@ -165,11 +165,14 @@ function generateMarkdown(currentStatsByModel, history) {
|
|||||||
},
|
},
|
||||||
{ passed: 0, total: 0 },
|
{ passed: 0, total: 0 },
|
||||||
);
|
);
|
||||||
|
return totalStats.total > 0
|
||||||
|
? ((totalStats.passed / totalStats.total) * 100).toFixed(1) + '%'
|
||||||
|
: '-';
|
||||||
|
};
|
||||||
|
|
||||||
const totalPassRate =
|
for (const model of models) {
|
||||||
totalStats.total > 0
|
const currentStats = currentStatsByModel[model];
|
||||||
? ((totalStats.passed / totalStats.total) * 100).toFixed(1) + '%'
|
const totalPassRate = getPassRate(currentStats);
|
||||||
: 'N/A';
|
|
||||||
|
|
||||||
console.log(`#### Model: ${model}`);
|
console.log(`#### Model: ${model}`);
|
||||||
console.log(`**Total Pass Rate: ${totalPassRate}**\n`);
|
console.log(`**Total Pass Rate: ${totalPassRate}**\n`);
|
||||||
@@ -177,18 +180,22 @@ function generateMarkdown(currentStatsByModel, history) {
|
|||||||
// Header
|
// Header
|
||||||
let header = '| Test Name |';
|
let header = '| Test Name |';
|
||||||
let separator = '| :--- |';
|
let separator = '| :--- |';
|
||||||
|
let passRateRow = '| **Overall Pass Rate** |';
|
||||||
|
|
||||||
for (const item of reversedHistory) {
|
for (const item of reversedHistory) {
|
||||||
header += ` [${item.run.databaseId}](${item.run.url}) |`;
|
header += ` [${item.run.databaseId}](${item.run.url}) |`;
|
||||||
separator += ' :---: |';
|
separator += ' :---: |';
|
||||||
|
passRateRow += ` **${getPassRate(item.stats[model])}** |`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Current column last
|
// Add Current column last
|
||||||
header += ' Current |';
|
header += ' Current |';
|
||||||
separator += ' :---: |';
|
separator += ' :---: |';
|
||||||
|
passRateRow += ` **${totalPassRate}** |`;
|
||||||
|
|
||||||
console.log(header);
|
console.log(header);
|
||||||
console.log(separator);
|
console.log(separator);
|
||||||
|
console.log(passRateRow);
|
||||||
|
|
||||||
// Collect all test names for this model
|
// Collect all test names for this model
|
||||||
const allTestNames = new Set(Object.keys(currentStats));
|
const allTestNames = new Set(Object.keys(currentStats));
|
||||||
|
|||||||
Reference in New Issue
Block a user