diff --git a/docs/changelogs/index.md b/docs/changelogs/index.md index d9713c973a..45b48ab53d 100644 --- a/docs/changelogs/index.md +++ b/docs/changelogs/index.md @@ -18,6 +18,27 @@ on GitHub. | [Preview](preview.md) | Experimental features ready for early feedback. | | [Stable](latest.md) | Stable, recommended for general use. | +## Announcements: v0.38.0 - 2026-04-14 + +- **Chapters Narrative Flow:** Group agent interactions into "Chapters" based on + intent and tool usage for better session structure + ([#23150](https://github.com/google-gemini/gemini-cli/pull/23150) by + @Abhijit-2592, + [#24079](https://github.com/google-gemini/gemini-cli/pull/24079) by + @gundermanc). +- **Context Compression Service:** Advanced context management to efficiently + distill conversation history + ([#24483](https://github.com/google-gemini/gemini-cli/pull/24483) by + @joshualitt). +- **UI Flicker & UX Enhancements:** Solved rendering flicker with "Terminal + Buffer" mode and introduced selective topic expansion + ([#24512](https://github.com/google-gemini/gemini-cli/pull/24512) by + @jacob314, [#24793](https://github.com/google-gemini/gemini-cli/pull/24793) by + @Abhijit-2592). +- **Persistent Policy Approvals:** Implemented context-aware persistent + approvals for tool execution + ([#23257](https://github.com/google-gemini/gemini-cli/pull/23257) by @jerop). + ## Announcements: v0.37.0 - 2026-04-08 - **Dynamic Sandbox Expansion:** Implemented dynamic sandbox expansion and diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index bccbc4bd77..0a105857f3 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.37.2 +# Latest stable release: v0.38.0 -Released: April 13, 2026 +Released: April 14, 2026 For most users, our latest stable release is the recommended release. 
Install the latest stable version with: @@ -11,418 +11,258 @@ npm install -g @google/gemini-cli ## Highlights -- **Dynamic Sandbox Expansion:** Implemented dynamic sandbox expansion and - worktree support for both Linux and Windows, enhancing development flexibility - in restricted environments. -- **Tool-Based Topic Grouping (Chapters):** Introduced "Chapters" to logically - group agent interactions based on tool usage and intent, providing a clearer - narrative flow in long sessions. -- **Enhanced Browser Agent:** Added persistent session management, dynamic - read-only tool discovery, and sandbox-aware initialization for the browser - agent. -- **Security & Permission Hardening:** Implemented secret visibility lockdown - for environment files and integrated integrity controls for Windows - sandboxing. +- **Chapters Narrative Flow:** Introduced tool-based topic grouping ("Chapters") + to provide better session structure and narrative continuity in long-running + tasks. +- **Context Compression Service:** Implemented a dedicated service for advanced + context management, efficiently distilling conversation history to preserve + focus and tokens. +- **Enhanced UI Stability & UX:** Introduced a new "Terminal Buffer" mode to + solve rendering flicker, along with selective topic expansion and improved + tool confirmation layouts. +- **Context-Aware Policy Approvals:** Users can now grant persistent, + context-aware approvals for tools, significantly reducing manual confirmation + overhead for trusted workflows. +- **Background Process Monitoring:** New tools for monitoring and inspecting + background shell processes, providing better visibility into asynchronous + tasks. 
## What's Changed -- fix(patch): cherry-pick 9d741ab to release/v0.37.1-pr-24565 to patch version - v0.37.1 and create version 0.37.2 by @gemini-cli-robot in - [#25322](https://github.com/google-gemini/gemini-cli/pull/25322) -- fix(acp): handle all InvalidStreamError types gracefully in prompt - [#24540](https://github.com/google-gemini/gemini-cli/pull/24540) -- feat(acp): add support for /about command - [#24649](https://github.com/google-gemini/gemini-cli/pull/24649) -- feat(acp): add /help command - [#24839](https://github.com/google-gemini/gemini-cli/pull/24839) -- feat(evals): centralize test agents into test-utils for reuse by @Samee24 in - [#23616](https://github.com/google-gemini/gemini-cli/pull/23616) -- revert: chore(config): disable agents by default by @abhipatel12 in - [#23672](https://github.com/google-gemini/gemini-cli/pull/23672) -- fix(plan): update telemetry attribute keys and add timestamp by @Adib234 in - [#23685](https://github.com/google-gemini/gemini-cli/pull/23685) -- fix(core): prevent premature MCP discovery completion by @jackwotherspoon in - [#23637](https://github.com/google-gemini/gemini-cli/pull/23637) -- feat(browser): add maxActionsPerTask for browser agent setting by - @cynthialong0-0 in - [#23216](https://github.com/google-gemini/gemini-cli/pull/23216) -- fix(core): improve agent loader error formatting for empty paths by - @adamfweidman in - [#23690](https://github.com/google-gemini/gemini-cli/pull/23690) -- fix(cli): only show updating spinner when auto-update is in progress by - @scidomino in [#23709](https://github.com/google-gemini/gemini-cli/pull/23709) -- Refine onboarding metrics to log the duration explicitly and use the tier - name. 
by @yunaseoul in - [#23678](https://github.com/google-gemini/gemini-cli/pull/23678) -- chore(tools): add toJSON to tools and invocations to reduce logging verbosity - by @alisa-alisa in - [#22899](https://github.com/google-gemini/gemini-cli/pull/22899) -- fix(cli): stabilize copy mode to prevent flickering and cursor resets by - @mattKorwel in - [#22584](https://github.com/google-gemini/gemini-cli/pull/22584) -- fix(test): move flaky ctrl-c-exit test to non-blocking suite by @mattKorwel in - [#23732](https://github.com/google-gemini/gemini-cli/pull/23732) -- feat(skills): add ci skill for automated failure replication by @mattKorwel in - [#23720](https://github.com/google-gemini/gemini-cli/pull/23720) -- feat(sandbox): implement forbiddenPaths for OS-specific sandbox managers by - @ehedlund in [#23282](https://github.com/google-gemini/gemini-cli/pull/23282) -- fix(core): conditionally expose additional_permissions in shell tool by - @galz10 in [#23729](https://github.com/google-gemini/gemini-cli/pull/23729) -- refactor(core): standardize OS-specific sandbox tests and extract linux helper - methods by @ehedlund in - [#23715](https://github.com/google-gemini/gemini-cli/pull/23715) -- format recently added script by @scidomino in - [#23739](https://github.com/google-gemini/gemini-cli/pull/23739) -- fix(ui): prevent over-eager slash subcommand completion by @keithguerin in - [#20136](https://github.com/google-gemini/gemini-cli/pull/20136) -- Fix dynamic model routing for gemini 3.1 pro to customtools model by - @kevinjwang1 in - [#23641](https://github.com/google-gemini/gemini-cli/pull/23641) -- feat(core): support inline agentCardJson for remote agents by @adamfweidman in - [#23743](https://github.com/google-gemini/gemini-cli/pull/23743) -- fix(cli): skip console log/info in headless mode by @cynthialong0-0 in - [#22739](https://github.com/google-gemini/gemini-cli/pull/22739) -- test(core): install bubblewrap on Linux CI for sandbox integration tests by - @ehedlund in 
[#23583](https://github.com/google-gemini/gemini-cli/pull/23583) -- docs(reference): split tools table into category sections by @sheikhlimon in - [#21516](https://github.com/google-gemini/gemini-cli/pull/21516) -- fix(browser): detect embedded URLs in query params to prevent allowedDomains - bypass by @tony-shi in - [#23225](https://github.com/google-gemini/gemini-cli/pull/23225) -- fix(browser): add proxy bypass constraint to domain restriction system prompt - by @tony-shi in - [#23229](https://github.com/google-gemini/gemini-cli/pull/23229) -- fix(policy): relax write_file argsPattern in plan mode to allow paths without - session ID by @Adib234 in - [#23695](https://github.com/google-gemini/gemini-cli/pull/23695) -- docs: fix grammar in CONTRIBUTING and numbering in sandbox docs by - @splint-disk-8i in - [#23448](https://github.com/google-gemini/gemini-cli/pull/23448) -- fix(acp): allow attachments by adding a permission prompt by @sripasg in - [#23680](https://github.com/google-gemini/gemini-cli/pull/23680) -- fix(core): thread AbortSignal to chat compression requests (#20405) by - @SH20RAJ in [#20778](https://github.com/google-gemini/gemini-cli/pull/20778) -- feat(core): implement Windows sandbox dynamic expansion Phase 1 and 2.1 by - @scidomino in [#23691](https://github.com/google-gemini/gemini-cli/pull/23691) -- Add note about root privileges in sandbox docs by @diodesign in - [#23314](https://github.com/google-gemini/gemini-cli/pull/23314) -- docs(core): document agent_card_json string literal options for remote agents - by @adamfweidman in - [#23797](https://github.com/google-gemini/gemini-cli/pull/23797) -- fix(cli): resolve TTY hang on headless environments by unconditionally - resuming process.stdin before React Ink launch by @cocosheng-g in - [#23673](https://github.com/google-gemini/gemini-cli/pull/23673) -- fix(ui): cleanup estimated string length hacks in composer by @keithguerin in - [#23694](https://github.com/google-gemini/gemini-cli/pull/23694) 
-- feat(browser): dynamically discover read-only tools by @cynthialong0-0 in - [#23805](https://github.com/google-gemini/gemini-cli/pull/23805) -- docs: clarify policy requirement for `general.plan.directory` in settings - schema by @jerop in - [#23784](https://github.com/google-gemini/gemini-cli/pull/23784) -- Revert "perf(cli): optimize --version startup time (#23671)" by @scidomino in - [#23812](https://github.com/google-gemini/gemini-cli/pull/23812) -- don't silence errors from wombat by @scidomino in - [#23822](https://github.com/google-gemini/gemini-cli/pull/23822) -- fix(ui): prevent escape key from cancelling requests in shell mode by - @PrasannaPal21 in - [#21245](https://github.com/google-gemini/gemini-cli/pull/21245) -- Changelog for v0.36.0-preview.0 by @gemini-cli-robot in - [#23702](https://github.com/google-gemini/gemini-cli/pull/23702) -- feat(core,ui): Add experiment-gated support for gemini flash 3.1 lite by - @chrstnb in [#23794](https://github.com/google-gemini/gemini-cli/pull/23794) -- Changelog for v0.36.0-preview.3 by @gemini-cli-robot in - [#23827](https://github.com/google-gemini/gemini-cli/pull/23827) -- new linting check: github-actions-pinning by @alisa-alisa in - [#23808](https://github.com/google-gemini/gemini-cli/pull/23808) -- fix(cli): show helpful guidance when no skills are available by @Niralisj in - [#23785](https://github.com/google-gemini/gemini-cli/pull/23785) -- fix: Chat logs and errors handle tail tool calls correctly by @googlestrobe in - [#22460](https://github.com/google-gemini/gemini-cli/pull/22460) -- Don't try removing a tag from a non-existent release. 
by @scidomino in - [#23830](https://github.com/google-gemini/gemini-cli/pull/23830) -- fix(cli): allow ask question dialog to take full window height by @jacob314 in - [#23693](https://github.com/google-gemini/gemini-cli/pull/23693) -- fix(core): strip leading underscores from error types in telemetry by - @yunaseoul in [#23824](https://github.com/google-gemini/gemini-cli/pull/23824) -- Changelog for v0.35.0 by @gemini-cli-robot in - [#23819](https://github.com/google-gemini/gemini-cli/pull/23819) -- feat(evals): add reliability harvester and 500/503 retry support by - @alisa-alisa in - [#23626](https://github.com/google-gemini/gemini-cli/pull/23626) -- feat(sandbox): dynamic Linux sandbox expansion and worktree support by @galz10 - in [#23692](https://github.com/google-gemini/gemini-cli/pull/23692) -- Merge examples of use into quickstart documentation by @diodesign in - [#23319](https://github.com/google-gemini/gemini-cli/pull/23319) -- fix(cli): prioritize primary name matches in slash command search by @sehoon38 - in [#23850](https://github.com/google-gemini/gemini-cli/pull/23850) -- Changelog for v0.35.1 by @gemini-cli-robot in - [#23840](https://github.com/google-gemini/gemini-cli/pull/23840) -- fix(browser): keep input blocker active across navigations by @kunal-10-cloud - in [#22562](https://github.com/google-gemini/gemini-cli/pull/22562) -- feat(core): new skill to look for duplicated code while reviewing PRs by - @devr0306 in [#23704](https://github.com/google-gemini/gemini-cli/pull/23704) -- fix(core): replace hardcoded non-interactive ASK_USER denial with explicit - policy rules by @ruomengz in - [#23668](https://github.com/google-gemini/gemini-cli/pull/23668) -- fix(plan): after exiting plan mode switches model to a flash model by @Adib234 - in [#23885](https://github.com/google-gemini/gemini-cli/pull/23885) -- feat(gcp): add development worker infrastructure by @mattKorwel in - [#23814](https://github.com/google-gemini/gemini-cli/pull/23814) -- 
fix(a2a-server): A2A server should execute ask policies in interactive mode by - @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) -- feat(core): define TrajectoryProvider interface by @sehoon38 in - [#23050](https://github.com/google-gemini/gemini-cli/pull/23050) -- Docs: Update quotas and pricing by @jkcinouye in - [#23835](https://github.com/google-gemini/gemini-cli/pull/23835) -- fix(core): allow disabling environment variable redaction by @galz10 in - [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) -- feat(cli): enable notifications cross-platform via terminal bell fallback by - @genneth in [#21618](https://github.com/google-gemini/gemini-cli/pull/21618) -- feat(sandbox): implement secret visibility lockdown for env files by - @DavidAPierce in - [#23712](https://github.com/google-gemini/gemini-cli/pull/23712) -- fix(core): remove shell outputChunks buffer caching to prevent memory bloat - and sanitize prompt input by @spencer426 in - [#23751](https://github.com/google-gemini/gemini-cli/pull/23751) -- feat(core): implement persistent browser session management by @kunal-10-cloud - in [#21306](https://github.com/google-gemini/gemini-cli/pull/21306) -- refactor(core): delegate sandbox denial parsing to SandboxManager by - @scidomino in [#23928](https://github.com/google-gemini/gemini-cli/pull/23928) -- dep(update) Update Ink version to 6.5.0 by @jacob314 in - [#23843](https://github.com/google-gemini/gemini-cli/pull/23843) -- Docs: Update 'docs-writer' skill for relative links by @jkcinouye in - [#21463](https://github.com/google-gemini/gemini-cli/pull/21463) -- Changelog for v0.36.0-preview.4 by @gemini-cli-robot in - [#23935](https://github.com/google-gemini/gemini-cli/pull/23935) -- fix(acp): Update allow approval policy flow for ACP clients to fix config - persistence and compatible with TUI by @sripasg in - [#23818](https://github.com/google-gemini/gemini-cli/pull/23818) -- Changelog for v0.35.2 by 
@gemini-cli-robot in - [#23960](https://github.com/google-gemini/gemini-cli/pull/23960) -- ACP integration documents by @g-samroberts in - [#22254](https://github.com/google-gemini/gemini-cli/pull/22254) -- fix(core): explicitly set error names to avoid bundling renaming issues by - @yunaseoul in [#23913](https://github.com/google-gemini/gemini-cli/pull/23913) -- feat(core): subagent isolation and cleanup hardening by @abhipatel12 in - [#23903](https://github.com/google-gemini/gemini-cli/pull/23903) -- disable extension-reload test by @scidomino in - [#24018](https://github.com/google-gemini/gemini-cli/pull/24018) -- feat(core): add forbiddenPaths to GlobalSandboxOptions and refactor - createSandboxManager by @ehedlund in - [#23936](https://github.com/google-gemini/gemini-cli/pull/23936) -- refactor(core): improve ignore resolution and fix directory-matching bug by - @ehedlund in [#23816](https://github.com/google-gemini/gemini-cli/pull/23816) -- revert(core): support custom base URL via env vars by @spencer426 in - [#23976](https://github.com/google-gemini/gemini-cli/pull/23976) -- Increase memory limited for eslint. by @jacob314 in - [#24022](https://github.com/google-gemini/gemini-cli/pull/24022) -- fix(acp): prevent crash on empty response in ACP mode by @sripasg in - [#23952](https://github.com/google-gemini/gemini-cli/pull/23952) -- feat(core): Land `AgentHistoryProvider`. by @joshualitt in - [#23978](https://github.com/google-gemini/gemini-cli/pull/23978) -- fix(core): switch to subshells for shell tool wrapping to fix heredocs and - edge cases by @abhipatel12 in - [#24024](https://github.com/google-gemini/gemini-cli/pull/24024) -- Debug command. 
by @jacob314 in - [#23851](https://github.com/google-gemini/gemini-cli/pull/23851) -- Changelog for v0.36.0-preview.5 by @gemini-cli-robot in - [#24046](https://github.com/google-gemini/gemini-cli/pull/24046) -- Fix test flakes by globally mocking ink-spinner by @jacob314 in - [#24044](https://github.com/google-gemini/gemini-cli/pull/24044) -- Enable network access in sandbox configuration by @galz10 in - [#24055](https://github.com/google-gemini/gemini-cli/pull/24055) -- feat(context): add configurable memoryBoundaryMarkers setting by @SandyTao520 - in [#24020](https://github.com/google-gemini/gemini-cli/pull/24020) -- feat(core): implement windows sandbox expansion and denial detection by - @scidomino in [#24027](https://github.com/google-gemini/gemini-cli/pull/24027) -- fix(core): resolve ACP Operation Aborted Errors in grep_search by @ivanporty - in [#23821](https://github.com/google-gemini/gemini-cli/pull/23821) -- fix(hooks): prevent SessionEnd from firing twice in non-interactive mode by - @krishdef7 in [#22139](https://github.com/google-gemini/gemini-cli/pull/22139) -- Re-word intro to Gemini 3 page. 
by @g-samroberts in - [#24069](https://github.com/google-gemini/gemini-cli/pull/24069) -- fix(cli): resolve layout contention and flashing loop in StatusRow by - @keithguerin in - [#24065](https://github.com/google-gemini/gemini-cli/pull/24065) -- fix(sandbox): implement Windows Mandatory Integrity Control for GeminiSandbox - by @galz10 in [#24057](https://github.com/google-gemini/gemini-cli/pull/24057) -- feat(core): implement tool-based topic grouping (Chapters) by @Abhijit-2592 in - [#23150](https://github.com/google-gemini/gemini-cli/pull/23150) -- feat(cli): support 'tab to queue' for messages while generating by @gundermanc - in [#24052](https://github.com/google-gemini/gemini-cli/pull/24052) -- feat(core): agnostic background task UI with CompletionBehavior by - @adamfweidman in - [#22740](https://github.com/google-gemini/gemini-cli/pull/22740) -- UX for topic narration tool by @gundermanc in - [#24079](https://github.com/google-gemini/gemini-cli/pull/24079) -- fix: shellcheck warnings in scripts by @scidomino in - [#24035](https://github.com/google-gemini/gemini-cli/pull/24035) -- test(evals): add comprehensive subagent delegation evaluations by @abhipatel12 - in [#24132](https://github.com/google-gemini/gemini-cli/pull/24132) -- fix(a2a-server): prioritize ADC before evaluating headless constraints for - auth initialization by @spencer426 in - [#23614](https://github.com/google-gemini/gemini-cli/pull/23614) -- Text can be added after /plan command by @rambleraptor in - [#22833](https://github.com/google-gemini/gemini-cli/pull/22833) -- fix(cli): resolve missing F12 logs via global console store by @scidomino in - [#24235](https://github.com/google-gemini/gemini-cli/pull/24235) -- fix broken tests by @scidomino in - [#24279](https://github.com/google-gemini/gemini-cli/pull/24279) -- fix(evals): add update_topic behavioral eval by @gundermanc in - [#24223](https://github.com/google-gemini/gemini-cli/pull/24223) -- feat(core): Unified Context Management and 
Tool Distillation. by @joshualitt - in [#24157](https://github.com/google-gemini/gemini-cli/pull/24157) -- Default enable narration for the team. by @gundermanc in - [#24224](https://github.com/google-gemini/gemini-cli/pull/24224) -- fix(core): ensure default agents provide tools and use model-specific schemas - by @abhipatel12 in - [#24268](https://github.com/google-gemini/gemini-cli/pull/24268) -- feat(cli): show Flash Lite Preview model regardless of user tier by @sehoon38 - in [#23904](https://github.com/google-gemini/gemini-cli/pull/23904) -- feat(cli): implement compact tool output by @jwhelangoog in - [#20974](https://github.com/google-gemini/gemini-cli/pull/20974) -- Add security settings for tool sandboxing by @galz10 in - [#23923](https://github.com/google-gemini/gemini-cli/pull/23923) -- chore(test-utils): switch integration tests to use PREVIEW_GEMINI_MODEL by - @sehoon38 in [#24276](https://github.com/google-gemini/gemini-cli/pull/24276) -- feat(core): enable topic update narration for legacy models by @Abhijit-2592 - in [#24241](https://github.com/google-gemini/gemini-cli/pull/24241) -- feat(core): add project-level memory scope to save_memory tool by @SandyTao520 - in [#24161](https://github.com/google-gemini/gemini-cli/pull/24161) -- test(integration): fix plan mode write denial test false positive by @sehoon38 - in [#24299](https://github.com/google-gemini/gemini-cli/pull/24299) -- feat(plan): support `Plan` mode in untrusted folders by @Adib234 in - [#17586](https://github.com/google-gemini/gemini-cli/pull/17586) -- fix(core): enable mid-stream retries for all models and re-enable compression - test by @sehoon38 in - [#24302](https://github.com/google-gemini/gemini-cli/pull/24302) -- Changelog for v0.36.0-preview.6 by @gemini-cli-robot in - [#24082](https://github.com/google-gemini/gemini-cli/pull/24082) -- Changelog for v0.35.3 by @gemini-cli-robot in - [#24083](https://github.com/google-gemini/gemini-cli/pull/24083) -- feat(cli): add auth info 
to footer by @sehoon38 in - [#24042](https://github.com/google-gemini/gemini-cli/pull/24042) -- fix(browser): reset action counter for each agent session and let it ignore - internal actions by @cynthialong0-0 in - [#24228](https://github.com/google-gemini/gemini-cli/pull/24228) -- feat(plan): promote planning feature to stable by @ruomengz in - [#24282](https://github.com/google-gemini/gemini-cli/pull/24282) -- fix(browser): terminate subagent immediately on domain restriction violations - by @gsquared94 in - [#24313](https://github.com/google-gemini/gemini-cli/pull/24313) -- feat(cli): add UI to update extensions by @ruomengz in - [#23682](https://github.com/google-gemini/gemini-cli/pull/23682) -- Fix(browser): terminate immediately for "browser is already running" error by - @cynthialong0-0 in - [#24233](https://github.com/google-gemini/gemini-cli/pull/24233) -- docs: Add 'plan' option to approval mode in CLI reference by @YifanRuan in - [#24134](https://github.com/google-gemini/gemini-cli/pull/24134) -- fix(core): batch macOS seatbelt rules into a profile file to prevent ARG_MAX - errors by @ehedlund in - [#24255](https://github.com/google-gemini/gemini-cli/pull/24255) -- fix(core): fix race condition between browser agent and main closing process - by @cynthialong0-0 in - [#24340](https://github.com/google-gemini/gemini-cli/pull/24340) -- perf(build): optimize build scripts for parallel execution and remove - redundant checks by @sehoon38 in - [#24307](https://github.com/google-gemini/gemini-cli/pull/24307) -- ci: install bubblewrap on Linux for release workflows by @ehedlund in - [#24347](https://github.com/google-gemini/gemini-cli/pull/24347) -- chore(release): allow bundling for all builds, including stable by @sehoon38 - in [#24305](https://github.com/google-gemini/gemini-cli/pull/24305) -- Revert "Add security settings for tool sandboxing" by @jerop in - [#24357](https://github.com/google-gemini/gemini-cli/pull/24357) -- docs: update subagents docs to not 
be experimental by @abhipatel12 in - [#24343](https://github.com/google-gemini/gemini-cli/pull/24343) -- fix(core): implement **read and **write commands in sandbox managers by - @galz10 in [#24283](https://github.com/google-gemini/gemini-cli/pull/24283) -- don't try to remove tags in dry run by @scidomino in - [#24356](https://github.com/google-gemini/gemini-cli/pull/24356) -- fix(config): disable JIT context loading by default by @SandyTao520 in - [#24364](https://github.com/google-gemini/gemini-cli/pull/24364) -- test(sandbox): add integration test for dynamic permission expansion by - @galz10 in [#24359](https://github.com/google-gemini/gemini-cli/pull/24359) -- docs(policy): remove unsupported mcpName wildcard edge case by @abhipatel12 in - [#24133](https://github.com/google-gemini/gemini-cli/pull/24133) -- docs: fix broken GEMINI.md link in CONTRIBUTING.md by @Panchal-Tirth in - [#24182](https://github.com/google-gemini/gemini-cli/pull/24182) -- feat(core): infrastructure for event-driven subagent history by @abhipatel12 - in [#23914](https://github.com/google-gemini/gemini-cli/pull/23914) -- fix(core): resolve Plan Mode deadlock during plan file creation due to sandbox - restrictions by @DavidAPierce in - [#24047](https://github.com/google-gemini/gemini-cli/pull/24047) -- fix(core): fix browser agent UX issues and improve E2E test reliability by - @gsquared94 in - [#24312](https://github.com/google-gemini/gemini-cli/pull/24312) -- fix(ui): wrap topic and intent fields in TopicMessage by @jwhelangoog in - [#24386](https://github.com/google-gemini/gemini-cli/pull/24386) -- refactor(core): Centralize context management logic into src/context by - @joshualitt in - [#24380](https://github.com/google-gemini/gemini-cli/pull/24380) -- fix(core): pin AuthType.GATEWAY to use Gemini 3.1 Pro/Flash Lite by default by - @sripasg in [#24375](https://github.com/google-gemini/gemini-cli/pull/24375) -- feat(ui): add Tokyo Night theme by @danrneal in - 
[#24054](https://github.com/google-gemini/gemini-cli/pull/24054) -- fix(cli): refactor test config loading and mock debugLogger in test-setup by - @mattKorwel in - [#24389](https://github.com/google-gemini/gemini-cli/pull/24389) -- Set memoryManager to false in settings.json by @mattKorwel in - [#24393](https://github.com/google-gemini/gemini-cli/pull/24393) -- ink 6.6.3 by @jacob314 in - [#24372](https://github.com/google-gemini/gemini-cli/pull/24372) -- fix(core): resolve subagent chat recording gaps and directory inheritance by +- fix(cli): refresh slash command list after /skills reload by @NTaylorMullen in + [#24454](https://github.com/google-gemini/gemini-cli/pull/24454) +- Update README.md for links. by @g-samroberts in + [#22759](https://github.com/google-gemini/gemini-cli/pull/22759) +- fix(core): ensure complete_task tool calls are recorded in chat history by @abhipatel12 in - [#24368](https://github.com/google-gemini/gemini-cli/pull/24368) -- fix(cli): cap shell output at 10 MB to prevent RangeError crash by @ProthamD - in [#24168](https://github.com/google-gemini/gemini-cli/pull/24168) -- feat(plan): conditionally add enter/exit plan mode tools based on current mode - by @ruomengz in - [#24378](https://github.com/google-gemini/gemini-cli/pull/24378) -- feat(core): prioritize discussion before formal plan approval by @jerop in - [#24423](https://github.com/google-gemini/gemini-cli/pull/24423) -- fix(ui): add accelerated scrolling on alternate buffer mode by @devr0306 in - [#23940](https://github.com/google-gemini/gemini-cli/pull/23940) -- feat(core): populate sandbox forbidden paths with project ignore file contents - by @ehedlund in - [#24038](https://github.com/google-gemini/gemini-cli/pull/24038) -- fix(core): ensure blue border overlay and input blocker to act correctly - depending on browser agent activities by @cynthialong0-0 in - [#24385](https://github.com/google-gemini/gemini-cli/pull/24385) -- fix(ui): removed additional vertical padding for 
tables by @devr0306 in - [#24381](https://github.com/google-gemini/gemini-cli/pull/24381) -- fix(build): upload full bundle directory archive to GitHub releases by - @sehoon38 in [#24403](https://github.com/google-gemini/gemini-cli/pull/24403) -- fix(build): wire bundle:browser-mcp into bundle pipeline by @gsquared94 in - [#24424](https://github.com/google-gemini/gemini-cli/pull/24424) -- feat(browser): add sandbox-aware browser agent initialization by @gsquared94 - in [#24419](https://github.com/google-gemini/gemini-cli/pull/24419) -- feat(core): enhance tracker task schemas for detailed titles and descriptions - by @anj-s in [#23902](https://github.com/google-gemini/gemini-cli/pull/23902) -- refactor(core): Unified context management settings schema by @joshualitt in - [#24391](https://github.com/google-gemini/gemini-cli/pull/24391) -- feat(core): update browser agent prompt to check open pages first when - bringing up by @cynthialong0-0 in - [#24431](https://github.com/google-gemini/gemini-cli/pull/24431) -- fix(acp) refactor(core,cli): centralize model discovery logic in - ModelConfigService by @sripasg in - [#24392](https://github.com/google-gemini/gemini-cli/pull/24392) -- Changelog for v0.36.0-preview.7 by @gemini-cli-robot in - [#24346](https://github.com/google-gemini/gemini-cli/pull/24346) -- fix: update task tracker storage location in system prompt by @anj-s in - [#24034](https://github.com/google-gemini/gemini-cli/pull/24034) -- feat(browser): supersede stale snapshots to reclaim context-window tokens by + [#24437](https://github.com/google-gemini/gemini-cli/pull/24437) +- feat(policy): explicitly allow web_fetch in plan mode with ask_user by + @Adib234 in [#24456](https://github.com/google-gemini/gemini-cli/pull/24456) +- fix(core): refactor linux sandbox to fix ARG_MAX crashes by @ehedlund in + [#24286](https://github.com/google-gemini/gemini-cli/pull/24286) +- feat(config): add experimental.adk.agentSessionNoninteractiveEnabled setting + by 
@adamfweidman in + [#24439](https://github.com/google-gemini/gemini-cli/pull/24439) +- Changelog for v0.36.0-preview.8 by @gemini-cli-robot in + [#24453](https://github.com/google-gemini/gemini-cli/pull/24453) +- feat(cli): change default loadingPhrases to 'off' to hide tips by @keithguerin + in [#24342](https://github.com/google-gemini/gemini-cli/pull/24342) +- fix(cli): ensure agent stops when all declinable tools are cancelled by + @NTaylorMullen in + [#24479](https://github.com/google-gemini/gemini-cli/pull/24479) +- fix(core): enhance sandbox usability and fix build error by @galz10 in + [#24460](https://github.com/google-gemini/gemini-cli/pull/24460) +- Terminal Serializer Optimization by @jacob314 in + [#24485](https://github.com/google-gemini/gemini-cli/pull/24485) +- Auto configure memory. by @jacob314 in + [#24474](https://github.com/google-gemini/gemini-cli/pull/24474) +- Unused error variables in catch block are not allowed by @alisa-alisa in + [#24487](https://github.com/google-gemini/gemini-cli/pull/24487) +- feat(core): add background memory service for skill extraction by @SandyTao520 + in [#24274](https://github.com/google-gemini/gemini-cli/pull/24274) +- feat: implement high-signal PR regression check for evaluations by + @alisa-alisa in + [#23937](https://github.com/google-gemini/gemini-cli/pull/23937) +- Fix shell output display by @jacob314 in + [#24490](https://github.com/google-gemini/gemini-cli/pull/24490) +- fix(ui): resolve unwanted vertical spacing around various tool output + treatments by @jwhelangoog in + [#24449](https://github.com/google-gemini/gemini-cli/pull/24449) +- revert(cli): bring back input box and footer visibility in copy mode by + @sehoon38 in [#24504](https://github.com/google-gemini/gemini-cli/pull/24504) +- fix(cli): prevent crash in AnsiOutputText when handling non-array data by + @sehoon38 in [#24498](https://github.com/google-gemini/gemini-cli/pull/24498) +- feat(cli): support default values for environment 
variables by @ruomengz in + [#24469](https://github.com/google-gemini/gemini-cli/pull/24469) +- Implement background process monitoring and inspection tools by @cocosheng-g + in [#23799](https://github.com/google-gemini/gemini-cli/pull/23799) +- docs(browser-agent): update stale browser agent documentation by @gsquared94 + in [#24463](https://github.com/google-gemini/gemini-cli/pull/24463) +- fix: enable browser_agent in integration tests and add localhost fixture tests + by @gsquared94 in + [#24523](https://github.com/google-gemini/gemini-cli/pull/24523) +- fix(browser): handle computer-use model detection for analyze_screenshot by @gsquared94 in - [#24440](https://github.com/google-gemini/gemini-cli/pull/24440) -- docs(core): add subagent tool isolation draft doc by @akh64bit in - [#23275](https://github.com/google-gemini/gemini-cli/pull/23275) -- fix(patch): cherry-pick 64c928f to release/v0.37.0-preview.0-pr-23257 to patch - version v0.37.0-preview.0 and create version 0.37.0-preview.1 by - @gemini-cli-robot in - [#24561](https://github.com/google-gemini/gemini-cli/pull/24561) -- fix(patch): cherry-pick cb7f7d6 to release/v0.37.0-preview.1-pr-24342 to patch - version v0.37.0-preview.1 and create version 0.37.0-preview.2 by - @gemini-cli-robot in - [#24842](https://github.com/google-gemini/gemini-cli/pull/24842) + [#24502](https://github.com/google-gemini/gemini-cli/pull/24502) +- feat(core): Land ContextCompressionService by @joshualitt in + [#24483](https://github.com/google-gemini/gemini-cli/pull/24483) +- feat(core): scope subagent workspace directories via AsyncLocalStorage by + @SandyTao520 in + [#24445](https://github.com/google-gemini/gemini-cli/pull/24445) +- Update ink version to 6.6.7 by @jacob314 in + [#24514](https://github.com/google-gemini/gemini-cli/pull/24514) +- fix(acp): handle all InvalidStreamError types gracefully in prompt by @sripasg + in [#24540](https://github.com/google-gemini/gemini-cli/pull/24540) +- Fix crash when vim editor is not 
found in PATH on Windows by + @Nagajyothi-tammisetti in + [#22423](https://github.com/google-gemini/gemini-cli/pull/22423) +- fix(core): move project memory dir under tmp directory by @SandyTao520 in + [#24542](https://github.com/google-gemini/gemini-cli/pull/24542) +- Enable 'Other' option for yesno question type by @ruomengz in + [#24545](https://github.com/google-gemini/gemini-cli/pull/24545) +- fix(cli): clear stale retry/loading state after cancellation (#21096) by + @Aaxhirrr in [#21960](https://github.com/google-gemini/gemini-cli/pull/21960) +- Changelog for v0.37.0-preview.0 by @gemini-cli-robot in + [#24464](https://github.com/google-gemini/gemini-cli/pull/24464) +- feat(core): implement context-aware persistent policy approvals by @jerop in + [#23257](https://github.com/google-gemini/gemini-cli/pull/23257) +- docs: move agent disabling instructions and update remote agent status by + @jackwotherspoon in + [#24559](https://github.com/google-gemini/gemini-cli/pull/24559) +- feat(cli): migrate nonInteractiveCli to LegacyAgentSession by @adamfweidman in + [#22987](https://github.com/google-gemini/gemini-cli/pull/22987) +- fix(core): unsafe type assertions in Core File System #19712 by + @aniketsaurav18 in + [#19739](https://github.com/google-gemini/gemini-cli/pull/19739) +- fix(ui): hide model quota in /stats and refactor quota display by @danzaharia1 + in [#24206](https://github.com/google-gemini/gemini-cli/pull/24206) +- Changelog for v0.36.0 by @gemini-cli-robot in + [#24558](https://github.com/google-gemini/gemini-cli/pull/24558) +- Changelog for v0.37.0-preview.1 by @gemini-cli-robot in + [#24568](https://github.com/google-gemini/gemini-cli/pull/24568) +- docs: add missing .md extensions to internal doc links by @ishaan-arora-1 in + [#24145](https://github.com/google-gemini/gemini-cli/pull/24145) +- fix(ui): fixed table styling by @devr0306 in + [#24565](https://github.com/google-gemini/gemini-cli/pull/24565) +- fix(core): pass includeDirectories to 
sandbox configuration by @galz10 in + [#24573](https://github.com/google-gemini/gemini-cli/pull/24573) +- feat(ui): enable "TerminalBuffer" mode to solve flicker by @jacob314 in + [#24512](https://github.com/google-gemini/gemini-cli/pull/24512) +- docs: clarify release coordination by @scidomino in + [#24575](https://github.com/google-gemini/gemini-cli/pull/24575) +- fix(core): remove broken PowerShell translation and fix native \_\_write in + Windows sandbox by @scidomino in + [#24571](https://github.com/google-gemini/gemini-cli/pull/24571) +- Add instructions for how to start react in prod and force react to prod mode + by @jacob314 in + [#24590](https://github.com/google-gemini/gemini-cli/pull/24590) +- feat(cli): minimalist sandbox status labels by @galz10 in + [#24582](https://github.com/google-gemini/gemini-cli/pull/24582) +- Feat/browser agent metrics by @kunal-10-cloud in + [#24210](https://github.com/google-gemini/gemini-cli/pull/24210) +- test: fix Windows CI execution and resolve exposed platform failures by + @ehedlund in [#24476](https://github.com/google-gemini/gemini-cli/pull/24476) +- feat(core,cli): prioritize summary for topics (#24608) by @Abhijit-2592 in + [#24609](https://github.com/google-gemini/gemini-cli/pull/24609) +- show color by @jacob314 in + [#24613](https://github.com/google-gemini/gemini-cli/pull/24613) +- feat(cli): enable compact tool output by default (#24509) by @jwhelangoog in + [#24510](https://github.com/google-gemini/gemini-cli/pull/24510) +- fix(core): inject skill system instructions into subagent prompts if activated + by @abhipatel12 in + [#24620](https://github.com/google-gemini/gemini-cli/pull/24620) +- fix(core): improve windows sandbox reliability and fix integration tests by + @ehedlund in [#24480](https://github.com/google-gemini/gemini-cli/pull/24480) +- fix(core): ensure sandbox approvals are correctly persisted and matched for + proactive expansions by @galz10 in + 
[#24577](https://github.com/google-gemini/gemini-cli/pull/24577) +- feat(cli) Scrollbar for input prompt by @jacob314 in + [#21992](https://github.com/google-gemini/gemini-cli/pull/21992) +- Do not run pr-eval workflow when no steering changes detected by @alisa-alisa + in [#24621](https://github.com/google-gemini/gemini-cli/pull/24621) +- Fix restoration of topic headers. by @gundermanc in + [#24650](https://github.com/google-gemini/gemini-cli/pull/24650) +- feat(core): discourage update topic tool for simple tasks by @Samee24 in + [#24640](https://github.com/google-gemini/gemini-cli/pull/24640) +- fix(core): ensure global temp directory is always in sandbox allowed paths by + @galz10 in [#24638](https://github.com/google-gemini/gemini-cli/pull/24638) +- fix(core): detect uninitialized lines by @jacob314 in + [#24646](https://github.com/google-gemini/gemini-cli/pull/24646) +- docs: update sandboxing documentation and toolSandboxing settings by @galz10 + in [#24655](https://github.com/google-gemini/gemini-cli/pull/24655) +- feat(cli): enhance tool confirmation UI and selection layout by @galz10 in + [#24376](https://github.com/google-gemini/gemini-cli/pull/24376) +- feat(acp): add support for `/about` command by @sripasg in + [#24649](https://github.com/google-gemini/gemini-cli/pull/24649) +- feat(cli): add role specific metrics to /stats by @cynthialong0-0 in + [#24659](https://github.com/google-gemini/gemini-cli/pull/24659) +- split context by @jacob314 in + [#24623](https://github.com/google-gemini/gemini-cli/pull/24623) +- fix(cli): remove -S from shebang to fix Windows and BSD execution by + @scidomino in [#24756](https://github.com/google-gemini/gemini-cli/pull/24756) +- Fix issue where topic headers can be posted back to back by @gundermanc in + [#24759](https://github.com/google-gemini/gemini-cli/pull/24759) +- fix(core): handle partial llm_request in BeforeModel hook override by + @krishdef7 in 
[#22326](https://github.com/google-gemini/gemini-cli/pull/22326) +- fix(ui): improve narration suppression and reduce flicker by @gundermanc in + [#24635](https://github.com/google-gemini/gemini-cli/pull/24635) +- fix(ui): fixed auth race condition causing logo to flicker by @devr0306 in + [#24652](https://github.com/google-gemini/gemini-cli/pull/24652) +- fix(browser): remove premature browser cleanup after subagent invocation by + @gsquared94 in + [#24753](https://github.com/google-gemini/gemini-cli/pull/24753) +- Revert "feat(core,cli): prioritize summary for topics (#24608)" by + @Abhijit-2592 in + [#24777](https://github.com/google-gemini/gemini-cli/pull/24777) +- relax tool sandboxing overrides for plan mode to match defaults. by + @DavidAPierce in + [#24762](https://github.com/google-gemini/gemini-cli/pull/24762) +- fix(cli): respect global environment variable allowlist by @scidomino in + [#24767](https://github.com/google-gemini/gemini-cli/pull/24767) +- fix(cli): ensure skills list outputs to stdout in non-interactive environments + by @spencer426 in + [#24566](https://github.com/google-gemini/gemini-cli/pull/24566) +- Add an eval for and fix unsafe cloning behavior. 
by @gundermanc in + [#24457](https://github.com/google-gemini/gemini-cli/pull/24457) +- fix(policy): allow complete_task in plan mode by @abhipatel12 in + [#24771](https://github.com/google-gemini/gemini-cli/pull/24771) +- feat(telemetry): add browser agent clearcut metrics by @gsquared94 in + [#24688](https://github.com/google-gemini/gemini-cli/pull/24688) +- feat(cli): support selective topic expansion and click-to-expand by + @Abhijit-2592 in + [#24793](https://github.com/google-gemini/gemini-cli/pull/24793) +- temporarily disable sandbox integration test on windows by @ehedlund in + [#24786](https://github.com/google-gemini/gemini-cli/pull/24786) +- Remove flakey test by @scidomino in + [#24837](https://github.com/google-gemini/gemini-cli/pull/24837) +- Alisa/approve button by @alisa-alisa in + [#24645](https://github.com/google-gemini/gemini-cli/pull/24645) +- feat(hooks): display hook system messages in UI by @mbleigh in + [#24616](https://github.com/google-gemini/gemini-cli/pull/24616) +- fix(core): propagate BeforeModel hook model override end-to-end by @krishdef7 + in [#24784](https://github.com/google-gemini/gemini-cli/pull/24784) +- chore: fix formatting for behavioral eval skill reference file by @abhipatel12 + in [#24846](https://github.com/google-gemini/gemini-cli/pull/24846) +- fix: use directory junctions on Windows for skill linking by @enjoykumawat in + [#24823](https://github.com/google-gemini/gemini-cli/pull/24823) +- fix(cli): prevent multiple banner increments on remount by @sehoon38 in + [#24843](https://github.com/google-gemini/gemini-cli/pull/24843) +- feat(acp): add /help command by @sripasg in + [#24839](https://github.com/google-gemini/gemini-cli/pull/24839) +- fix(core): remove tmux alternate buffer warning by @jackwotherspoon in + [#24852](https://github.com/google-gemini/gemini-cli/pull/24852) +- Improve sandbox error matching and caching by @DavidAPierce in + [#24550](https://github.com/google-gemini/gemini-cli/pull/24550) +- 
feat(core): add agent protocol UI types and experimental flag by @mbleigh in + [#24275](https://github.com/google-gemini/gemini-cli/pull/24275) +- feat(core): use experiment flags for default fetch timeouts by @yunaseoul in + [#24261](https://github.com/google-gemini/gemini-cli/pull/24261) +- Revert "fix(ui): improve narration suppression and reduce flicker (#2… by + @gundermanc in + [#24857](https://github.com/google-gemini/gemini-cli/pull/24857) +- refactor(cli): remove duplication in interactive shell awaiting input hint by + @JayadityaGit in + [#24801](https://github.com/google-gemini/gemini-cli/pull/24801) +- refactor(core): make LegacyAgentSession dependencies optional by @mbleigh in + [#24287](https://github.com/google-gemini/gemini-cli/pull/24287) +- Changelog for v0.37.0-preview.2 by @gemini-cli-robot in + [#24848](https://github.com/google-gemini/gemini-cli/pull/24848) +- fix(cli): always show shell command description or actual command by @jacob314 + in [#24774](https://github.com/google-gemini/gemini-cli/pull/24774) +- Added flag for ept size and increased default size by @devr0306 in + [#24859](https://github.com/google-gemini/gemini-cli/pull/24859) +- fix(core): dispose Scheduler to prevent McpProgress listener leak by + @Anjaligarhwal in + [#24870](https://github.com/google-gemini/gemini-cli/pull/24870) +- fix(cli): switch default back to terminalBuffer=false and fix regressions + introduced for that mode by @jacob314 in + [#24873](https://github.com/google-gemini/gemini-cli/pull/24873) +- feat(cli): switch to ctrl+g from ctrl-x by @jacob314 in + [#24861](https://github.com/google-gemini/gemini-cli/pull/24861) +- fix: isolate concurrent browser agent instances by @gsquared94 in + [#24794](https://github.com/google-gemini/gemini-cli/pull/24794) +- docs: update MCP server OAuth redirect port documentation by @adamfweidman in + [#24844](https://github.com/google-gemini/gemini-cli/pull/24844) **Full Changelog**: 
-https://github.com/google-gemini/gemini-cli/compare/v0.36.0...v0.37.2 +https://github.com/google-gemini/gemini-cli/compare/v0.37.2...v0.38.0 diff --git a/docs/changelogs/preview.md b/docs/changelogs/preview.md index cf43e62c45..737b0917b4 100644 --- a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.38.0-preview.0 +# Preview release: v0.39.0-preview.0 -Released: April 08, 2026 +Released: April 14, 2026 Our preview release includes the latest, new, and experimental features. This release may not be as stable as our [latest weekly release](latest.md). @@ -13,256 +13,245 @@ npm install -g @google/gemini-cli@preview ## Highlights -- **Context Management:** Introduced a Context Compression Service to optimize - context window usage and landed a background memory service for skill - extraction. -- **Enhanced Security:** Implemented context-aware persistent policy approvals - for smarter tool permissions and enabled `web_fetch` in plan mode with user - confirmation. -- **Workflow Monitoring:** Added background process monitoring and inspection - tools for better visibility into long-running tasks. -- **UI/UX Refinements:** Enhanced the tool confirmation UI, selection layout, - and added support for selective topic expansion and click-to-expand. -- **Core Stability:** Improved sandbox reliability on Linux and Windows, - resolved shebang compatibility issues, and fixed various crashes in the CLI - and core services. +- **Refactored Subagents and Unified Tooling:** Consolidated subagent tools into + a single `invoke_subagent` tool, removed legacy wrapping tools, and improved + turn limits for codebase investigator. +- **Advanced Memory and Skill Management:** Introduced `/memory` inbox for + reviewing extracted skills and added skill patching support, enhancing agent + learning and persistence. 
+- **Expanded Test and Evaluation Infrastructure:** Added memory and CPU + performance integration test harnesses and generalized evaluation + infrastructure for better suite organization. +- **Sandbox and Security Hardening:** Centralized sandbox paths for Linux and + macOS, enforced read-only security for async git worktree resolution, and + optimized Windows sandbox initialization. +- **Enhanced CLI UX and UI Stability:** Improved scroll momentum, added a + `debugRainbow` setting, and resolved various memory leaks and PTY exhaustion + issues for a smoother terminal experience. ## What's Changed -- fix(cli): refresh slash command list after /skills reload by @NTaylorMullen in - [#24454](https://github.com/google-gemini/gemini-cli/pull/24454) -- Update README.md for links. by @g-samroberts in - [#22759](https://github.com/google-gemini/gemini-cli/pull/22759) -- fix(core): ensure complete_task tool calls are recorded in chat history by - @abhipatel12 in - [#24437](https://github.com/google-gemini/gemini-cli/pull/24437) -- feat(policy): explicitly allow web_fetch in plan mode with ask_user by - @Adib234 in [#24456](https://github.com/google-gemini/gemini-cli/pull/24456) -- fix(core): refactor linux sandbox to fix ARG_MAX crashes by @ehedlund in - [#24286](https://github.com/google-gemini/gemini-cli/pull/24286) -- feat(config): add experimental.adk.agentSessionNoninteractiveEnabled setting - by @adamfweidman in - [#24439](https://github.com/google-gemini/gemini-cli/pull/24439) -- Changelog for v0.36.0-preview.8 by @gemini-cli-robot in - [#24453](https://github.com/google-gemini/gemini-cli/pull/24453) -- feat(cli): change default loadingPhrases to 'off' to hide tips by @keithguerin - in [#24342](https://github.com/google-gemini/gemini-cli/pull/24342) -- fix(cli): ensure agent stops when all declinable tools are cancelled by - @NTaylorMullen in - [#24479](https://github.com/google-gemini/gemini-cli/pull/24479) -- fix(core): enhance sandbox usability and fix build error 
by @galz10 in - [#24460](https://github.com/google-gemini/gemini-cli/pull/24460) -- Terminal Serializer Optimization by @jacob314 in - [#24485](https://github.com/google-gemini/gemini-cli/pull/24485) -- Auto configure memory. by @jacob314 in - [#24474](https://github.com/google-gemini/gemini-cli/pull/24474) -- Unused error variables in catch block are not allowed by @alisa-alisa in - [#24487](https://github.com/google-gemini/gemini-cli/pull/24487) -- feat(core): add background memory service for skill extraction by @SandyTao520 - in [#24274](https://github.com/google-gemini/gemini-cli/pull/24274) -- feat: implement high-signal PR regression check for evaluations by - @alisa-alisa in - [#23937](https://github.com/google-gemini/gemini-cli/pull/23937) -- Fix shell output display by @jacob314 in - [#24490](https://github.com/google-gemini/gemini-cli/pull/24490) -- fix(ui): resolve unwanted vertical spacing around various tool output - treatments by @jwhelangoog in - [#24449](https://github.com/google-gemini/gemini-cli/pull/24449) -- revert(cli): bring back input box and footer visibility in copy mode by - @sehoon38 in [#24504](https://github.com/google-gemini/gemini-cli/pull/24504) -- fix(cli): prevent crash in AnsiOutputText when handling non-array data by - @sehoon38 in [#24498](https://github.com/google-gemini/gemini-cli/pull/24498) -- feat(cli): support default values for environment variables by @ruomengz in - [#24469](https://github.com/google-gemini/gemini-cli/pull/24469) -- Implement background process monitoring and inspection tools by @cocosheng-g - in [#23799](https://github.com/google-gemini/gemini-cli/pull/23799) -- docs(browser-agent): update stale browser agent documentation by @gsquared94 - in [#24463](https://github.com/google-gemini/gemini-cli/pull/24463) -- fix: enable browser_agent in integration tests and add localhost fixture tests - by @gsquared94 in - [#24523](https://github.com/google-gemini/gemini-cli/pull/24523) -- fix(browser): handle 
computer-use model detection for analyze_screenshot by - @gsquared94 in - [#24502](https://github.com/google-gemini/gemini-cli/pull/24502) -- feat(core): Land ContextCompressionService by @joshualitt in - [#24483](https://github.com/google-gemini/gemini-cli/pull/24483) -- feat(core): scope subagent workspace directories via AsyncLocalStorage by +- refactor(plan): simplify policy priorities and consolidate read-only rules by + @ruomengz in [#24849](https://github.com/google-gemini/gemini-cli/pull/24849) +- feat(test-utils): add memory usage integration test harness by @sripasg in + [#24876](https://github.com/google-gemini/gemini-cli/pull/24876) +- feat(memory): add /memory inbox command for reviewing extracted skills by @SandyTao520 in - [#24445](https://github.com/google-gemini/gemini-cli/pull/24445) -- Update ink version to 6.6.7 by @jacob314 in - [#24514](https://github.com/google-gemini/gemini-cli/pull/24514) -- fix(acp): handle all InvalidStreamError types gracefully in prompt by @sripasg - in [#24540](https://github.com/google-gemini/gemini-cli/pull/24540) -- Fix crash when vim editor is not found in PATH on Windows by - @Nagajyothi-tammisetti in - [#22423](https://github.com/google-gemini/gemini-cli/pull/22423) -- fix(core): move project memory dir under tmp directory by @SandyTao520 in - [#24542](https://github.com/google-gemini/gemini-cli/pull/24542) -- Enable 'Other' option for yesno question type by @ruomengz in - [#24545](https://github.com/google-gemini/gemini-cli/pull/24545) -- fix(cli): clear stale retry/loading state after cancellation (#21096) by - @Aaxhirrr in [#21960](https://github.com/google-gemini/gemini-cli/pull/21960) -- Changelog for v0.37.0-preview.0 by @gemini-cli-robot in - [#24464](https://github.com/google-gemini/gemini-cli/pull/24464) -- feat(core): implement context-aware persistent policy approvals by @jerop in - [#23257](https://github.com/google-gemini/gemini-cli/pull/23257) -- docs: move agent disabling instructions and update 
remote agent status by - @jackwotherspoon in - [#24559](https://github.com/google-gemini/gemini-cli/pull/24559) -- feat(cli): migrate nonInteractiveCli to LegacyAgentSession by @adamfweidman in - [#22987](https://github.com/google-gemini/gemini-cli/pull/22987) -- fix(core): unsafe type assertions in Core File System #19712 by - @aniketsaurav18 in - [#19739](https://github.com/google-gemini/gemini-cli/pull/19739) -- fix(ui): hide model quota in /stats and refactor quota display by @danzaharia1 - in [#24206](https://github.com/google-gemini/gemini-cli/pull/24206) -- Changelog for v0.36.0 by @gemini-cli-robot in - [#24558](https://github.com/google-gemini/gemini-cli/pull/24558) -- Changelog for v0.37.0-preview.1 by @gemini-cli-robot in - [#24568](https://github.com/google-gemini/gemini-cli/pull/24568) -- docs: add missing .md extensions to internal doc links by @ishaan-arora-1 in - [#24145](https://github.com/google-gemini/gemini-cli/pull/24145) -- fix(ui): fixed table styling by @devr0306 in - [#24565](https://github.com/google-gemini/gemini-cli/pull/24565) -- fix(core): pass includeDirectories to sandbox configuration by @galz10 in - [#24573](https://github.com/google-gemini/gemini-cli/pull/24573) -- feat(ui): enable "TerminalBuffer" mode to solve flicker by @jacob314 in - [#24512](https://github.com/google-gemini/gemini-cli/pull/24512) -- docs: clarify release coordination by @scidomino in - [#24575](https://github.com/google-gemini/gemini-cli/pull/24575) -- fix(core): remove broken PowerShell translation and fix native \_\_write in - Windows sandbox by @scidomino in - [#24571](https://github.com/google-gemini/gemini-cli/pull/24571) -- Add instructions for how to start react in prod and force react to prod mode - by @jacob314 in - [#24590](https://github.com/google-gemini/gemini-cli/pull/24590) -- feat(cli): minimalist sandbox status labels by @galz10 in - [#24582](https://github.com/google-gemini/gemini-cli/pull/24582) -- Feat/browser agent metrics by 
@kunal-10-cloud in - [#24210](https://github.com/google-gemini/gemini-cli/pull/24210) -- test: fix Windows CI execution and resolve exposed platform failures by - @ehedlund in [#24476](https://github.com/google-gemini/gemini-cli/pull/24476) -- feat(core,cli): prioritize summary for topics (#24608) by @Abhijit-2592 in - [#24609](https://github.com/google-gemini/gemini-cli/pull/24609) -- show color by @jacob314 in - [#24613](https://github.com/google-gemini/gemini-cli/pull/24613) -- feat(cli): enable compact tool output by default (#24509) by @jwhelangoog in - [#24510](https://github.com/google-gemini/gemini-cli/pull/24510) -- fix(core): inject skill system instructions into subagent prompts if activated - by @abhipatel12 in - [#24620](https://github.com/google-gemini/gemini-cli/pull/24620) -- fix(core): improve windows sandbox reliability and fix integration tests by - @ehedlund in [#24480](https://github.com/google-gemini/gemini-cli/pull/24480) -- fix(core): ensure sandbox approvals are correctly persisted and matched for - proactive expansions by @galz10 in - [#24577](https://github.com/google-gemini/gemini-cli/pull/24577) -- feat(cli) Scrollbar for input prompt by @jacob314 in - [#21992](https://github.com/google-gemini/gemini-cli/pull/21992) -- Do not run pr-eval workflow when no steering changes detected by @alisa-alisa - in [#24621](https://github.com/google-gemini/gemini-cli/pull/24621) -- Fix restoration of topic headers. 
by @gundermanc in - [#24650](https://github.com/google-gemini/gemini-cli/pull/24650) -- feat(core): discourage update topic tool for simple tasks by @Samee24 in - [#24640](https://github.com/google-gemini/gemini-cli/pull/24640) -- fix(core): ensure global temp directory is always in sandbox allowed paths by - @galz10 in [#24638](https://github.com/google-gemini/gemini-cli/pull/24638) -- fix(core): detect uninitialized lines by @jacob314 in - [#24646](https://github.com/google-gemini/gemini-cli/pull/24646) -- docs: update sandboxing documentation and toolSandboxing settings by @galz10 - in [#24655](https://github.com/google-gemini/gemini-cli/pull/24655) -- feat(cli): enhance tool confirmation UI and selection layout by @galz10 in - [#24376](https://github.com/google-gemini/gemini-cli/pull/24376) -- feat(acp): add support for `/about` command by @sripasg in - [#24649](https://github.com/google-gemini/gemini-cli/pull/24649) -- feat(cli): add role specific metrics to /stats by @cynthialong0-0 in - [#24659](https://github.com/google-gemini/gemini-cli/pull/24659) -- split context by @jacob314 in - [#24623](https://github.com/google-gemini/gemini-cli/pull/24623) -- fix(cli): remove -S from shebang to fix Windows and BSD execution by - @scidomino in [#24756](https://github.com/google-gemini/gemini-cli/pull/24756) -- Fix issue where topic headers can be posted back to back by @gundermanc in - [#24759](https://github.com/google-gemini/gemini-cli/pull/24759) -- fix(core): handle partial llm_request in BeforeModel hook override by - @krishdef7 in [#22326](https://github.com/google-gemini/gemini-cli/pull/22326) -- fix(ui): improve narration suppression and reduce flicker by @gundermanc in - [#24635](https://github.com/google-gemini/gemini-cli/pull/24635) -- fix(ui): fixed auth race condition causing logo to flicker by @devr0306 in - [#24652](https://github.com/google-gemini/gemini-cli/pull/24652) -- fix(browser): remove premature browser cleanup after subagent invocation by - 
@gsquared94 in - [#24753](https://github.com/google-gemini/gemini-cli/pull/24753) -- Revert "feat(core,cli): prioritize summary for topics (#24608)" by - @Abhijit-2592 in - [#24777](https://github.com/google-gemini/gemini-cli/pull/24777) -- relax tool sandboxing overrides for plan mode to match defaults. by - @DavidAPierce in - [#24762](https://github.com/google-gemini/gemini-cli/pull/24762) -- fix(cli): respect global environment variable allowlist by @scidomino in - [#24767](https://github.com/google-gemini/gemini-cli/pull/24767) -- fix(cli): ensure skills list outputs to stdout in non-interactive environments + [#24544](https://github.com/google-gemini/gemini-cli/pull/24544) +- chore(release): bump version to 0.39.0-nightly.20260408.e77b22e63 by + @gemini-cli-robot in + [#24939](https://github.com/google-gemini/gemini-cli/pull/24939) +- fix(core): ensure robust sandbox cleanup in all process execution paths by + @ehedlund in [#24763](https://github.com/google-gemini/gemini-cli/pull/24763) +- chore: update ink version to 6.6.8 by @jacob314 in + [#24934](https://github.com/google-gemini/gemini-cli/pull/24934) +- Changelog for v0.38.0-preview.0 by @gemini-cli-robot in + [#24938](https://github.com/google-gemini/gemini-cli/pull/24938) +- chore: ignore conductor directory by @JayadityaGit in + [#22128](https://github.com/google-gemini/gemini-cli/pull/22128) +- Changelog for v0.37.0 by @gemini-cli-robot in + [#24940](https://github.com/google-gemini/gemini-cli/pull/24940) +- feat(plan): require user confirmation for activate_skill in Plan Mode by + @ruomengz in [#24946](https://github.com/google-gemini/gemini-cli/pull/24946) +- feat(test-utils): add CPU performance integration test harness by @sripasg in + [#24951](https://github.com/google-gemini/gemini-cli/pull/24951) +- fix(cli-ui): enable Ctrl+Backspace for word deletion in Windows Terminal by + @dogukanozen in + [#21447](https://github.com/google-gemini/gemini-cli/pull/21447) +- test(sdk): add unit tests for 
GeminiCliSession by @AdamyaSingh7 in + [#21897](https://github.com/google-gemini/gemini-cli/pull/21897) +- fix(core): resolve windows symlink bypass and stabilize sandbox integration + tests by @ehedlund in + [#24834](https://github.com/google-gemini/gemini-cli/pull/24834) +- fix(cli): restore file path display in edit and write tool confirmations by + @jwhelangoog in + [#24974](https://github.com/google-gemini/gemini-cli/pull/24974) +- feat(core): refine shell tool description display logic by @jwhelangoog in + [#24903](https://github.com/google-gemini/gemini-cli/pull/24903) +- fix(core): dynamic session ID injection to resolve resume bugs by @scidomino + in [#24972](https://github.com/google-gemini/gemini-cli/pull/24972) +- Update ink version to 6.6.9 by @jacob314 in + [#24980](https://github.com/google-gemini/gemini-cli/pull/24980) +- Generalize evals infra to support more types of evals, organization and + queuing of named suites by @gundermanc in + [#24941](https://github.com/google-gemini/gemini-cli/pull/24941) +- fix(cli): optimize startup with lightweight parent process by @sehoon38 in + [#24667](https://github.com/google-gemini/gemini-cli/pull/24667) +- refactor(sandbox): use centralized sandbox paths in macOS Seatbelt + implementation by @ehedlund in + [#24984](https://github.com/google-gemini/gemini-cli/pull/24984) +- feat(cli): refine tool output formatting for compact mode by @jwhelangoog in + [#24677](https://github.com/google-gemini/gemini-cli/pull/24677) +- fix(sdk): skip broken sendStream tests to unblock nightly by @SandyTao520 in + [#25000](https://github.com/google-gemini/gemini-cli/pull/25000) +- refactor(core): use centralized path resolution for Linux sandbox by @ehedlund + in [#24985](https://github.com/google-gemini/gemini-cli/pull/24985) +- Support ctrl+shift+g by @jacob314 in + [#25035](https://github.com/google-gemini/gemini-cli/pull/25035) +- feat(core): refactor subagent tool to unified invoke_subagent tool by + @abhipatel12 in + 
[#24489](https://github.com/google-gemini/gemini-cli/pull/24489) +- fix(core): add explicit git identity env vars to prevent sandbox checkpointing + error by @mrpmohiburrahman in + [#19775](https://github.com/google-gemini/gemini-cli/pull/19775) +- fix: respect hideContextPercentage when FooterConfigDialog is closed without + changes by @chernistry in + [#24773](https://github.com/google-gemini/gemini-cli/pull/24773) +- fix(cli): suppress unhandled AbortError logs during request cancellation by + @euxaristia in + [#22621](https://github.com/google-gemini/gemini-cli/pull/22621) +- Automated documentation audit by @g-samroberts in + [#24567](https://github.com/google-gemini/gemini-cli/pull/24567) +- feat(cli): implement useAgentStream hook by @mbleigh in + [#24292](https://github.com/google-gemini/gemini-cli/pull/24292) +- refactor(plan) Clean default plan toml by @ruomengz in + [#25037](https://github.com/google-gemini/gemini-cli/pull/25037) +- refactor(core): remove legacy subagent wrapping tools by @abhipatel12 in + [#25053](https://github.com/google-gemini/gemini-cli/pull/25053) +- fix(core): honor retryDelay in RetryInfo for 503 errors by @yunaseoul in + [#25057](https://github.com/google-gemini/gemini-cli/pull/25057) +- fix(core): remediate subagent memory leaks using AbortSignal in MessageBus by + @abhipatel12 in + [#25048](https://github.com/google-gemini/gemini-cli/pull/25048) +- feat(cli): wire up useAgentStream in AppContainer by @mbleigh in + [#24297](https://github.com/google-gemini/gemini-cli/pull/24297) +- feat(core): migrate chat recording to JSONL streaming by @spencer426 in + [#23749](https://github.com/google-gemini/gemini-cli/pull/23749) +- fix(core): clear 5-minute timeouts in oauth flow to prevent memory leaks by + @spencer426 in + [#24968](https://github.com/google-gemini/gemini-cli/pull/24968) +- fix(sandbox): centralize async git worktree resolution and enforce read-only + security by @ehedlund in + 
[#25040](https://github.com/google-gemini/gemini-cli/pull/25040) +- feat(test): add high-volume shell test and refine perf harness by @sripasg in + [#24983](https://github.com/google-gemini/gemini-cli/pull/24983) +- fix(core): silently handle EPERM when listing dir structure by @scidomino in + [#25066](https://github.com/google-gemini/gemini-cli/pull/25066) +- Changelog for v0.37.1 by @gemini-cli-robot in + [#25055](https://github.com/google-gemini/gemini-cli/pull/25055) +- fix: decode Uint8Array and multi-byte UTF-8 in API error messages by + @kimjune01 in [#23341](https://github.com/google-gemini/gemini-cli/pull/23341) +- Automated documentation audit results by @g-samroberts in + [#22755](https://github.com/google-gemini/gemini-cli/pull/22755) +- debugging(ui): add optional debugRainbow setting by @jacob314 in + [#25088](https://github.com/google-gemini/gemini-cli/pull/25088) +- fix: resolve lifecycle memory leaks by cleaning up listeners and root closures by @spencer426 in - [#24566](https://github.com/google-gemini/gemini-cli/pull/24566) -- Add an eval for and fix unsafe cloning behavior. 
by @gundermanc in - [#24457](https://github.com/google-gemini/gemini-cli/pull/24457) -- fix(policy): allow complete_task in plan mode by @abhipatel12 in - [#24771](https://github.com/google-gemini/gemini-cli/pull/24771) -- feat(telemetry): add browser agent clearcut metrics by @gsquared94 in - [#24688](https://github.com/google-gemini/gemini-cli/pull/24688) -- feat(cli): support selective topic expansion and click-to-expand by - @Abhijit-2592 in - [#24793](https://github.com/google-gemini/gemini-cli/pull/24793) -- temporarily disable sandbox integration test on windows by @ehedlund in - [#24786](https://github.com/google-gemini/gemini-cli/pull/24786) -- Remove flakey test by @scidomino in - [#24837](https://github.com/google-gemini/gemini-cli/pull/24837) -- Alisa/approve button by @alisa-alisa in - [#24645](https://github.com/google-gemini/gemini-cli/pull/24645) -- feat(hooks): display hook system messages in UI by @mbleigh in - [#24616](https://github.com/google-gemini/gemini-cli/pull/24616) -- fix(core): propagate BeforeModel hook model override end-to-end by @krishdef7 - in [#24784](https://github.com/google-gemini/gemini-cli/pull/24784) -- chore: fix formatting for behavioral eval skill reference file by @abhipatel12 - in [#24846](https://github.com/google-gemini/gemini-cli/pull/24846) -- fix: use directory junctions on Windows for skill linking by @enjoykumawat in - [#24823](https://github.com/google-gemini/gemini-cli/pull/24823) -- fix(cli): prevent multiple banner increments on remount by @sehoon38 in - [#24843](https://github.com/google-gemini/gemini-cli/pull/24843) -- feat(acp): add /help command by @sripasg in - [#24839](https://github.com/google-gemini/gemini-cli/pull/24839) -- fix(core): remove tmux alternate buffer warning by @jackwotherspoon in - [#24852](https://github.com/google-gemini/gemini-cli/pull/24852) -- Improve sandbox error matching and caching by @DavidAPierce in - [#24550](https://github.com/google-gemini/gemini-cli/pull/24550) -- 
feat(core): add agent protocol UI types and experimental flag by @mbleigh in - [#24275](https://github.com/google-gemini/gemini-cli/pull/24275) -- feat(core): use experiment flags for default fetch timeouts by @yunaseoul in - [#24261](https://github.com/google-gemini/gemini-cli/pull/24261) -- Revert "fix(ui): improve narration suppression and reduce flicker (#2… by + [#25049](https://github.com/google-gemini/gemini-cli/pull/25049) +- docs(cli): updates f12 description to be more precise by @JayadityaGit in + [#15816](https://github.com/google-gemini/gemini-cli/pull/15816) +- fix(cli): mark /settings as unsafe to run concurrently by @jacob314 in + [#25061](https://github.com/google-gemini/gemini-cli/pull/25061) +- fix(core): remove buffer slice to prevent OOM on large output streams by + @spencer426 in + [#25094](https://github.com/google-gemini/gemini-cli/pull/25094) +- feat(core): persist subagent agentId in tool call records by @abhipatel12 in + [#25092](https://github.com/google-gemini/gemini-cli/pull/25092) +- chore(core): increase codebase investigator turn limits to 50 by @abhipatel12 + in [#25125](https://github.com/google-gemini/gemini-cli/pull/25125) +- refactor(core): consolidate execute() arguments into ExecuteOptions by + @mbleigh in [#25101](https://github.com/google-gemini/gemini-cli/pull/25101) +- feat(core): add Strategic Re-evaluation guidance to system prompt by + @aishaneeshah in + [#25062](https://github.com/google-gemini/gemini-cli/pull/25062) +- fix(core): preserve shell execution config fields on update by + @jasonmatthewsuhari in + [#25113](https://github.com/google-gemini/gemini-cli/pull/25113) +- docs: add vi shortcuts and clarify MCP sandbox setup by @chrisjcthomas in + [#21679](https://github.com/google-gemini/gemini-cli/pull/21679) +- fix(cli): pass session id to interactive shell executions by + @jasonmatthewsuhari in + [#25114](https://github.com/google-gemini/gemini-cli/pull/25114) +- fix(cli): resolve text sanitization data loss due 
to C1 control characters by + @euxaristia in + [#22624](https://github.com/google-gemini/gemini-cli/pull/22624) +- feat(core): add large memory regression test by @cynthialong0-0 in + [#25059](https://github.com/google-gemini/gemini-cli/pull/25059) +- fix(core): resolve PTY exhaustion and orphan MCP subprocess leaks by + @spencer426 in + [#25079](https://github.com/google-gemini/gemini-cli/pull/25079) +- chore(deps): update vulnerable dependencies via npm audit fix by @scidomino in + [#25140](https://github.com/google-gemini/gemini-cli/pull/25140) +- perf(sandbox): optimize Windows sandbox initialization via native ACL + application by @ehedlund in + [#25077](https://github.com/google-gemini/gemini-cli/pull/25077) +- chore: switch from keytar to @github/keytar by @cocosheng-g in + [#25143](https://github.com/google-gemini/gemini-cli/pull/25143) +- fix: improve audio MIME normalization and validation in file reads by + @junaiddshaukat in + [#21636](https://github.com/google-gemini/gemini-cli/pull/21636) +- docs: Update docs-audit to include changes in PR body by @g-samroberts in + [#25153](https://github.com/google-gemini/gemini-cli/pull/25153) +- docs: correct documentation for enforced authentication type by @cocosheng-g + in [#25142](https://github.com/google-gemini/gemini-cli/pull/25142) +- fix(cli): exclude update_topic from confirmation queue count by @Abhijit-2592 + in [#24945](https://github.com/google-gemini/gemini-cli/pull/24945) +- Memory fix for trace's streamWrapper. 
by @anthraxmilkshake in + [#25089](https://github.com/google-gemini/gemini-cli/pull/25089) +- fix(core): fix quota footer for non-auto models and improve display by + @jackwotherspoon in + [#25121](https://github.com/google-gemini/gemini-cli/pull/25121) +- docs(contributing): clarify self-assignment policy for issues by @jmr in + [#23087](https://github.com/google-gemini/gemini-cli/pull/23087) +- feat(core): add skill patching support with /memory inbox integration by + @SandyTao520 in + [#25148](https://github.com/google-gemini/gemini-cli/pull/25148) +- Stop suppressing thoughts and text in model response by @gundermanc in + [#25073](https://github.com/google-gemini/gemini-cli/pull/25073) +- fix(release): prefix git hash in nightly versions to prevent semver + normalization by @SandyTao520 in + [#25304](https://github.com/google-gemini/gemini-cli/pull/25304) +- feat(cli): extract QuotaContext and resolve infinite render loop by @Adib234 + in [#24959](https://github.com/google-gemini/gemini-cli/pull/24959) +- refactor(core): extract and centralize sandbox path utilities by @ehedlund in + [#25305](https://github.com/google-gemini/gemini-cli/pull/25305) +- feat(ui): added enhancements to scroll momentum by @devr0306 in + [#24447](https://github.com/google-gemini/gemini-cli/pull/24447) +- fix(core): replace custom binary detection with isbinaryfile to correctly + handle UTF-8 (U+FFFD) by @Anjaligarhwal in + [#25297](https://github.com/google-gemini/gemini-cli/pull/25297) +- feat(agent): implement tool-controlled display protocol (Steps 2-3) by + @mbleigh in [#25134](https://github.com/google-gemini/gemini-cli/pull/25134) +- Stop showing scrollbar unless we are in terminalBuffer mode by @jacob314 in + [#25320](https://github.com/google-gemini/gemini-cli/pull/25320) +- feat: support auth block in MCP servers config in agents by @TanmayVartak in + [#24770](https://github.com/google-gemini/gemini-cli/pull/24770) +- fix(core): expose GEMINI_PLANS_DIR to hook environment by 
@Adib234 in + [#25296](https://github.com/google-gemini/gemini-cli/pull/25296) +- feat(core): implement silent fallback for Plan Mode model routing by @jerop in + [#25317](https://github.com/google-gemini/gemini-cli/pull/25317) +- fix: correct redirect count increment in fetchJson by @KevinZhao in + [#24896](https://github.com/google-gemini/gemini-cli/pull/24896) +- fix(core): prevent secondary crash in ModelRouterService finally block by @gundermanc in - [#24857](https://github.com/google-gemini/gemini-cli/pull/24857) -- refactor(cli): remove duplication in interactive shell awaiting input hint by - @JayadityaGit in - [#24801](https://github.com/google-gemini/gemini-cli/pull/24801) -- refactor(core): make LegacyAgentSession dependencies optional by @mbleigh in - [#24287](https://github.com/google-gemini/gemini-cli/pull/24287) -- Changelog for v0.37.0-preview.2 by @gemini-cli-robot in - [#24848](https://github.com/google-gemini/gemini-cli/pull/24848) -- fix(cli): always show shell command description or actual command by @jacob314 - in [#24774](https://github.com/google-gemini/gemini-cli/pull/24774) -- Added flag for ept size and increased default size by @devr0306 in - [#24859](https://github.com/google-gemini/gemini-cli/pull/24859) -- fix(core): dispose Scheduler to prevent McpProgress listener leak by - @Anjaligarhwal in - [#24870](https://github.com/google-gemini/gemini-cli/pull/24870) -- fix(cli): switch default back to terminalBuffer=false and fix regressions - introduced for that mode by @jacob314 in - [#24873](https://github.com/google-gemini/gemini-cli/pull/24873) -- feat(cli): switch to ctrl+g from ctrl-x by @jacob314 in - [#24861](https://github.com/google-gemini/gemini-cli/pull/24861) -- fix: isolate concurrent browser agent instances by @gsquared94 in - [#24794](https://github.com/google-gemini/gemini-cli/pull/24794) -- docs: update MCP server OAuth redirect port documentation by @adamfweidman in - 
[#24844](https://github.com/google-gemini/gemini-cli/pull/24844) + [#25333](https://github.com/google-gemini/gemini-cli/pull/25333) +- feat(core): introduce decoupled ContextManager and Sidecar architecture by + @joshualitt in + [#24752](https://github.com/google-gemini/gemini-cli/pull/24752) +- docs(core): update generalist agent documentation by @abhipatel12 in + [#25325](https://github.com/google-gemini/gemini-cli/pull/25325) +- chore(mcp): check MCP error code over brittle string match by @jackwotherspoon + in [#25381](https://github.com/google-gemini/gemini-cli/pull/25381) +- feat(plan): update plan mode prompt to allow showing plan content by @ruomengz + in [#25058](https://github.com/google-gemini/gemini-cli/pull/25058) +- test(core): improve sandbox integration test coverage and fix OS-specific + failures by @ehedlund in + [#25307](https://github.com/google-gemini/gemini-cli/pull/25307) +- fix(core): use debug level for keychain fallback logging by @ehedlund in + [#25398](https://github.com/google-gemini/gemini-cli/pull/25398) +- feat(test): add a performance test in asian language by @cynthialong0-0 in + [#25392](https://github.com/google-gemini/gemini-cli/pull/25392) +- feat(cli): enable mouse clicking for cursor positioning in AskUser multi-line + answers by @Adib234 in + [#24630](https://github.com/google-gemini/gemini-cli/pull/24630) +- fix(core): detect kmscon terminal as supporting true color by @claygeo in + [#25282](https://github.com/google-gemini/gemini-cli/pull/25282) +- ci: add agent session drift check workflow by @adamfweidman in + [#25389](https://github.com/google-gemini/gemini-cli/pull/25389) +- use macos-latest-large runner where applicable. 
by @scidomino in + [#25413](https://github.com/google-gemini/gemini-cli/pull/25413) +- Changelog for v0.37.2 by @gemini-cli-robot in + [#25336](https://github.com/google-gemini/gemini-cli/pull/25336) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.37.0-preview.2...v0.38.0-preview.0 +https://github.com/google-gemini/gemini-cli/compare/v0.38.0-preview.0...v0.39.0-preview.0 diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 05368f20fe..2047a9b09d 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -2148,6 +2148,21 @@ the `advanced.excludedEnvVars` setting in your `settings.json` file. - When set, overrides the default API version used by the SDK. - Example: `export GOOGLE_GENAI_API_VERSION="v1"` (Windows PowerShell: `$env:GOOGLE_GENAI_API_VERSION="v1"`) +- **`GOOGLE_GEMINI_BASE_URL`**: + - Overrides the default base URL for Gemini API requests (when using + `gemini-api-key` authentication). + - Must be a valid URL. For security, it must use HTTPS unless pointing to + `localhost` (or `127.0.0.1` / `[::1]`). + - Example: `export GOOGLE_GEMINI_BASE_URL="https://my-proxy.com"` (Windows + PowerShell: `$env:GOOGLE_GEMINI_BASE_URL="https://my-proxy.com"`) +- **`GOOGLE_VERTEX_BASE_URL`**: + - Overrides the default base URL for Vertex AI API requests (when using + `vertex-ai` authentication). + - Must be a valid URL. For security, it must use HTTPS unless pointing to + `localhost` (or `127.0.0.1` / `[::1]`). 
+ - Example: `export GOOGLE_VERTEX_BASE_URL="https://my-vertex-proxy.com"` + (Windows PowerShell: + `$env:GOOGLE_VERTEX_BASE_URL="https://my-vertex-proxy.com"`) - **`OTLP_GOOGLE_CLOUD_PROJECT`**: - Your Google Cloud Project ID for Telemetry in Google Cloud - Example: `export OTLP_GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID"` (Windows diff --git a/evals/component-test-helper.ts b/evals/component-test-helper.ts index 9be68e6936..097f6e3d05 100644 --- a/evals/component-test-helper.ts +++ b/evals/component-test-helper.ts @@ -16,6 +16,7 @@ import fs from 'node:fs'; import path from 'node:path'; import os from 'node:os'; import { randomUUID } from 'node:crypto'; +import { vi } from 'vitest'; import { Config, type ConfigParameters, @@ -52,6 +53,7 @@ export interface ComponentEvalCase extends BaseEvalCase { export class ComponentRig { public config: Config | undefined; public testDir: string; + public homeDir: string; public sessionId: string; constructor( @@ -61,6 +63,9 @@ export class ComponentRig { this.testDir = fs.mkdtempSync( path.join(os.tmpdir(), `gemini-component-rig-${uniqueId.slice(0, 8)}-`), ); + this.homeDir = fs.mkdtempSync( + path.join(os.tmpdir(), `gemini-component-home-${uniqueId.slice(0, 8)}-`), + ); this.sessionId = `test-session-${uniqueId}`; } @@ -89,12 +94,23 @@ export class ComponentRig { this.config = makeFakeConfig(configParams); await this.config.initialize(); - // Refresh auth using USE_GEMINI to initialize the real BaseLlmClient + // Refresh auth using USE_GEMINI to initialize the real BaseLlmClient. + // This must happen BEFORE stubbing GEMINI_CLI_HOME because OAuth credential + // lookup resolves through homedir() → GEMINI_CLI_HOME. await this.config.refreshAuth(AuthType.USE_GEMINI); + + // Isolate storage paths (session files, skills, extraction state) by + // pointing GEMINI_CLI_HOME at a per-test temp directory. Storage resolves + // global paths through `homedir()` which reads this env var. 
This is set + // after auth so credential lookup uses the real home directory. + vi.stubEnv('GEMINI_CLI_HOME', this.homeDir); } async cleanup() { + await this.config?.dispose(); + vi.unstubAllEnvs(); fs.rmSync(this.testDir, { recursive: true, force: true }); + fs.rmSync(this.homeDir, { recursive: true, force: true }); } } diff --git a/evals/skill_extraction.eval.ts b/evals/skill_extraction.eval.ts new file mode 100644 index 0000000000..4149f29a67 --- /dev/null +++ b/evals/skill_extraction.eval.ts @@ -0,0 +1,341 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fsp from 'node:fs/promises'; +import path from 'node:path'; +import { describe, expect } from 'vitest'; +import { + type Config, + ApprovalMode, + SESSION_FILE_PREFIX, + getProjectHash, + startMemoryService, +} from '@google/gemini-cli-core'; +import { componentEvalTest } from './component-test-helper.js'; + +interface SeedSession { + sessionId: string; + summary: string; + userTurns: string[]; + timestampOffsetMinutes: number; +} + +interface MessageRecord { + id: string; + timestamp: string; + type: string; + content: Array<{ text: string }>; +} + +const WORKSPACE_FILES = { + 'package.json': JSON.stringify( + { + name: 'skill-extraction-eval', + private: true, + scripts: { + build: 'echo build', + lint: 'echo lint', + test: 'echo test', + }, + }, + null, + 2, + ), + 'README.md': `# Skill Extraction Eval + +This workspace exists to exercise background skill extraction from prior chats. 
+`, +}; + +function buildMessages(userTurns: string[]): MessageRecord[] { + const baseTime = new Date(Date.now() - 6 * 60 * 60 * 1000).toISOString(); + return userTurns.flatMap((text, index) => [ + { + id: `u${index + 1}`, + timestamp: baseTime, + type: 'user', + content: [{ text }], + }, + { + id: `a${index + 1}`, + timestamp: baseTime, + type: 'gemini', + content: [{ text: `Acknowledged: ${index + 1}` }], + }, + ]); +} + +async function seedSessions( + config: Config, + sessions: SeedSession[], +): Promise { + const chatsDir = path.join(config.storage.getProjectTempDir(), 'chats'); + await fsp.mkdir(chatsDir, { recursive: true }); + + const projectRoot = config.storage.getProjectRoot(); + + for (const session of sessions) { + const timestamp = new Date( + Date.now() - session.timestampOffsetMinutes * 60 * 1000, + ) + .toISOString() + .slice(0, 16) + .replace(/:/g, '-'); + const filename = `${SESSION_FILE_PREFIX}${timestamp}-${session.sessionId.slice(0, 8)}.json`; + const conversation = { + sessionId: session.sessionId, + projectHash: getProjectHash(projectRoot), + summary: session.summary, + startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(), + lastUpdated: new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString(), + messages: buildMessages(session.userTurns), + }; + + await fsp.writeFile( + path.join(chatsDir, filename), + JSON.stringify(conversation, null, 2), + ); + } +} + +async function runExtractionAndReadState(config: Config): Promise<{ + state: { runs: Array<{ sessionIds: string[]; skillsCreated: string[] }> }; + skillsDir: string; +}> { + await startMemoryService(config); + + const memoryDir = config.storage.getProjectMemoryTempDir(); + const skillsDir = config.storage.getProjectSkillsMemoryDir(); + const statePath = path.join(memoryDir, '.extraction-state.json'); + + const raw = await fsp.readFile(statePath, 'utf-8'); + const state = JSON.parse(raw) as { + runs?: Array<{ sessionIds?: string[]; skillsCreated?: string[] }>; + }; + if 
(!Array.isArray(state.runs) || state.runs.length === 0) { + throw new Error('Skill extraction finished without writing any run state'); + } + + return { + state: { + runs: state.runs.map((run) => ({ + sessionIds: Array.isArray(run.sessionIds) ? run.sessionIds : [], + skillsCreated: Array.isArray(run.skillsCreated) + ? run.skillsCreated + : [], + })), + }, + skillsDir, + }; +} + +async function readSkillBodies(skillsDir: string): Promise { + try { + const entries = await fsp.readdir(skillsDir, { withFileTypes: true }); + const skillDirs = entries.filter((entry) => entry.isDirectory()); + const bodies = await Promise.all( + skillDirs.map((entry) => + fsp.readFile(path.join(skillsDir, entry.name, 'SKILL.md'), 'utf-8'), + ), + ); + return bodies; + } catch { + return []; + } +} + +/** + * Shared configOverrides for all skill extraction component evals. + * - experimentalMemoryManager: enables the memory extraction pipeline. + * - approvalMode: YOLO auto-approves tool calls (write_file, read_file) so the + * background agent can execute without interactive confirmation. 
+ */ +const EXTRACTION_CONFIG_OVERRIDES = { + experimentalMemoryManager: true, + approvalMode: ApprovalMode.YOLO, +}; + +describe('Skill Extraction', () => { + componentEvalTest('USUALLY_PASSES', { + suiteName: 'skill-extraction', + suiteType: 'component-level', + name: 'ignores one-off incidents even when session summaries look similar', + files: WORKSPACE_FILES, + timeout: 180000, + configOverrides: EXTRACTION_CONFIG_OVERRIDES, + setup: async (config) => { + await seedSessions(config, [ + { + sessionId: 'incident-login-redirect', + summary: 'Debug login redirect loop in staging', + timestampOffsetMinutes: 420, + userTurns: [ + 'We only need a one-off fix for incident INC-4412 on branch hotfix/login-loop.', + 'The exact failing string is ERR_REDIRECT_4412 and this workaround is incident-specific.', + 'Patch packages/auth/src/redirect.ts just for this branch and do not generalize it.', + 'The thing that worked was deleting the stale staging cookie before retrying.', + 'This is not a normal workflow and should not become a reusable instruction.', + 'It only reproduced against the 2026-04-08 staging rollout.', + 'After the cookie clear, the branch-specific redirect logic passed.', + 'Do not turn this incident writeup into a standing process.', + 'Yes, the hotfix worked for this exact redirect-loop incident.', + 'Close out INC-4412 once the staging login succeeds again.', + ], + }, + { + sessionId: 'incident-login-timeout', + summary: 'Debug login callback timeout in staging', + timestampOffsetMinutes: 360, + userTurns: [ + 'This is another one-off staging incident, this time TICKET-991 for callback timeout.', + 'The exact failing string is ERR_CALLBACK_TIMEOUT_991 and it is unrelated to the redirect loop.', + 'The temporary fix was rotating the staging secret and deleting a bad feature-flag row.', + 'Do not write a generic login-debugging playbook from this.', + 'This only applied to the callback timeout during the April rollout.', + 'The successful fix was specific 
to the stale secret in staging.', + 'It does not define a durable repo workflow for future tasks.', + 'After rotating the secret, the callback timeout stopped reproducing.', + 'Treat this as incident response only, not a reusable skill.', + 'Once staging passed again, we closed TICKET-991.', + ], + }, + ]); + }, + assert: async (config) => { + const { state, skillsDir } = await runExtractionAndReadState(config); + const skillBodies = await readSkillBodies(skillsDir); + + expect(state.runs).toHaveLength(1); + expect(state.runs[0].sessionIds).toHaveLength(2); + expect(state.runs[0].skillsCreated).toEqual([]); + expect(skillBodies).toEqual([]); + }, + }); + + componentEvalTest('USUALLY_PASSES', { + suiteName: 'skill-extraction', + suiteType: 'component-level', + name: 'extracts a repeated project-specific workflow into a skill', + files: WORKSPACE_FILES, + timeout: 180000, + configOverrides: EXTRACTION_CONFIG_OVERRIDES, + setup: async (config) => { + await seedSessions(config, [ + { + sessionId: 'settings-docs-regen-1', + summary: 'Update settings docs after adding a config option', + timestampOffsetMinutes: 420, + userTurns: [ + 'When we add a new config option, we have to regenerate the settings docs in a specific order.', + 'The sequence that worked was npm run predocs:settings, npm run schema:settings, then npm run docs:settings.', + 'Do not hand-edit generated settings docs.', + 'If predocs is skipped, the generated schema docs miss the new defaults.', + 'Update the source first, then run that generation sequence.', + 'After regenerating, verify the schema output and docs changed together.', + 'We used this same sequence the last time we touched settings docs.', + 'That ordered workflow passed and produced the expected generated files.', + 'Please keep the exact command order because reversing it breaks the output.', + 'Yes, the generated settings docs were correct after those three commands.', + ], + }, + { + sessionId: 'settings-docs-regen-2', + summary: 
'Regenerate settings schema docs for another new setting', + timestampOffsetMinutes: 360, + userTurns: [ + 'We are touching another setting, so follow the same settings-doc regeneration workflow again.', + 'Run npm run predocs:settings before npm run schema:settings and npm run docs:settings.', + 'The project keeps generated settings docs in sync through those commands, not manual edits.', + 'Skipping predocs caused stale defaults in the generated output before.', + 'Change the source, then execute the same three commands in order.', + 'Verify both the schema artifact and docs update together after regeneration.', + 'This is the recurring workflow we use whenever a setting changes.', + 'The exact order worked again on this second settings update.', + 'Please preserve that ordering constraint for future settings changes.', + 'Confirmed: the settings docs regenerated correctly with the same command sequence.', + ], + }, + ]); + }, + assert: async (config) => { + const { state, skillsDir } = await runExtractionAndReadState(config); + const skillBodies = await readSkillBodies(skillsDir); + const combinedSkills = skillBodies.join('\n\n'); + + expect(state.runs).toHaveLength(1); + expect(state.runs[0].sessionIds).toHaveLength(2); + expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1); + expect(skillBodies.length).toBeGreaterThanOrEqual(1); + expect(combinedSkills).toContain('npm run predocs:settings'); + expect(combinedSkills).toContain('npm run schema:settings'); + expect(combinedSkills).toContain('npm run docs:settings'); + expect(combinedSkills).toMatch(/When to Use/i); + expect(combinedSkills).toMatch(/Verification/i); + }, + }); + + componentEvalTest('USUALLY_PASSES', { + suiteName: 'skill-extraction', + suiteType: 'component-level', + name: 'extracts a repeated multi-step migration workflow with ordering constraints', + files: WORKSPACE_FILES, + timeout: 180000, + configOverrides: EXTRACTION_CONFIG_OVERRIDES, + setup: async (config) => { + await 
seedSessions(config, [ + { + sessionId: 'db-migration-v12', + summary: 'Run database migration for v12 schema update', + timestampOffsetMinutes: 420, + userTurns: [ + 'Every time we change the database schema we follow a specific migration workflow.', + 'First run npm run db:check to verify no pending migrations conflict.', + 'Then run npm run db:migrate to apply the new migration files.', + 'After migration, always run npm run db:validate to confirm schema integrity.', + 'If db:validate fails, immediately run npm run db:rollback before anything else.', + 'Never skip db:check — last time we did, two migrations collided and corrupted the index.', + 'The ordering is critical: check, migrate, validate. Reversing migrate and validate caused silent data loss before.', + 'This v12 migration passed after following that exact sequence.', + 'We use this same three-step workflow every time the schema changes.', + 'Confirmed: db:check, db:migrate, db:validate completed successfully for v12.', + ], + }, + { + sessionId: 'db-migration-v13', + summary: 'Run database migration for v13 schema update', + timestampOffsetMinutes: 360, + userTurns: [ + 'New schema change for v13, following the same database migration workflow as before.', + 'Start with npm run db:check to ensure no conflicting pending migrations.', + 'Then npm run db:migrate to apply the v13 migration files.', + 'Then npm run db:validate to confirm the schema is consistent.', + 'If validation fails, run npm run db:rollback immediately — do not attempt manual fixes.', + 'We learned the hard way that skipping db:check causes index corruption.', + 'The check-migrate-validate order is mandatory for every schema change.', + 'This is the same recurring workflow we used for v12 and earlier migrations.', + 'The v13 migration passed with the same three-step sequence.', + 'Confirmed: the standard db migration workflow succeeded again for v13.', + ], + }, + ]); + }, + assert: async (config) => { + const { state, skillsDir } = 
await runExtractionAndReadState(config); + const skillBodies = await readSkillBodies(skillsDir); + const combinedSkills = skillBodies.join('\n\n'); + + expect(state.runs).toHaveLength(1); + expect(state.runs[0].sessionIds).toHaveLength(2); + expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1); + expect(skillBodies.length).toBeGreaterThanOrEqual(1); + expect(combinedSkills).toContain('npm run db:check'); + expect(combinedSkills).toContain('npm run db:migrate'); + expect(combinedSkills).toContain('npm run db:validate'); + expect(combinedSkills).toMatch(/rollback/i); + expect(combinedSkills).toMatch(/When to Use/i); + }, + }); +}); diff --git a/evals/subtask_delegation.eval.ts b/evals/subtask_delegation.eval.ts new file mode 100644 index 0000000000..6d197a0253 --- /dev/null +++ b/evals/subtask_delegation.eval.ts @@ -0,0 +1,130 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { + TRACKER_CREATE_TASK_TOOL_NAME, + TRACKER_UPDATE_TASK_TOOL_NAME, +} from '@google/gemini-cli-core'; +import { evalTest, TEST_AGENTS } from './test-helper.js'; + +describe('subtask delegation eval test cases', () => { + /** + * Checks that the main agent can correctly decompose a complex, sequential + * task into subtasks using the task tracker and delegate each to the appropriate expert subagent. + * + * The task requires: + * 1. Reading requirements (researcher) + * 2. Implementing logic (developer) + * 3. 
Documenting (doc expert) + */ + evalTest('USUALLY_PASSES', { + name: 'should delegate sequential subtasks to relevant experts using the task tracker', + params: { + settings: { + experimental: { + enableAgents: true, + taskTracker: true, + }, + }, + }, + prompt: + 'Please read the requirements in requirements.txt using a researcher, then implement the requested logic in src/logic.ts using a developer, and finally document the implementation in docs/logic.md using a documentation expert.', + files: { + '.gemini/agents/researcher.md': `--- +name: researcher +description: Expert in reading files and extracting requirements. +tools: + - read_file +--- +You are the researcher. Read the provided file and extract requirements.`, + '.gemini/agents/developer.md': `--- +name: developer +description: Expert in implementing logic in TypeScript. +tools: + - write_file +--- +You are the developer. Implement the requested logic in the specified file.`, + '.gemini/agents/doc-expert.md': `--- +name: doc-expert +description: Expert in writing technical documentation. +tools: + - write_file +--- +You are the doc expert. 
Document the provided implementation clearly.`, + 'requirements.txt': + 'Implement a function named "calculateSum" that adds two numbers.', + }, + assert: async (rig, _result) => { + // Verify tracker tasks were created + const wasCreateCalled = await rig.waitForToolCall( + TRACKER_CREATE_TASK_TOOL_NAME, + ); + expect(wasCreateCalled).toBe(true); + + const toolLogs = rig.readToolLogs(); + const createCalls = toolLogs.filter( + (l) => l.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME, + ); + expect(createCalls.length).toBeGreaterThanOrEqual(3); + + await rig.expectToolCallSuccess([ + 'researcher', + 'developer', + 'doc-expert', + ]); + + const logicFile = rig.readFile('src/logic.ts'); + const docFile = rig.readFile('docs/logic.md'); + + expect(logicFile).toContain('calculateSum'); + expect(docFile).toBeTruthy(); + }, + }); + + /** + * Checks that the main agent can delegate a batch of independent subtasks + * to multiple subagents in parallel using the task tracker to manage state. + */ + evalTest('USUALLY_PASSES', { + name: 'should delegate independent subtasks to specialists using the task tracker', + params: { + settings: { + experimental: { + enableAgents: true, + taskTracker: true, + }, + }, + }, + prompt: + 'Please update the project for internationalization (i18n), audit the security of the current code, and update the CSS to use a blue theme. 
Use specialized experts for each task.', + files: { + ...TEST_AGENTS.I18N_AGENT.asFile(), + ...TEST_AGENTS.SECURITY_AGENT.asFile(), + ...TEST_AGENTS.CSS_AGENT.asFile(), + 'index.ts': 'console.log("Hello World");', + }, + assert: async (rig, _result) => { + // Verify tracker tasks were created + const wasCreateCalled = await rig.waitForToolCall( + TRACKER_CREATE_TASK_TOOL_NAME, + ); + expect(wasCreateCalled).toBe(true); + + const toolLogs = rig.readToolLogs(); + const createCalls = toolLogs.filter( + (l) => l.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME, + ); + expect(createCalls.length).toBeGreaterThanOrEqual(3); + + await rig.expectToolCallSuccess([ + TEST_AGENTS.I18N_AGENT.name, + TEST_AGENTS.SECURITY_AGENT.name, + TEST_AGENTS.CSS_AGENT.name, + ]); + }, + }); +}); diff --git a/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap index 2b9090e237..258e994bfa 100644 --- a/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap @@ -11,12 +11,12 @@ exports[`Initial Theme Selection > should default to a dark theme when terminal │ ● 4. Default Dark (Matches terminal) │ 3 a, b = 0, 1 │ │ │ 5. Dracula Dark │ 4 for _ in range(n): │ │ │ 6. GitHub Dark │ 5 a, b = b, a + b │ │ -│ 7. Holiday Dark │ 6 return a │ │ -│ 8. Shades Of Purple Dark │ │ │ -│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │ -│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │ -│ 11. ANSI Light │ │ │ -│ 12. Ayu Light └─────────────────────────────────────────────────┘ │ +│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │ +│ 8. Holiday Dark │ │ │ +│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │ +│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │ +│ 11. Tokyo Night Dark │ │ │ +│ 12. 
ANSI Light └─────────────────────────────────────────────────┘ │ │ ▼ │ │ │ │ (Use Enter to select, Tab to configure scope, Esc to close) │ @@ -34,14 +34,14 @@ exports[`Initial Theme Selection > should default to a light theme when terminal │ 2. Ayu Light │ 1 # function │ │ │ ● 3. Default Light │ 2 def fibonacci(n): │ │ │ 4. GitHub Light │ 3 a, b = 0, 1 │ │ -│ 5. Google Code Light │ 4 for _ in range(n): │ │ -│ 6. Solarized Light │ 5 a, b = b, a + b │ │ -│ 7. Xcode Light │ 6 return a │ │ -│ 8. ANSI Dark (Incompatible) │ │ │ -│ 9. Atom One Dark (Incompatible) │ 1 - print("Hello, " + name) │ │ -│ 10. Ayu Dark (Incompatible) │ 1 + print(f"Hello, {name}!") │ │ -│ 11. Default Dark (Incompatible) │ │ │ -│ 12. Dracula Dark (Incompatible) └─────────────────────────────────────────────────┘ │ +│ 5. GitHub Light Colorblind Light (Mat… │ 4 for _ in range(n): │ │ +│ 6. Google Code Light │ 5 a, b = b, a + b │ │ +│ 7. Solarized Light │ 6 return a │ │ +│ 8. Xcode Light │ │ │ +│ 9. ANSI Dark (Incompatible) │ 1 - print("Hello, " + name) │ │ +│ 10. Atom One Dark (Incompatible) │ 1 + print(f"Hello, {name}!") │ │ +│ 11. Ayu Dark (Incompatible) │ │ │ +│ 12. Default Dark (Incompatible) └─────────────────────────────────────────────────┘ │ │ ▼ │ │ │ │ (Use Enter to select, Tab to configure scope, Esc to close) │ @@ -61,12 +61,12 @@ exports[`Initial Theme Selection > should use the theme from settings even if te │ 4. Default Dark (Matches terminal) │ 3 a, b = 0, 1 │ │ │ 5. Dracula Dark │ 4 for _ in range(n): │ │ │ 6. GitHub Dark │ 5 a, b = b, a + b │ │ -│ 7. Holiday Dark │ 6 return a │ │ -│ 8. Shades Of Purple Dark │ │ │ -│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │ -│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │ -│ 11. ANSI Light │ │ │ -│ 12. Ayu Light └─────────────────────────────────────────────────┘ │ +│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │ +│ 8. Holiday Dark │ │ │ +│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │ +│ 10. 
Solarized Dark │ 1 + print(f"Hello, {name}!") │ │ +│ 11. Tokyo Night Dark │ │ │ +│ 12. ANSI Light └─────────────────────────────────────────────────┘ │ │ ▼ │ │ │ │ (Use Enter to select, Tab to configure scope, Esc to close) │ @@ -100,12 +100,12 @@ exports[`ThemeDialog Snapshots > should render correctly in theme selection mode │ 4. Default Dark │ 3 a, b = 0, 1 │ │ │ 5. Dracula Dark │ 4 for _ in range(n): │ │ │ 6. GitHub Dark │ 5 a, b = b, a + b │ │ -│ 7. Holiday Dark │ 6 return a │ │ -│ 8. Shades Of Purple Dark │ │ │ -│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │ -│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │ -│ 11. ANSI Light │ │ │ -│ 12. Ayu Light └─────────────────────────────────────────────────┘ │ +│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │ +│ 8. Holiday Dark │ │ │ +│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │ +│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │ +│ 11. Tokyo Night Dark │ │ │ +│ 12. ANSI Light └─────────────────────────────────────────────────┘ │ │ ▼ │ │ │ │ (Use Enter to select, Tab to configure scope, Esc to close) │ @@ -125,12 +125,12 @@ exports[`ThemeDialog Snapshots > should render correctly in theme selection mode │ 4. Default Dark │ 3 a, b = 0, 1 │ │ │ 5. Dracula Dark │ 4 for _ in range(n): │ │ │ 6. GitHub Dark │ 5 a, b = b, a + b │ │ -│ 7. Holiday Dark │ 6 return a │ │ -│ 8. Shades Of Purple Dark │ │ │ -│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │ -│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │ -│ 11. ANSI Light │ │ │ -│ 12. Ayu Light └─────────────────────────────────────────────────┘ │ +│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │ +│ 8. Holiday Dark │ │ │ +│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │ +│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │ +│ 11. Tokyo Night Dark │ │ │ +│ 12. 
ANSI Light └─────────────────────────────────────────────────┘ │ │ ▼ │ │ ╭─────────────────────────────────────────────────╮ │ │ │ DEVELOPER TOOLS (Not visible to users) │ │ diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx b/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx index ec4aa00677..3e521a6627 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx @@ -858,11 +858,81 @@ describe('useSlashCommandProcessor', () => { }); describe('Lifecycle', () => { + it('removes the IDE status listener on unmount after async initialization', async () => { + let resolveIdeClient: + | ((client: { + addStatusChangeListener: (listener: () => void) => void; + removeStatusChangeListener: (listener: () => void) => void; + }) => void) + | undefined; + const addStatusChangeListener = vi.fn(); + const removeStatusChangeListener = vi.fn(); + + mockIdeClientGetInstance.mockImplementation( + () => + new Promise((resolve) => { + resolveIdeClient = resolve; + }), + ); + + const result = await setupProcessorHook(); + + await act(async () => { + resolveIdeClient?.({ + addStatusChangeListener, + removeStatusChangeListener, + }); + }); + + result.unmount(); + unmountHook = undefined; + + expect(addStatusChangeListener).toHaveBeenCalledTimes(1); + expect(removeStatusChangeListener).toHaveBeenCalledTimes(1); + expect(removeStatusChangeListener).toHaveBeenCalledWith( + addStatusChangeListener.mock.calls[0]?.[0], + ); + }); + + it('does not register an IDE status listener if unmounted before async initialization resolves', async () => { + let resolveIdeClient: + | ((client: { + addStatusChangeListener: (listener: () => void) => void; + removeStatusChangeListener: (listener: () => void) => void; + }) => void) + | undefined; + const addStatusChangeListener = vi.fn(); + const removeStatusChangeListener = vi.fn(); + + mockIdeClientGetInstance.mockImplementation( + () => + new Promise((resolve) => 
{ + resolveIdeClient = resolve; + }), + ); + + const result = await setupProcessorHook(); + + result.unmount(); + unmountHook = undefined; + + await act(async () => { + resolveIdeClient?.({ + addStatusChangeListener, + removeStatusChangeListener, + }); + }); + + expect(addStatusChangeListener).not.toHaveBeenCalled(); + expect(removeStatusChangeListener).not.toHaveBeenCalled(); + }); + it('should abort command loading when the hook unmounts', async () => { const abortSpy = vi.spyOn(AbortController.prototype, 'abort'); const { unmount } = await setupProcessorHook(); unmount(); + unmountHook = undefined; expect(abortSpy).toHaveBeenCalledTimes(1); }); diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.ts index f55503ad25..20de86002c 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts @@ -281,10 +281,16 @@ export const useSlashCommandProcessor = ( const listener = () => { reloadCommands(); }; + let isActive = true; + let activeIdeClient: IdeClient | undefined; // eslint-disable-next-line @typescript-eslint/no-floating-promises (async () => { const ideClient = await IdeClient.getInstance(); + if (!isActive) { + return; + } + activeIdeClient = ideClient; ideClient.addStatusChangeListener(listener); })(); @@ -307,11 +313,8 @@ export const useSlashCommandProcessor = ( coreEvents.on('extensionsStopping', extensionEventListener); return () => { - // eslint-disable-next-line @typescript-eslint/no-floating-promises - (async () => { - const ideClient = await IdeClient.getInstance(); - ideClient.removeStatusChangeListener(listener); - })(); + isActive = false; + activeIdeClient?.removeStatusChangeListener(listener); removeMCPStatusChangeListener(listener); coreEvents.off('extensionsStarting', extensionEventListener); coreEvents.off('extensionsStopping', extensionEventListener); diff --git 
a/packages/cli/src/ui/themes/builtin/dark/github-dark-colorblind.ts b/packages/cli/src/ui/themes/builtin/dark/github-dark-colorblind.ts new file mode 100644 index 0000000000..1af6fba87b --- /dev/null +++ b/packages/cli/src/ui/themes/builtin/dark/github-dark-colorblind.ts @@ -0,0 +1,147 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { type ColorsTheme, Theme } from '../../theme.js'; +import { interpolateColor } from '../../color-utils.js'; + +const githubDarkColorblindColors: ColorsTheme = { + type: 'dark', + Background: '#0d1117', + Foreground: '#e6edf3', + LightBlue: '#a5d6ff', + AccentBlue: '#79c0ff', + AccentPurple: '#d2a8ff', + AccentCyan: '#a5d6ff', + AccentGreen: '#a5d6ff', + AccentYellow: '#d29922', + AccentRed: '#f0883e', + DiffAdded: '#0d161f', + DiffRemoved: '#1d150e', + Comment: '#7d8590', + Gray: '#7d8590', + DarkGray: interpolateColor('#7d8590', '#0d1117', 0.5), + GradientColors: ['#58a6ff', '#f0883e'], +}; + +export const GitHubDarkColorblind: Theme = new Theme( + 'GitHub Dark Colorblind', + 'dark', + { + hljs: { + display: 'block', + overflowX: 'auto', + padding: '0.5em', + color: githubDarkColorblindColors.Foreground, + background: githubDarkColorblindColors.Background, + }, + 'hljs-comment': { + color: githubDarkColorblindColors.Comment, + fontStyle: 'italic', + }, + 'hljs-quote': { + color: githubDarkColorblindColors.Comment, + fontStyle: 'italic', + }, + 'hljs-keyword': { + color: githubDarkColorblindColors.AccentRed, + fontWeight: 'bold', + }, + 'hljs-selector-tag': { + color: githubDarkColorblindColors.AccentRed, + fontWeight: 'bold', + }, + 'hljs-subst': { + color: githubDarkColorblindColors.Foreground, + }, + 'hljs-number': { + color: githubDarkColorblindColors.LightBlue, + }, + 'hljs-literal': { + color: githubDarkColorblindColors.LightBlue, + }, + 'hljs-variable': { + color: githubDarkColorblindColors.Foreground, + }, + 'hljs-template-variable': { + color: 
githubDarkColorblindColors.Foreground, + }, + 'hljs-tag .hljs-attr': { + color: githubDarkColorblindColors.AccentYellow, + }, + 'hljs-string': { + color: githubDarkColorblindColors.AccentCyan, + }, + 'hljs-doctag': { + color: githubDarkColorblindColors.AccentCyan, + }, + 'hljs-title': { + color: githubDarkColorblindColors.AccentPurple, + fontWeight: 'bold', + }, + 'hljs-section': { + color: githubDarkColorblindColors.AccentPurple, + fontWeight: 'bold', + }, + 'hljs-selector-id': { + color: githubDarkColorblindColors.AccentPurple, + fontWeight: 'bold', + }, + 'hljs-type': { + color: githubDarkColorblindColors.AccentGreen, + fontWeight: 'bold', + }, + 'hljs-class .hljs-title': { + color: githubDarkColorblindColors.AccentGreen, + fontWeight: 'bold', + }, + 'hljs-tag': { + color: githubDarkColorblindColors.AccentGreen, + }, + 'hljs-name': { + color: githubDarkColorblindColors.AccentGreen, + }, + 'hljs-attribute': { + color: githubDarkColorblindColors.LightBlue, + }, + 'hljs-regexp': { + color: githubDarkColorblindColors.AccentCyan, + }, + 'hljs-link': { + color: githubDarkColorblindColors.AccentCyan, + }, + 'hljs-symbol': { + color: githubDarkColorblindColors.AccentPurple, + }, + 'hljs-bullet': { + color: githubDarkColorblindColors.AccentPurple, + }, + 'hljs-built_in': { + color: githubDarkColorblindColors.LightBlue, + }, + 'hljs-builtin-name': { + color: githubDarkColorblindColors.LightBlue, + }, + 'hljs-meta': { + color: githubDarkColorblindColors.LightBlue, + fontWeight: 'bold', + }, + 'hljs-deletion': { + background: '#682d0f', + color: githubDarkColorblindColors.AccentRed, + }, + 'hljs-addition': { + background: '#0c2d6b', + color: githubDarkColorblindColors.AccentGreen, + }, + 'hljs-emphasis': { + fontStyle: 'italic', + }, + 'hljs-strong': { + fontWeight: 'bold', + }, + }, + githubDarkColorblindColors, +); diff --git a/packages/cli/src/ui/themes/builtin/light/github-light-colorblind.ts b/packages/cli/src/ui/themes/builtin/light/github-light-colorblind.ts new file 
mode 100644 index 0000000000..eb36fd32d8 --- /dev/null +++ b/packages/cli/src/ui/themes/builtin/light/github-light-colorblind.ts @@ -0,0 +1,147 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { type ColorsTheme, Theme } from '../../theme.js'; +import { interpolateColor } from '../../color-utils.js'; + +const githubLightColorblindColors: ColorsTheme = { + type: 'light', + Background: '#ffffff', + Foreground: '#1f2328', + LightBlue: '#0a3069', + AccentBlue: '#0550ae', + AccentPurple: '#8250df', + AccentCyan: '#0a3069', + AccentGreen: '#0969da', + AccentYellow: '#9a6700', + AccentRed: '#bc4c00', + DiffAdded: '#ddf4ff', + DiffRemoved: '#fff1e5', + Comment: '#656d76', + Gray: '#656d76', + DarkGray: interpolateColor('#656d76', '#ffffff', 0.5), + GradientColors: ['#0969da', '#bc4c00'], +}; + +export const GitHubLightColorblind: Theme = new Theme( + 'GitHub Light Colorblind', + 'light', + { + hljs: { + display: 'block', + overflowX: 'auto', + padding: '0.5em', + color: githubLightColorblindColors.Foreground, + background: githubLightColorblindColors.Background, + }, + 'hljs-comment': { + color: githubLightColorblindColors.Comment, + fontStyle: 'italic', + }, + 'hljs-quote': { + color: githubLightColorblindColors.Comment, + fontStyle: 'italic', + }, + 'hljs-keyword': { + color: githubLightColorblindColors.AccentRed, + fontWeight: 'bold', + }, + 'hljs-selector-tag': { + color: githubLightColorblindColors.AccentRed, + fontWeight: 'bold', + }, + 'hljs-subst': { + color: githubLightColorblindColors.Foreground, + }, + 'hljs-number': { + color: githubLightColorblindColors.LightBlue, + }, + 'hljs-literal': { + color: githubLightColorblindColors.LightBlue, + }, + 'hljs-variable': { + color: githubLightColorblindColors.Foreground, + }, + 'hljs-template-variable': { + color: githubLightColorblindColors.Foreground, + }, + 'hljs-tag .hljs-attr': { + color: githubLightColorblindColors.AccentYellow, + }, + 'hljs-string': { + 
color: githubLightColorblindColors.AccentCyan, + }, + 'hljs-doctag': { + color: githubLightColorblindColors.AccentCyan, + }, + 'hljs-title': { + color: githubLightColorblindColors.AccentPurple, + fontWeight: 'bold', + }, + 'hljs-section': { + color: githubLightColorblindColors.AccentPurple, + fontWeight: 'bold', + }, + 'hljs-selector-id': { + color: githubLightColorblindColors.AccentPurple, + fontWeight: 'bold', + }, + 'hljs-type': { + color: githubLightColorblindColors.AccentGreen, + fontWeight: 'bold', + }, + 'hljs-class .hljs-title': { + color: githubLightColorblindColors.AccentGreen, + fontWeight: 'bold', + }, + 'hljs-tag': { + color: githubLightColorblindColors.AccentGreen, + }, + 'hljs-name': { + color: githubLightColorblindColors.AccentGreen, + }, + 'hljs-attribute': { + color: githubLightColorblindColors.LightBlue, + }, + 'hljs-regexp': { + color: githubLightColorblindColors.AccentCyan, + }, + 'hljs-link': { + color: githubLightColorblindColors.AccentCyan, + }, + 'hljs-symbol': { + color: githubLightColorblindColors.AccentPurple, + }, + 'hljs-bullet': { + color: githubLightColorblindColors.AccentPurple, + }, + 'hljs-built_in': { + color: githubLightColorblindColors.LightBlue, + }, + 'hljs-builtin-name': { + color: githubLightColorblindColors.LightBlue, + }, + 'hljs-meta': { + color: githubLightColorblindColors.LightBlue, + fontWeight: 'bold', + }, + 'hljs-deletion': { + background: '#fff1e5', + color: githubLightColorblindColors.AccentRed, + }, + 'hljs-addition': { + background: '#ddf4ff', + color: githubLightColorblindColors.AccentGreen, + }, + 'hljs-emphasis': { + fontStyle: 'italic', + }, + 'hljs-strong': { + fontWeight: 'bold', + }, + }, + githubLightColorblindColors, +); diff --git a/packages/cli/src/ui/themes/theme-manager.ts b/packages/cli/src/ui/themes/theme-manager.ts index 9f0a7e528a..83848142d6 100644 --- a/packages/cli/src/ui/themes/theme-manager.ts +++ b/packages/cli/src/ui/themes/theme-manager.ts @@ -10,6 +10,8 @@ import { AtomOneDark } from 
'./builtin/dark/atom-one-dark.js'; import { Dracula } from './builtin/dark/dracula-dark.js'; import { GitHubDark } from './builtin/dark/github-dark.js'; import { GitHubLight } from './builtin/light/github-light.js'; +import { GitHubDarkColorblind } from './builtin/dark/github-dark-colorblind.js'; +import { GitHubLightColorblind } from './builtin/light/github-light-colorblind.js'; import { GoogleCode } from './builtin/light/googlecode-light.js'; import { Holiday } from './builtin/dark/holiday-dark.js'; import { DefaultLight } from './builtin/light/default-light.js'; @@ -79,6 +81,8 @@ class ThemeManager { DefaultDark, GitHubDark, GitHubLight, + GitHubDarkColorblind, + GitHubLightColorblind, GoogleCode, Holiday, ShadesOfPurple, diff --git a/packages/core/src/agents/skill-extraction-agent.test.ts b/packages/core/src/agents/skill-extraction-agent.test.ts new file mode 100644 index 0000000000..a67c7db270 --- /dev/null +++ b/packages/core/src/agents/skill-extraction-agent.test.ts @@ -0,0 +1,90 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from 'vitest'; +import { SkillExtractionAgent } from './skill-extraction-agent.js'; +import { + EDIT_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + LS_TOOL_NAME, + READ_FILE_TOOL_NAME, + WRITE_FILE_TOOL_NAME, +} from '../tools/tool-names.js'; +import { PREVIEW_GEMINI_FLASH_MODEL } from '../config/models.js'; + +describe('SkillExtractionAgent', () => { + const skillsDir = '/tmp/skills'; + const sessionIndex = + '[NEW] Debug login flow (12 user msgs) — /tmp/chats/session-1.json'; + const existingSkillsSummary = + '## Workspace Skills (.gemini/skills — do NOT duplicate)\n- **existing-skill**: Existing description'; + + const agent = SkillExtractionAgent( + skillsDir, + sessionIndex, + existingSkillsSummary, + ); + + it('should expose expected metadata, model, and tools', () => { + expect(agent.kind).toBe('local'); + expect(agent.name).toBe('confucius'); + 
expect(agent.displayName).toBe('Skill Extractor'); + expect(agent.modelConfig.model).toBe(PREVIEW_GEMINI_FLASH_MODEL); + expect(agent.toolConfig?.tools).toEqual( + expect.arrayContaining([ + READ_FILE_TOOL_NAME, + WRITE_FILE_TOOL_NAME, + EDIT_TOOL_NAME, + LS_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + ]), + ); + }); + + it('should default to no skill unless recurrence and durability are proven', () => { + const prompt = agent.promptConfig.systemPrompt; + + expect(prompt).toContain('Default to NO SKILL.'); + expect(prompt).toContain( + 'strong evidence this will recur for future agents in this repo/workflow', + ); + expect(prompt).toContain('broader than a single incident'); + expect(prompt).toContain('A skill MUST meet ALL of these criteria:'); + expect(prompt).toContain( + 'Future agents in this repo/workflow are likely to need it', + ); + }); + + it('should explicitly reject one-off incidents and single-session preferences', () => { + const prompt = agent.promptConfig.systemPrompt; + + expect(prompt).toContain('Single-session preferences'); + expect(prompt).toContain('One-off incidents'); + expect(prompt).toContain('Output-style preferences'); + expect(prompt).toContain('cannot survive renaming the specific'); + }); + + it('should warn that session summaries are user-intent summaries, not workflow evidence', () => { + const query = agent.promptConfig.query ?? 
''; + + expect(query).toContain(existingSkillsSummary); + expect(query).toContain(sessionIndex); + expect(query).toContain( + 'The summary is a user-intent summary, not a workflow summary.', + ); + expect(query).toContain( + 'The session summaries describe user intent, not workflow details.', + ); + expect(query).toContain( + 'Only write a skill if the evidence shows a durable, recurring workflow', + ); + expect(query).toContain( + 'If recurrence or future reuse is unclear, create no skill and explain why.', + ); + }); +}); diff --git a/packages/core/src/agents/skill-extraction-agent.ts b/packages/core/src/agents/skill-extraction-agent.ts index 2678bd206d..771c94eb2f 100644 --- a/packages/core/src/agents/skill-extraction-agent.ts +++ b/packages/core/src/agents/skill-extraction-agent.ts @@ -36,7 +36,7 @@ function buildSystemPrompt(skillsDir: string): string { '- solve similar tasks with fewer tool calls and fewer reasoning tokens', '- reuse proven workflows and verification checklists', '- avoid known failure modes and landmines', - '- anticipate user preferences without being reminded', + '- capture durable workflow constraints that future agents are likely to encounter again', '', '============================================================', 'SAFETY AND HYGIENE (STRICT)', @@ -59,6 +59,10 @@ function buildSystemPrompt(skillsDir: string): string { '1. "Is this something a competent agent would NOT already know?" If no, STOP.', '2. "Does an existing skill (listed below) already cover this?" If yes, STOP.', '3. "Can I write a concrete, step-by-step procedure?" If no, STOP.', + '4. "Is there strong evidence this will recur for future agents in this repo/workflow?" If no, STOP.', + '5. "Is this broader than a single incident (one bug, one ticket, one branch, one date, one exact error)?" 
If no, STOP.', + '', + 'Default to NO SKILL.', '', 'Do NOT create skills for:', '', @@ -67,6 +71,10 @@ function buildSystemPrompt(skillsDir: string): string { '- **Pure Q&A**: The user asked "how does X work?" and got an answer. No procedure.', '- **Brainstorming/design**: Discussion of how to build something, without a validated', ' implementation that produced a reusable procedure.', + '- **Single-session preferences**: User-specific style/output preferences or workflow', + ' preferences mentioned only once.', + '- **One-off incidents**: Debugging or incident response tied to a single bug, ticket,', + ' branch, date, or exact error string.', '- **Anything already covered by an existing skill** (global, workspace, builtin, or', ' previously extracted). Check the "Existing Skills" section carefully.', '', @@ -74,31 +82,40 @@ function buildSystemPrompt(skillsDir: string): string { 'WHAT COUNTS AS A SKILL', '============================================================', '', - 'A skill MUST meet BOTH of these criteria:', + 'A skill MUST meet ALL of these criteria:', '', '1. **Procedural and concrete**: It can be expressed as numbered steps with specific', ' commands, paths, or code patterns. If you can only write vague guidance, it is NOT', ' a skill. "Be careful with X" is advice, not a skill.', '', - '2. **Non-obvious and project-specific**: A competent agent would NOT already know this.', - ' It encodes project-specific knowledge, non-obvious ordering constraints, or', - ' hard-won failure shields that cannot be inferred from the codebase alone.', + '2. **Durable and reusable**: Future agents in this repo/workflow are likely to need it', + ' again. If it only solved one incident, it is NOT a skill.', '', - 'Confidence tiers (prefer higher tiers):', + '3. **Evidence-backed and project-specific**: It encodes project-specific knowledge,', + ' repeated operational constraints, or hard-won failure shields supported by session', + ' evidence. 
Do not assume something is non-obvious just because it sounds detailed.', '', - '**High confidence** — create the skill:', - '- The same workflow appeared in multiple sessions (cross-session repetition)', - '- A multi-step procedure was validated (tests passed, user confirmed success)', + 'Confidence tiers:', '', - '**Medium confidence** — create the skill if it is clearly project-specific:', - '- A project-specific build/test/deploy/release procedure was established', - '- A non-obvious ordering constraint or prerequisite was discovered', - '- A failure mode was hit and a concrete fix was found and verified', + '**High confidence** — create the skill only when recurrence/durability is clear:', + '- The same workflow appeared in multiple sessions (cross-session repetition), OR it is', + ' a stable recurring repo workflow (for example setup/build/test/deploy/release) with a', + ' clear future trigger', + '- The workflow was validated (tests passed, user confirmed success, or the same fix', + ' worked repeatedly)', + '- The skill can be named without referencing a specific incident, bug, branch, or date', + '', + '**Medium confidence** — usually do NOT create the skill yet:', + '- A project-specific procedure appeared once and seems useful, but recurrence is not yet', + ' clear', + '- A verified fix exists, but it is still tied to one incident', + '- A user correction changed the approach once, but durability is uncertain', '', '**Low confidence** — do NOT create the skill:', '- A one-off debugging session with no reusable procedure', '- Generic workflows any agent could figure out from the codebase', '- A code review or investigation with no durable takeaway', + '- Output-style preferences that do not materially change procedure', '', 'Aim for 0-2 skills per run. 
Quality over quantity.', '', @@ -117,8 +134,10 @@ function buildSystemPrompt(skillsDir: string): string { '', 'What to look for:', '', - '- User corrections: "No, do it this way" -> preference signal', + '- User corrections that change procedure in a durable way, especially when repeated', + ' across sessions', '- Repeated patterns across sessions: same commands, same file paths, same workflow', + '- Stable recurring repo lifecycle workflows with clear future triggers', '- Failed attempts followed by successful ones -> failure shield', '- Multi-step procedures that were validated (tests passed, user confirmed)', '- User interruptions: "Stop, you need to X first" -> ordering constraint', @@ -129,6 +148,8 @@ function buildSystemPrompt(skillsDir: string): string { '- Tool outputs that are just data (file contents, search results)', '- Speculative plans that were never executed', "- Temporary context (current branch name, today's date, specific error IDs)", + '- Similar session summaries without matching workflow evidence', + '- One-off artifact names: bug IDs, branch names, timestamps, exact incident strings', '', '============================================================', 'SKILL FORMAT', @@ -214,7 +235,10 @@ function buildSystemPrompt(skillsDir: string): string { '- Keep scopes distinct. Avoid overlapping "do-everything" skills.', '- Every skill MUST have: triggers, procedure, at least one pitfall or verification step.', '- If you cannot write a reliable procedure (too many unknowns), do NOT create the skill.', - '- Do not create skills for generic advice that any competent agent would already know.', + '- If the candidate is tied to one incident or cannot survive renaming the specific', + ' bug/ticket, do NOT create it.', + '- Do not create skills for generic advice, output-style preferences, or ephemeral', + ' choices that any competent agent would already know or adapt to on the fly.', '- Prefer fewer, higher-quality skills. 0-2 skills per run is typical. 
3+ is unusual.', '', '============================================================', @@ -224,17 +248,23 @@ function buildSystemPrompt(skillsDir: string): string { `1. Use list_directory on ${skillsDir} to see existing skills.`, '2. If skills exist, read their SKILL.md files to understand what is already captured.', '3. Scan the session index provided in the query. Look for [NEW] sessions whose summaries', - ' suggest workflows that ALSO appear in other sessions (either [NEW] or [old]).', - '4. Apply the minimum signal gate. If no repeated patterns are visible, report that and finish.', + ' hint at workflows that ALSO appear in other sessions (either [NEW] or [old]) or at a', + ' stable recurring repo workflow. Remember: summary similarity alone is NOT enough.', + '4. Apply the minimum signal gate. If recurrence or durability is not visible, report that', + ' no skill should be created and finish.', '5. For promising patterns, use read_file on the session file paths to inspect the full', - ' conversation. Confirm the workflow was actually repeated and validated.', - '6. For each confirmed skill, verify it meets ALL criteria (repeatable, procedural, high-leverage).', + ' conversation. Confirm the workflow was actually repeated and validated. Read at least', + ' two sessions unless the candidate is clearly a stable recurring repo lifecycle workflow.', + '6. For each candidate, verify it meets ALL criteria. Before writing, make sure you can', + ' state: future trigger, evidence sessions, recurrence signal, validation signal, and', + ' why it is not generic.', '7. Write new SKILL.md files or update existing ones in your directory using write_file.', ' For skills that live OUTSIDE your directory, write a .patch file instead (see UPDATING EXISTING SKILLS).', '8. Write COMPLETE files — never partially update a SKILL.md.', '', 'IMPORTANT: Do NOT read every session. Only read sessions whose summaries suggest a', - 'repeated pattern worth investigating. 
Most runs should read 0-3 sessions and create 0 skills.', + 'repeated pattern or a stable recurring repo workflow worth investigating. Most runs', + 'should read 0-3 sessions and create 0 skills.', 'Do not explore the codebase. Work only with the session index, session files, and the skills directory.', ].join('\n'); } @@ -301,6 +331,9 @@ export const SkillExtractionAgent = ( 'Below is an index of past conversation sessions. Each line shows:', '[NEW] or [old] status, a 1-line summary, message count, and the file path.', '', + 'The summary is a user-intent summary, not a workflow summary.', + 'Matching summary text alone is never enough evidence for a reusable skill.', + '', '[NEW] = not yet processed for skill extraction (focus on these)', '[old] = previously processed (read only if a [NEW] session hints at a repeated pattern)', '', @@ -319,7 +352,7 @@ export const SkillExtractionAgent = ( return { systemPrompt: buildSystemPrompt(skillsDir), - query: `${initialContext}\n\nAnalyze the session index above. Read sessions that suggest repeated workflows using read_file. Extract reusable skills to ${skillsDir}/.`, + query: `${initialContext}\n\nAnalyze the session index above. The session summaries describe user intent, not workflow details. Read sessions that suggest repeated workflows using read_file. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. 
If recurrence or future reuse is unclear, create no skill and explain why.`, }; }, runConfig: { diff --git a/packages/core/src/core/contentGenerator.test.ts b/packages/core/src/core/contentGenerator.test.ts index 35d7879f96..bf7eef167d 100644 --- a/packages/core/src/core/contentGenerator.test.ts +++ b/packages/core/src/core/contentGenerator.test.ts @@ -148,7 +148,7 @@ describe('createContentGenerator', () => { ); expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': expect.stringMatching( @@ -365,7 +365,7 @@ describe('createContentGenerator', () => { expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': expect.any(String), @@ -409,7 +409,7 @@ describe('createContentGenerator', () => { expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': expect.any(String), @@ -443,7 +443,7 @@ describe('createContentGenerator', () => { expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': expect.any(String), @@ -481,7 +481,7 @@ describe('createContentGenerator', () => { ); expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: { 'User-Agent': expect.any(String), @@ -517,7 +517,7 @@ describe('createContentGenerator', () => { expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': 
expect.any(String), @@ -550,7 +550,7 @@ describe('createContentGenerator', () => { expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': expect.any(String), @@ -589,7 +589,7 @@ describe('createContentGenerator', () => { expect(GoogleGenAI).toHaveBeenCalledWith({ apiKey: 'test-api-key', - vertexai: undefined, + vertexai: false, httpOptions: expect.objectContaining({ headers: expect.objectContaining({ 'User-Agent': expect.any(String), @@ -638,6 +638,193 @@ describe('createContentGenerator', () => { apiVersion: 'v1alpha', }); }); + + it('should pass baseUrl to GoogleGenAI when GOOGLE_GEMINI_BASE_URL is set', async () => { + const mockConfig = { + getModel: vi.fn().mockReturnValue('gemini-pro'), + getProxy: vi.fn().mockReturnValue(undefined), + getUsageStatisticsEnabled: () => false, + getClientName: vi.fn().mockReturnValue(undefined), + } as unknown as Config; + + const mockGenerator = { + models: {}, + } as unknown as GoogleGenAI; + vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); + vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local'); + vi.stubEnv('GEMINI_API_KEY', 'test-api-key'); + + const config = await createContentGeneratorConfig( + mockConfig, + AuthType.USE_GEMINI, + ); + await createContentGenerator(config, mockConfig); + + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: 'test-api-key', + vertexai: false, + httpOptions: expect.objectContaining({ + baseUrl: 'https://gemini.test.local', + }), + }), + ); + }); + + it('should pass baseUrl to GoogleGenAI when GOOGLE_VERTEX_BASE_URL is set', async () => { + const mockConfig = { + getModel: vi.fn().mockReturnValue('gemini-pro'), + getProxy: vi.fn().mockReturnValue(undefined), + getUsageStatisticsEnabled: () => false, + getClientName: vi.fn().mockReturnValue(undefined), + } as unknown as Config; + + const 
mockGenerator = { + models: {}, + } as unknown as GoogleGenAI; + vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); + vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local'); + vi.stubEnv('GOOGLE_CLOUD_PROJECT', 'my-project'); + vi.stubEnv('GOOGLE_CLOUD_LOCATION', 'us-central1'); + + const config = await createContentGeneratorConfig( + mockConfig, + AuthType.USE_VERTEX_AI, + ); + await createContentGenerator(config, mockConfig); + + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: undefined, + vertexai: true, + httpOptions: expect.objectContaining({ + baseUrl: 'https://vertex.test.local', + }), + }), + ); + }); + + it('should prefer GOOGLE_VERTEX_BASE_URL when authType is USE_VERTEX_AI without inferred vertex credentials', async () => { + const mockConfig = { + getModel: vi.fn().mockReturnValue('gemini-pro'), + getProxy: vi.fn().mockReturnValue(undefined), + getUsageStatisticsEnabled: () => false, + getClientName: vi.fn().mockReturnValue(undefined), + } as unknown as Config; + + const mockGenerator = { + models: {}, + } as unknown as GoogleGenAI; + vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); + vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local'); + vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local'); + + await createContentGenerator( + { + authType: AuthType.USE_VERTEX_AI, + }, + mockConfig, + ); + + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: undefined, + vertexai: true, + httpOptions: expect.objectContaining({ + baseUrl: 'https://vertex.test.local', + }), + }), + ); + }); + + it('should prefer an explicit baseUrl over GOOGLE_GEMINI_BASE_URL', async () => { + const mockConfig = { + getModel: vi.fn().mockReturnValue('gemini-pro'), + getProxy: vi.fn().mockReturnValue(undefined), + getUsageStatisticsEnabled: () => false, + getClientName: vi.fn().mockReturnValue(undefined), + } as unknown as Config; + + const 
mockGenerator = { + models: {}, + } as unknown as GoogleGenAI; + vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); + vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://env.test.local'); + vi.stubEnv('GEMINI_API_KEY', 'test-api-key'); + + const config = await createContentGeneratorConfig( + mockConfig, + AuthType.USE_GEMINI, + undefined, + 'https://explicit.test.local', + ); + await createContentGenerator(config, mockConfig); + + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + httpOptions: expect.objectContaining({ + baseUrl: 'https://explicit.test.local', + }), + }), + ); + }); + + it('should allow localhost baseUrl overrides over http', async () => { + const mockConfig = { + getModel: vi.fn().mockReturnValue('gemini-pro'), + getProxy: vi.fn().mockReturnValue(undefined), + getUsageStatisticsEnabled: () => false, + getClientName: vi.fn().mockReturnValue(undefined), + } as unknown as Config; + + const mockGenerator = { + models: {}, + } as unknown as GoogleGenAI; + vi.mocked(GoogleGenAI).mockImplementation(() => mockGenerator as never); + + await createContentGenerator( + { + apiKey: 'test-api-key', + authType: AuthType.USE_GEMINI, + baseUrl: 'http://127.0.0.1:8080', + }, + mockConfig, + ); + + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + httpOptions: expect.objectContaining({ + baseUrl: 'http://127.0.0.1:8080', + }), + }), + ); + }); + + it('should reject invalid custom baseUrl values', async () => { + await expect( + createContentGenerator( + { + apiKey: 'test-api-key', + authType: AuthType.USE_GEMINI, + baseUrl: 'not-a-url', + }, + mockConfig, + ), + ).rejects.toThrow('Invalid custom base URL: not-a-url'); + }); + + it('should reject non-https remote custom baseUrl values', async () => { + await expect( + createContentGenerator( + { + apiKey: 'test-api-key', + authType: AuthType.USE_GEMINI, + baseUrl: 'http://example.com', + }, + mockConfig, + ), + ).rejects.toThrow('Custom base URL must use 
HTTPS unless it is localhost.'); + }); }); describe('createContentGeneratorConfig', () => { diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 4fc56b59b4..31e36ede41 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -101,6 +101,21 @@ export type ContentGeneratorConfig = { customHeaders?: Record; }; +const LOCAL_HOSTNAMES = ['localhost', '127.0.0.1', '[::1]']; + +function validateBaseUrl(baseUrl: string): void { + let url: URL; + try { + url = new URL(baseUrl); + } catch { + throw new Error(`Invalid custom base URL: ${baseUrl}`); + } + + if (url.protocol !== 'https:' && !LOCAL_HOSTNAMES.includes(url.hostname)) { + throw new Error('Custom base URL must use HTTPS unless it is localhost.'); + } +} + export async function createContentGeneratorConfig( config: Config, authType: AuthType | undefined, @@ -273,18 +288,32 @@ export async function createContentGenerator( 'x-gemini-api-privileged-user-id': `${installationId}`, }; } + let baseUrl = config.baseUrl; + if (!baseUrl) { + const envBaseUrl = + config.authType === AuthType.USE_VERTEX_AI + ? process.env['GOOGLE_VERTEX_BASE_URL'] + : process.env['GOOGLE_GEMINI_BASE_URL']; + if (envBaseUrl) { + validateBaseUrl(envBaseUrl); + baseUrl = envBaseUrl; + } + } else { + validateBaseUrl(baseUrl); + } + const httpOptions: { baseUrl?: string; headers: Record; } = { headers }; - if (config.baseUrl) { - httpOptions.baseUrl = config.baseUrl; + if (baseUrl) { + httpOptions.baseUrl = baseUrl; } const googleGenAI = new GoogleGenAI({ apiKey: config.apiKey === '' ? undefined : config.apiKey, - vertexai: config.vertexai, + vertexai: config.vertexai ?? 
config.authType === AuthType.USE_VERTEX_AI, httpOptions, ...(apiVersionEnv && { apiVersion: apiVersionEnv }), }); diff --git a/packages/core/src/telemetry/memory-monitor.test.ts b/packages/core/src/telemetry/memory-monitor.test.ts index 8ad0d45595..9cb0e91caa 100644 --- a/packages/core/src/telemetry/memory-monitor.test.ts +++ b/packages/core/src/telemetry/memory-monitor.test.ts @@ -17,13 +17,18 @@ import { _resetGlobalMemoryMonitorForTests, } from './memory-monitor.js'; import type { Config } from '../config/config.js'; -import { recordMemoryUsage, isPerformanceMonitoringActive } from './metrics.js'; +import { + recordMemoryUsage, + recordCpuUsage, + isPerformanceMonitoringActive, +} from './metrics.js'; import { HighWaterMarkTracker } from './high-water-mark-tracker.js'; import { RateLimiter } from './rate-limiter.js'; // Mock dependencies vi.mock('./metrics.js', () => ({ recordMemoryUsage: vi.fn(), + recordCpuUsage: vi.fn(), isPerformanceMonitoringActive: vi.fn(), MemoryMetricType: { HEAP_USED: 'heap_used', @@ -50,6 +55,7 @@ vi.mock('node:process', () => ({ })); const mockRecordMemoryUsage = vi.mocked(recordMemoryUsage); +const mockRecordCpuUsage = vi.mocked(recordCpuUsage); const mockIsPerformanceMonitoringActive = vi.mocked( isPerformanceMonitoringActive, ); @@ -192,6 +198,13 @@ describe('MemoryMonitor', () => { component: 'test_context', }, ); + expect(mockRecordCpuUsage).toHaveBeenCalledWith( + mockConfig, + expect.any(Number), + { + component: 'test_context', + }, + ); }); it('should not record metrics when performance monitoring is inactive', () => { diff --git a/packages/core/src/telemetry/memory-monitor.ts b/packages/core/src/telemetry/memory-monitor.ts index e005bd73cc..aeaecc6ca0 100644 --- a/packages/core/src/telemetry/memory-monitor.ts +++ b/packages/core/src/telemetry/memory-monitor.ts @@ -12,6 +12,7 @@ import { isUserActive } from './activity-detector.js'; import { HighWaterMarkTracker } from './high-water-mark-tracker.js'; import { 
recordMemoryUsage, + recordCpuUsage, MemoryMetricType, isPerformanceMonitoringActive, } from './metrics.js'; @@ -37,6 +38,7 @@ export class MemoryMonitor { private intervalId: NodeJS.Timeout | null = null; private isRunning = false; private lastSnapshot: MemorySnapshot | null = null; + private lastCpuUsage: NodeJS.CpuUsage | null = null; private monitoringInterval: number = 10000; private highWaterMarkTracker: HighWaterMarkTracker; private rateLimiter: RateLimiter; @@ -191,6 +193,13 @@ export class MemoryMonitor { memory_type: MemoryMetricType.RSS, component: context, }); + + // Record delta CPU usage (in microseconds) + const cpuUsage = process.cpuUsage(this.lastCpuUsage ?? undefined); + this.lastCpuUsage = process.cpuUsage(); + recordCpuUsage(config, cpuUsage.user + cpuUsage.system, { + component: context, + }); } this.lastSnapshot = snapshot; diff --git a/packages/test-utils/src/perf-test-harness.ts b/packages/test-utils/src/perf-test-harness.ts index 2f376f58b6..f0520ccecb 100644 --- a/packages/test-utils/src/perf-test-harness.ts +++ b/packages/test-utils/src/perf-test-harness.ts @@ -147,7 +147,9 @@ export class PerfTestHarness { throw new Error(`No active timer found for label "${label}"`); } - const wallClockMs = performance.now() - timer.startTime; + // Round wall-clock time to nearest 0.1 ms + const wallClockMs = + Math.round((performance.now() - timer.startTime) * 10) / 10; const cpuDelta = process.cpuUsage(timer.startCpuUsage); this.activeTimers.delete(label); diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 734c1b9546..906a7760bf 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -193,6 +193,28 @@ export function checkModelOutputContent( return isValid; } +export interface MetricDataPoint { + attributes?: Record; + value?: { + sum?: number; + min?: number; + max?: number; + count?: number; + }; + startTime?: [number, number]; + endTime?: string; +} + +export interface 
TelemetryMetric { + descriptor: { + name: string; + type?: string; + description?: string; + unit?: string; + }; + dataPoints: MetricDataPoint[]; +} + export interface ParsedLog { attributes?: { 'event.name'?: string; @@ -213,11 +235,7 @@ export interface ParsedLog { prompt_id?: string; }; scopeMetrics?: { - metrics: { - descriptor: { - name: string; - }; - }[]; + metrics: TelemetryMetric[]; }[]; } @@ -1297,6 +1315,10 @@ export class TestRig { return logs; } + readTelemetryLogs(): ParsedLog[] { + return this._readAndParseTelemetryLog(); + } + private _readAndParseTelemetryLog(): ParsedLog[] { // Telemetry is always written to the test directory const logFilePath = join(this.homeDir!, 'telemetry.log'); @@ -1450,7 +1472,7 @@ export class TestRig { ); } - readMetric(metricName: string): Record | null { + readMetric(metricName: string): TelemetryMetric | null { const logs = this._readAndParseTelemetryLog(); for (const logData of logs) { if (logData.scopeMetrics) { diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json index d6972342d4..caf92bedb6 100644 --- a/perf-tests/baselines.json +++ b/perf-tests/baselines.json @@ -1,14 +1,14 @@ { "version": 1, - "updatedAt": "2026-04-09T02:30:22.000Z", + "updatedAt": "2026-04-14T14:04:02.662Z", "scenarios": { "cold-startup-time": { - "wallClockMs": 927.553249999999, + "wallClockMs": 927.6, "cpuTotalUs": 1470, "timestamp": "2026-04-08T22:27:54.871Z" }, "idle-cpu-usage": { - "wallClockMs": 5000.460750000002, + "wallClockMs": 5000.5, "cpuTotalUs": 12157, "timestamp": "2026-04-08T22:28:19.098Z" }, @@ -18,7 +18,7 @@ "timestamp": "2026-04-14T15:22:56.133Z" }, "skill-loading-time": { - "wallClockMs": 930.0920409999962, + "wallClockMs": 930.1, "cpuTotalUs": 1323, "timestamp": "2026-04-08T22:28:23.290Z" }, @@ -26,6 +26,31 @@ "wallClockMs": 1119.9, "cpuTotalUs": 2100, "timestamp": "2026-04-09T02:30:22.000Z" + }, + "long-conversation-resume": { + "wallClockMs": 4212.5, + "cpuTotalUs": 351393, + "timestamp": 
"2026-04-14T14:02:53.268Z" + }, + "long-conversation-typing": { + "wallClockMs": 113.7, + "cpuTotalUs": 3304, + "timestamp": "2026-04-14T14:03:12.525Z" + }, + "long-conversation-execution": { + "wallClockMs": 248.7, + "cpuTotalUs": 3825, + "timestamp": "2026-04-14T14:03:28.575Z" + }, + "long-conversation-terminal-scrolling": { + "wallClockMs": 362.4, + "cpuTotalUs": 12755860, + "timestamp": "2026-04-14T14:03:45.687Z" + }, + "long-conversation-alternate-scrolling": { + "wallClockMs": 362.4, + "cpuTotalUs": 12755860, + "timestamp": "2026-04-14T14:04:02.662Z" } } } diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts index 4bbc5ab0ea..a100382f48 100644 --- a/perf-tests/perf-usage.test.ts +++ b/perf-tests/perf-usage.test.ts @@ -5,10 +5,20 @@ */ import { describe, it, beforeAll, afterAll } from 'vitest'; -import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils'; +import { + TestRig, + PerfTestHarness, + type PerfSnapshot, +} from '@google/gemini-cli-test-utils'; import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { existsSync, readFileSync } from 'node:fs'; +import { + existsSync, + readFileSync, + mkdirSync, + copyFileSync, + writeFileSync, +} from 'node:fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); const BASELINES_PATH = join(__dirname, 'baselines.json'); @@ -195,7 +205,7 @@ describe('CPU Performance Tests', () => { const snapshot = await harness.measureWithEventLoop( 'high-volume-output', async () => { - const runResult = await rig.run({ + await rig.run({ args: ['Generate 1M lines of output'], timeout: 120000, env: { @@ -206,7 +216,6 @@ describe('CPU Performance Tests', () => { DEBUG: 'true', }, }); - console.log(` Child Process Output:`, runResult); }, ); @@ -246,8 +255,7 @@ describe('CPU Performance Tests', () => { JSON.stringify(toolLatencyMetric), ); } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const logs = (rig as 
any)._readAndParseTelemetryLog(); + const logs = rig.readTelemetryLogs(); console.log(` Total telemetry log entries: ${logs.length}`); for (const logData of logs) { if (logData.scopeMetrics) { @@ -272,10 +280,9 @@ describe('CPU Performance Tests', () => { const findValue = (percentile: string) => { const dp = eventLoopMetric.dataPoints.find( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (p: any) => p.attributes.percentile === percentile, + (p) => p.attributes?.['percentile'] === percentile, ); - return dp ? dp.value.min : undefined; + return dp?.value?.min; }; snapshot.childEventLoopDelayP50Ms = findValue('p50'); @@ -296,4 +303,358 @@ describe('CPU Performance Tests', () => { harness.assertWithinBaseline(result); } }); + + describe('long-conversation', () => { + let rig: TestRig; + const identifier = 'perf-long-conversation'; + const SESSION_ID = + 'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11'; + const LARGE_CHAT_SOURCE = join( + __dirname, + '..', + 'memory-tests', + 'large-chat-session.json', + ); + + beforeAll(async () => { + if (!existsSync(LARGE_CHAT_SOURCE)) { + throw new Error( + `Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`, + ); + } + + rig = new TestRig(); + rig.setup(identifier, { + fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'), + }); + + const geminiDir = join(rig.homeDir!, '.gemini'); + const projectTempDir = join(geminiDir, 'tmp', identifier); + const targetChatsDir = join(projectTempDir, 'chats'); + + mkdirSync(targetChatsDir, { recursive: true }); + writeFileSync( + join(geminiDir, 'projects.json'), + JSON.stringify({ + projects: { [rig.testDir!]: identifier }, + }), + ); + writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!); + copyFileSync( + LARGE_CHAT_SOURCE, + join(targetChatsDir, `session-${SESSION_ID}.json`), + ); + }); + + afterAll(async () => { + await rig.cleanup(); + }); + + it('session-load: resume a 60MB chat history', async () => { + const 
result = await harness.runScenario( + 'long-conversation-resume', + async () => { + const snapshot = await harness.measureWithEventLoop( + 'resume', + async () => { + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + await run.kill(); + }, + ); + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('typing: latency when typing into a large session', async () => { + const result = await harness.runScenario( + 'long-conversation-typing', + async () => { + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + const snapshot = await harness.measureWithEventLoop( + 'typing', + async () => { + // On average, the expected latency per key is under 30ms. 
+ for (const char of 'Hello') { + await run.type(char); + } + }, + ); + + await run.kill(); + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('execution: response latency for a simple shell command', async () => { + const result = await harness.runScenario( + 'long-conversation-execution', + async () => { + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + await run.expectText('Type your message'); + + const snapshot = await harness.measureWithEventLoop( + 'execution', + async () => { + await run.sendKeys('!echo hi\r'); + await run.expectText('hi'); + }, + ); + + await run.kill(); + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => { + const result = await harness.runScenario( + 'long-conversation-terminal-scrolling', + async () => { + // Enable terminalBuffer to intentionally test CLI scrolling logic + const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json'); + writeFileSync( + settingsPath, + JSON.stringify({ + security: { folderTrust: { enabled: false } }, + ui: { terminalBuffer: true }, + }), + ); + + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + await run.expectText('Type your message'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[5~'); // PageUp + } + + // Scroll to the very top 
+ await run.sendKeys('\u001b[H'); // Home + // Verify top line of chat is visible. + await run.expectText('Authenticated with'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[6~'); // PageDown + } + + await rig.waitForTelemetryReady(); + await run.kill(); + + const eventLoopMetric = rig.readMetric('event_loop.delay'); + const cpuMetric = rig.readMetric('cpu.usage'); + + let p50Ms = 0; + let p95Ms = 0; + let maxMs = 0; + if (eventLoopMetric) { + const dataPoints = eventLoopMetric.dataPoints; + const p50Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p50', + ); + const p95Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p95', + ); + const maxData = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'max', + ); + + if (p50Data?.value?.sum) p50Ms = p50Data.value.sum; + if (p95Data?.value?.sum) p95Ms = p95Data.value.sum; + if (maxData?.value?.sum) maxMs = maxData.value.sum; + } + + let cpuTotalUs = 0; + if (cpuMetric) { + const dataPoints = cpuMetric.dataPoints; + for (const dp of dataPoints) { + if (dp.value?.sum && dp.value.sum > 0) { + cpuTotalUs += dp.value.sum; + } + } + } + const cpuUserUs = cpuTotalUs; + const cpuSystemUs = 0; + + const snapshot: PerfSnapshot = { + timestamp: Date.now(), + label: 'scrolling', + wallClockMs: Math.round(p50Ms * 10) / 10, + cpuTotalUs, + cpuUserUs, + cpuSystemUs, + eventLoopDelayP50Ms: p50Ms, + eventLoopDelayP95Ms: p95Ms, + eventLoopDelayMaxMs: maxMs, + }; + + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => { + const result = await harness.runScenario( + 'long-conversation-alternate-scrolling', + async () => { + // Enable useAlternateBuffer to intentionally test CLI scrolling logic + const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json'); + writeFileSync( 
+ settingsPath, + JSON.stringify({ + security: { folderTrust: { enabled: false } }, + ui: { useAlternateBuffer: true }, + }), + ); + + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + await run.expectText('Type your message'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[5~'); // PageUp + } + + // Scroll to the very top + await run.sendKeys('\u001b[H'); // Home + // Verify top line of chat is visible. + await run.expectText('Authenticated with'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[6~'); // PageDown + } + + await rig.waitForTelemetryReady(); + await run.kill(); + + const eventLoopMetric = rig.readMetric('event_loop.delay'); + const cpuMetric = rig.readMetric('cpu.usage'); + + let p50Ms = 0; + let p95Ms = 0; + let maxMs = 0; + if (eventLoopMetric) { + const dataPoints = eventLoopMetric.dataPoints; + const p50Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p50', + ); + const p95Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p95', + ); + const maxData = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'max', + ); + + if (p50Data?.value?.sum) p50Ms = p50Data.value.sum; + if (p95Data?.value?.sum) p95Ms = p95Data.value.sum; + if (maxData?.value?.sum) maxMs = maxData.value.sum; + } + + let cpuTotalUs = 0; + if (cpuMetric) { + const dataPoints = cpuMetric.dataPoints; + for (const dp of dataPoints) { + if (dp.value?.sum && dp.value.sum > 0) { + cpuTotalUs += dp.value.sum; + } + } + } + const cpuUserUs = cpuTotalUs; + const cpuSystemUs = 0; + + const snapshot: PerfSnapshot = { + timestamp: Date.now(), + label: 'scrolling', + wallClockMs: Math.round(p50Ms * 10) / 10, + cpuTotalUs, + cpuUserUs, + cpuSystemUs, + eventLoopDelayP50Ms: p50Ms, + 
eventLoopDelayP95Ms: p95Ms, + eventLoopDelayMaxMs: maxMs, + }; + + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + }); }); diff --git a/perf-tests/perf.long-chat.responses b/perf-tests/perf.long-chat.responses new file mode 100644 index 0000000000..7cf057e5a4 --- /dev/null +++ b/perf-tests/perf.long-chat.responses @@ -0,0 +1,4 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"complexity_reasoning\":\"simple\",\"complexity_score\":1}"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I am a large conversation model response."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"candidatesTokenCount":10,"promptTokenCount":20,"totalTokenCount":30}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"originalSummary\":\"large chat summary\",\"events\":[]}"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"countTokens","response":{"totalTokens":100}}