From 00b7781c3cc639282a642dd5642fd3bd8099812e Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Wed, 15 Apr 2026 16:14:12 -0700 Subject: [PATCH 1/3] fix(evals): update eval tests for invoke_agent telemetry and project-scoped memory (#25502) --- evals/cli_help_delegation.eval.ts | 14 ++++- evals/generalist_agent.eval.ts | 17 +++++- evals/save_memory.eval.ts | 94 +++++++++++++++++++++---------- 3 files changed, 88 insertions(+), 37 deletions(-) diff --git a/evals/cli_help_delegation.eval.ts b/evals/cli_help_delegation.eval.ts index e1714c0636..32d6701917 100644 --- a/evals/cli_help_delegation.eval.ts +++ b/evals/cli_help_delegation.eval.ts @@ -17,9 +17,17 @@ describe('CliHelpAgent Delegation', () => { timeout: 60000, assert: async (rig, _result) => { const toolLogs = rig.readToolLogs(); - const toolCallIndex = toolLogs.findIndex( - (log) => log.toolRequest.name === 'cli_help', - ); + const toolCallIndex = toolLogs.findIndex((log) => { + if (log.toolRequest.name === 'invoke_agent') { + try { + const args = JSON.parse(log.toolRequest.args); + return args.agent_name === 'cli_help'; + } catch { + return false; + } + } + return false; + }); expect(toolCallIndex).toBeGreaterThan(-1); expect(toolCallIndex).toBeLessThan(5); // Called within first 5 turns }, diff --git a/evals/generalist_agent.eval.ts b/evals/generalist_agent.eval.ts index b8313079e9..8c3f3d0632 100644 --- a/evals/generalist_agent.eval.ts +++ b/evals/generalist_agent.eval.ts @@ -26,11 +26,22 @@ describe('generalist_agent', () => { prompt: 'Please use the generalist agent to create a file called "generalist_test_file.txt" containing exactly the following text: success', assert: async (rig) => { - // 1) Verify the generalist agent was invoked - const foundToolCall = await rig.waitForToolCall('generalist'); + // 1) Verify the generalist agent was invoked via invoke_agent + const foundToolCall = await rig.waitForToolCall( + 'invoke_agent', + undefined, + (args) => { + try { + const parsed = JSON.parse(args); + return parsed.agent_name === 'generalist'; + } catch { + return false; + } + }, + ); expect( foundToolCall, - 'Expected to find a tool call for generalist agent', + 'Expected to find an invoke_agent tool call for generalist agent', ).toBeTruthy(); // 2) Verify the file was created as expected diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index 5a228ed065..314f052f19 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -145,22 +145,30 @@ describe('save_memory', () => { }, }); - const ignoringDbSchemaLocation = - "Agent ignores workspace's database schema location"; + const savingDbSchemaLocationAsProjectMemory = + 'Agent saves workspace database schema location as project memory'; evalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', - name: ignoringDbSchemaLocation, + name: savingDbSchemaLocationAsProjectMemory, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, assert: async (rig, result) => { - await rig.waitForTelemetryReady(); - const wasToolCalled = rig - .readToolLogs() - .some((log) => log.toolRequest.name === 'save_memory'); + const wasToolCalled = await rig.waitForToolCall( + 'save_memory', + undefined, + (args) => { + try { + const params = JSON.parse(args); + return params.scope === 'project'; + } catch { + return false; + } + }, + ); expect( wasToolCalled, - 'save_memory should not be called for workspace-specific information', - ).toBe(false); + 'Expected save_memory to be called with scope="project" for workspace-specific information', + ).toBe(true); assertModelHasOutput(result); }, @@ -188,42 +196,59 @@ describe('save_memory', () => { }, }); - const ignoringBuildArtifactLocation = - 'Agent ignores workspace build artifact location'; + const savingBuildArtifactLocationAsProjectMemory = + 'Agent saves workspace build artifact location as project memory'; evalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', - name: ignoringBuildArtifactLocation, + name: savingBuildArtifactLocationAsProjectMemory, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, assert: async (rig, result) => { - await rig.waitForTelemetryReady(); - const wasToolCalled = rig - .readToolLogs() - .some((log) => log.toolRequest.name === 'save_memory'); + const wasToolCalled = await rig.waitForToolCall( + 'save_memory', + undefined, + (args) => { + try { + const params = JSON.parse(args); + return params.scope === 'project'; + } catch { + return false; + } + }, + ); expect( wasToolCalled, - 'save_memory should not be called for workspace-specific information', - ).toBe(false); + 'Expected save_memory to be called with scope="project" for workspace-specific information', + ).toBe(true); assertModelHasOutput(result); }, }); - const ignoringMainEntryPoint = "Agent ignores workspace's main entry point"; + const savingMainEntryPointAsProjectMemory = + 'Agent saves workspace main entry point as project memory'; evalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', - name: ignoringMainEntryPoint, + name: savingMainEntryPointAsProjectMemory, prompt: `The main entry point for this workspace is \`src/index.js\`.`, assert: async (rig, result) => { - await rig.waitForTelemetryReady(); - const wasToolCalled = rig - .readToolLogs() - .some((log) => log.toolRequest.name === 'save_memory'); + const wasToolCalled = await rig.waitForToolCall( + 'save_memory', + undefined, + (args) => { + try { + const params = JSON.parse(args); + return params.scope === 'project'; + } catch { + return false; + } + }, + ); expect( wasToolCalled, - 'save_memory should not be called for workspace-specific information', - ).toBe(false); + 'Expected save_memory to be called with scope="project" for workspace-specific information', + ).toBe(true); assertModelHasOutput(result); }, @@ -317,13 +342,13 @@ describe('save_memory', () => { 'Please save any persistent preferences or facts about me from our conversation to memory.', assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall( - 'save_memory', + 'invoke_agent', undefined, - (args) => /vitest/i.test(args), + (args) => /save_memory/i.test(args) && /vitest/i.test(args), ); expect( wasToolCalled, - 'Expected save_memory to be called with the Vitest preference from the conversation history', + 'Expected invoke_agent to be called with save_memory agent and the Vitest preference from the conversation history', ).toBe(true); assertModelHasOutput(result); @@ -379,8 +404,15 @@ describe('save_memory', () => { ], prompt: 'Please save the preferences I mentioned earlier to memory.', assert: async (rig, result) => { - const wasToolCalled = await rig.waitForToolCall('save_memory'); - expect(wasToolCalled, 'Expected save_memory to be called').toBe(true); + const wasToolCalled = await rig.waitForToolCall( + 'invoke_agent', + undefined, + (args) => /save_memory/i.test(args), + ); + expect( + wasToolCalled, + 'Expected invoke_agent to be called with save_memory agent', + ).toBe(true); assertModelHasOutput(result); }, From 34a9d6e42173cd5536e1ed998f1be5fc7f197dfc Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Wed, 15 Apr 2026 17:06:43 -0700 Subject: [PATCH 2/3] Changelog for v0.38.1 (#25476) Co-authored-by: gemini-cli-robot <224641728+gemini-cli-robot@users.noreply.github.com> Co-authored-by: Sam Roberts <158088236+g-samroberts@users.noreply.github.com> --- docs/changelogs/latest.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index 0a105857f3..f5d87b8c9e 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.38.0 +# Latest stable release: v0.38.1 -Released: April 14, 2026 +Released: April 15, 2026 For most users, our latest stable release is the recommended release. Install the latest stable version with: @@ -29,6 +29,9 @@ npm install -g @google/gemini-cli ## What's Changed +- fix(patch): cherry-pick 050c303 to release/v0.38.0-pr-25317 to patch version + v0.38.0 and create version 0.38.1 by @gemini-cli-robot in + [#25466](https://github.com/google-gemini/gemini-cli/pull/25466) - fix(cli): refresh slash command list after /skills reload by @NTaylorMullen in [#24454](https://github.com/google-gemini/gemini-cli/pull/24454) - Update README.md for links. by @g-samroberts in @@ -265,4 +268,4 @@ npm install -g @google/gemini-cli [#24844](https://github.com/google-gemini/gemini-cli/pull/24844) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.37.2...v0.38.0 +https://github.com/google-gemini/gemini-cli/compare/v0.38.0...v0.38.1 From 6d7974f1effbe2a349e8d766e5cc5bd1874e1307 Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Wed, 15 Apr 2026 18:38:59 -0700 Subject: [PATCH 3/3] feat(core): integrate skill-creator into skill extraction agent (#25421) --- evals/skill_extraction.eval.ts | 12 +++- .../core/src/agents/skill-extraction-agent.ts | 68 ++++++------------- 2 files changed, 31 insertions(+), 49 deletions(-) diff --git a/evals/skill_extraction.eval.ts b/evals/skill_extraction.eval.ts index 4149f29a67..28ca557f01 100644 --- a/evals/skill_extraction.eval.ts +++ b/evals/skill_extraction.eval.ts @@ -272,8 +272,12 @@ describe('Skill Extraction', () => { expect(combinedSkills).toContain('npm run predocs:settings'); expect(combinedSkills).toContain('npm run schema:settings'); expect(combinedSkills).toContain('npm run docs:settings'); - expect(combinedSkills).toMatch(/When to Use/i); expect(combinedSkills).toMatch(/Verification/i); + + // Verify the extraction agent activated skill-creator for design guidance. + expect(config.getSkillManager().isSkillActive('skill-creator')).toBe( + true, + ); }, }); @@ -335,7 +339,11 @@ describe('Skill Extraction', () => { expect(combinedSkills).toContain('npm run db:migrate'); expect(combinedSkills).toContain('npm run db:validate'); expect(combinedSkills).toMatch(/rollback/i); - expect(combinedSkills).toMatch(/When to Use/i); + + // Verify the extraction agent activated skill-creator for design guidance. + expect(config.getSkillManager().isSkillActive('skill-creator')).toBe( + true, + ); }, }); }); diff --git a/packages/core/src/agents/skill-extraction-agent.ts b/packages/core/src/agents/skill-extraction-agent.ts index 771c94eb2f..4aa18af388 100644 --- a/packages/core/src/agents/skill-extraction-agent.ts +++ b/packages/core/src/agents/skill-extraction-agent.ts @@ -7,11 +7,13 @@ import { z } from 'zod'; import type { LocalAgentDefinition } from './types.js'; import { + ACTIVATE_SKILL_TOOL_NAME, EDIT_TOOL_NAME, GLOB_TOOL_NAME, GREP_TOOL_NAME, LS_TOOL_NAME, READ_FILE_TOOL_NAME, + SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, } from '../tools/tool-names.js'; import { PREVIEW_GEMINI_FLASH_MODEL } from '../config/models.js'; @@ -152,45 +154,6 @@ function buildSystemPrompt(skillsDir: string): string { '- One-off artifact names: bug IDs, branch names, timestamps, exact incident strings', '', '============================================================', - 'SKILL FORMAT', - '============================================================', - '', - 'Each skill is a directory containing a SKILL.md file with YAML frontmatter', - 'and optional supporting scripts.', - '', - 'Directory structure:', - ` ${skillsDir}//`, - ' SKILL.md # Required entrypoint', - ' scripts/.* # Optional helper scripts (Python stdlib-only or shell)', - '', - 'SKILL.md structure:', - '', - ' ---', - ' name: ', - ' description: <1-2 lines; include concrete triggers in user-like language>', - ' ---', - '', - ' ## When to Use', - ' ', - '', - ' ## Procedure', - ' ', - '', - ' ## Pitfalls and Fixes', - ' likely cause -> fix; only include observed failures>', - '', - ' ## Verification', - ' ', - '', - 'Supporting scripts (optional but recommended when applicable):', - '- Put helper scripts in scripts/ and reference them from SKILL.md', - '- Prefer Python (stdlib only) or small shell scripts', - '- Make scripts safe: no destructive actions, no secrets, deterministic output', - '- Include a usage example in SKILL.md', - '', - 'Naming: kebab-case (e.g., fix-lint-errors, run-migrations).', - '', - '============================================================', 'UPDATING EXISTING SKILLS (PATCHES)', '============================================================', '', @@ -247,20 +210,28 @@ function buildSystemPrompt(skillsDir: string): string { '', `1. Use list_directory on ${skillsDir} to see existing skills.`, '2. If skills exist, read their SKILL.md files to understand what is already captured.', - '3. Scan the session index provided in the query. Look for [NEW] sessions whose summaries', + '3. Use activate_skill to load the "skill-creator" skill. Follow its design guidance', + ' (conciseness, progressive disclosure, frontmatter format, bundled resources) when', + ' writing SKILL.md files. You may also use its init_skill.cjs script to scaffold new', + ' skill directories and package_skill.cjs to validate finished skills.', + ' IMPORTANT: You are a background agent with no user interaction. Skip any interactive', + ' steps in the skill-creator guide (asking clarifying questions, requesting user feedback,', + ' installation prompts, iteration loops). Use only its format and quality guidance.', + '4. Scan the session index provided in the query. Look for [NEW] sessions whose summaries', ' hint at workflows that ALSO appear in other sessions (either [NEW] or [old]) or at a', ' stable recurring repo workflow. Remember: summary similarity alone is NOT enough.', - '4. Apply the minimum signal gate. If recurrence or durability is not visible, report that', + '5. Apply the minimum signal gate. If recurrence or durability is not visible, report that', ' no skill should be created and finish.', - '5. For promising patterns, use read_file on the session file paths to inspect the full', + '6. For promising patterns, use read_file on the session file paths to inspect the full', ' conversation. Confirm the workflow was actually repeated and validated. Read at least', ' two sessions unless the candidate is clearly a stable recurring repo lifecycle workflow.', - '6. For each candidate, verify it meets ALL criteria. Before writing, make sure you can', + '7. For each candidate, verify it meets ALL criteria. Before writing, make sure you can', ' state: future trigger, evidence sessions, recurrence signal, validation signal, and', ' why it is not generic.', - '7. Write new SKILL.md files or update existing ones in your directory using write_file.', + '8. Write new SKILL.md files or update existing ones in your directory.', + ' Use run_shell_command to run init_skill.cjs for scaffolding and package_skill.cjs for validation.', ' For skills that live OUTSIDE your directory, write a .patch file instead (see UPDATING EXISTING SKILLS).', - '8. Write COMPLETE files — never partially update a SKILL.md.', + '9. Write COMPLETE files — never partially update a SKILL.md.', '', 'IMPORTANT: Do NOT read every session. Only read sessions whose summaries suggest a', 'repeated pattern or a stable recurring repo workflow worth investigating. Most runs', @@ -274,8 +245,9 @@ function buildSystemPrompt(skillsDir: string): string { * writes reusable SKILL.md files to the project memory directory. * * This agent is designed to run in the background on session startup. - * It has restricted tool access (file tools only, no shell or user interaction) - * and is prompted to only operate within the skills memory directory. + * It has restricted tool access (file tools, shell, and skill activation — no + * user interaction) and is prompted to only operate within the skills memory + * directory. */ export const SkillExtractionAgent = ( skillsDir: string, @@ -309,12 +281,14 @@ export const SkillExtractionAgent = ( }, toolConfig: { tools: [ + ACTIVATE_SKILL_TOOL_NAME, READ_FILE_TOOL_NAME, WRITE_FILE_TOOL_NAME, EDIT_TOOL_NAME, LS_TOOL_NAME, GLOB_TOOL_NAME, GREP_TOOL_NAME, + SHELL_TOOL_NAME, ], }, get promptConfig() {