Merge branch 'main' into memory_usage3

This commit is contained in:
Adib234
2026-03-09 11:39:28 -04:00
committed by GitHub
643 changed files with 26192 additions and 10259 deletions

View File

@@ -55,7 +55,7 @@ describe.skip('ACP Environment and Auth', () => {
const bundlePath = join(import.meta.dirname, '..', 'bundle/gemini.js');
child = spawn('node', [bundlePath, '--experimental-acp'], {
child = spawn('node', [bundlePath, '--acp'], {
cwd: rig.homeDir!,
stdio: ['pipe', 'pipe', 'inherit'],
env: {
@@ -120,7 +120,7 @@ describe.skip('ACP Environment and Auth', () => {
const bundlePath = join(import.meta.dirname, '..', 'bundle/gemini.js');
child = spawn('node', [bundlePath, '--experimental-acp'], {
child = spawn('node', [bundlePath, '--acp'], {
cwd: rig.homeDir!,
stdio: ['pipe', 'pipe', 'inherit'],
env: {

View File

@@ -58,7 +58,7 @@ describe('ACP telemetry', () => {
'node',
[
bundlePath,
'--experimental-acp',
'--acp',
'--fake-responses',
join(rig.testDir!, 'fake-responses.json'),
],

View File

@@ -0,0 +1 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. "}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0}],"finishReason":"STOP"}]}

View File

@@ -0,0 +1,50 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestRig } from './test-helper.js';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
describe('API Resilience E2E', () => {
let rig: TestRig;
beforeEach(() => {
rig = new TestRig();
});
afterEach(async () => {
await rig.cleanup();
});
it('should not crash when receiving metadata-only chunks in a stream', async () => {
await rig.setup('api-resilience-metadata-only', {
fakeResponsesPath: join(
dirname(fileURLToPath(import.meta.url)),
'api-resilience.responses',
),
settings: {
planSettings: { modelRouting: false },
},
});
// Run the CLI with a simple prompt.
// The fake responses will provide a stream with a metadata-only chunk in the middle.
// We use gemini-3-pro-preview to minimize internal service calls.
const result = await rig.run({
args: ['hi', '--model', 'gemini-3-pro-preview'],
});
// Verify the output contains text from the normal chunks.
// If the CLI crashed on the metadata chunk, rig.run would throw.
expect(result).toContain('Part 1.');
expect(result).toContain('Part 2.');
// Verify telemetry event for the prompt was still generated
const hasUserPromptEvent = await rig.waitForTelemetryEvent('user_prompt');
expect(hasUserPromptEvent).toBe(true);
});
});

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll open https://example.com and check the page title for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Open https://example.com and get the page title"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":35,"totalTokenCount":135}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The page title of https://example.com is \"Example Domain\". The browser session has been completed and cleaned up successfully."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and analyze the links on the page."},{"functionCall":{"name":"browser_agent","args":{"task":"Go to https://example.com and find all links on the page, then describe them"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"After analyzing https://example.com, I found the following links:\n\n1. **\"More information...\"** - This is the main link on the page that points to the IANA (Internet Assigned Numbers Authority) website for more details about reserved domains.\n\nThe page is quite minimal with just this single informational link, which is typical for example domains used in documentation."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":70,"totalTokenCount":270}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll help you open https://example.com and analyze the page. Let me use the browser agent to navigate and capture the page information."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and capture the accessibility tree to get the page title and main content"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Based on the browser analysis of https://example.com:\n\n**Page Title**: Example Domain\n\n**Main Content**: The page contains a simple heading \"Example Domain\" and explanatory text stating \"This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.\"\n\nThe page has a clean, minimal layout typical of placeholder domains used for documentation and examples."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":80,"totalTokenCount":280}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and take a screenshot for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and take a screenshot of the page"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully navigated to https://example.com and captured a screenshot. The page shows the familiar \"Example Domain\" header with explanatory text below it. The screenshot captures the clean, minimal layout of this demonstration website."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":50,"totalTokenCount":250}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll help you navigate to https://example.com and perform both operations - capturing the accessibility tree and taking a screenshot."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com, take a snapshot of the accessibility tree, then take a screenshot"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":45,"totalTokenCount":145}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully completed both operations on https://example.com:\n\n**Accessibility Tree Snapshot**: The page has a clear structure with the main heading \"Example Domain\" and descriptive text about the domain's purpose for documentation examples.\n\n**Screenshot**: Captured a visual representation of the page showing the clean, minimal layout with the heading and explanatory text.\n\nBoth the accessibility data and visual screenshot confirm this is the standard example domain page used for documentation purposes."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":80,"totalTokenCount":280}}]}

View File

@@ -0,0 +1,206 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Integration tests for the browser agent.
*
* These tests verify the complete end-to-end flow from CLI prompt through
* browser_agent delegation to MCP/Chrome DevTools and back. Unlike the unit
* tests in packages/core/src/agents/browser/ which mock all MCP components,
* these tests launch real Chrome instances in headless mode.
*
* Tests are skipped on systems without Chrome/Chromium installed.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestRig, assertModelHasOutput } from './test-helper.js';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { execSync } from 'node:child_process';
import { existsSync } from 'node:fs';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const chromeAvailable = (() => {
try {
if (process.platform === 'darwin') {
execSync(
'test -d "/Applications/Google Chrome.app" || test -d "/Applications/Chromium.app"',
{
stdio: 'ignore',
},
);
} else if (process.platform === 'linux') {
execSync(
'which google-chrome || which chromium-browser || which chromium',
{ stdio: 'ignore' },
);
} else if (process.platform === 'win32') {
// Check standard Windows installation paths using Node.js fs
const chromePaths = [
'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
`${process.env['LOCALAPPDATA'] ?? ''}\\Google\\Chrome\\Application\\chrome.exe`,
];
const found = chromePaths.some((p) => existsSync(p));
if (!found) {
// Fall back to PATH check
execSync('where chrome || where chromium', { stdio: 'ignore' });
}
} else {
return false;
}
return true;
} catch {
return false;
}
})();
describe.skipIf(!chromeAvailable)('browser-agent', () => {
let rig: TestRig;
beforeEach(() => {
rig = new TestRig();
});
afterEach(async () => await rig.cleanup());
it('should navigate to a page and capture accessibility tree', async () => {
rig.setup('browser-navigate-and-snapshot', {
fakeResponsesPath: join(
__dirname,
'browser-agent.navigate-snapshot.responses',
),
settings: {
agents: {
browser_agent: {
headless: true,
sessionMode: 'isolated',
},
},
},
});
const result = await rig.run({
args: 'Open https://example.com in the browser and tell me the page title and main content.',
});
assertModelHasOutput(result);
const toolLogs = rig.readToolLogs();
const browserAgentCall = toolLogs.find(
(t) => t.toolRequest.name === 'browser_agent',
);
expect(
browserAgentCall,
'Expected browser_agent to be called',
).toBeDefined();
});
it('should take screenshots of web pages', async () => {
rig.setup('browser-screenshot', {
fakeResponsesPath: join(__dirname, 'browser-agent.screenshot.responses'),
settings: {
agents: {
browser_agent: {
headless: true,
sessionMode: 'isolated',
},
},
},
});
const result = await rig.run({
args: 'Navigate to https://example.com and take a screenshot.',
});
const toolLogs = rig.readToolLogs();
const browserCalls = toolLogs.filter(
(t) => t.toolRequest.name === 'browser_agent',
);
expect(browserCalls.length).toBeGreaterThan(0);
assertModelHasOutput(result);
});
it('should interact with page elements', async () => {
rig.setup('browser-interaction', {
fakeResponsesPath: join(__dirname, 'browser-agent.interaction.responses'),
settings: {
agents: {
browser_agent: {
headless: true,
sessionMode: 'isolated',
},
},
},
});
const result = await rig.run({
args: 'Go to https://example.com, find any links on the page, and describe them.',
});
const toolLogs = rig.readToolLogs();
const browserAgentCall = toolLogs.find(
(t) => t.toolRequest.name === 'browser_agent',
);
expect(
browserAgentCall,
'Expected browser_agent to be called',
).toBeDefined();
assertModelHasOutput(result);
});
it('should clean up browser processes after completion', async () => {
rig.setup('browser-cleanup', {
fakeResponsesPath: join(__dirname, 'browser-agent.cleanup.responses'),
settings: {
agents: {
browser_agent: {
headless: true,
sessionMode: 'isolated',
},
},
},
});
await rig.run({
args: 'Open https://example.com in the browser and check the page title.',
});
// Test passes if we reach here, relying on Vitest's timeout mechanism
// to detect hanging browser processes.
});
it('should handle multiple browser operations in sequence', async () => {
rig.setup('browser-sequential', {
fakeResponsesPath: join(__dirname, 'browser-agent.sequential.responses'),
settings: {
agents: {
browser_agent: {
headless: true,
sessionMode: 'isolated',
},
},
},
});
const result = await rig.run({
args: 'Navigate to https://example.com, take a snapshot of the accessibility tree, then take a screenshot.',
});
const toolLogs = rig.readToolLogs();
const browserCalls = toolLogs.filter(
(t) => t.toolRequest.name === 'browser_agent',
);
expect(browserCalls.length).toBeGreaterThan(0);
// Should successfully complete all operations
assertModelHasOutput(result);
});
});

View File

@@ -104,7 +104,7 @@ describe('extension reloading', () => {
return (
output.includes(
'test-server (from test-extension) - Ready (1 tool)',
) && output.includes('- hello')
) && output.includes('- mcp_test-server_hello')
);
},
30000, // 30s timeout
@@ -148,7 +148,7 @@ describe('extension reloading', () => {
return (
output.includes(
'test-server (from test-extension) - Ready (1 tool)',
) && output.includes('- goodbye')
) && output.includes('- mcp_test-server_goodbye')
);
},
30000,

View File

@@ -182,14 +182,22 @@ describe('Hooks Agent Flow', () => {
);
const afterAgentScript = `
console.log(JSON.stringify({
decision: 'block',
reason: 'Security policy triggered',
hookSpecificOutput: {
hookEventName: 'AfterAgent',
clearContext: true
}
}));
const fs = require('fs');
const input = JSON.parse(fs.readFileSync(0, 'utf-8'));
if (input.stop_hook_active) {
// Retry turn: allow execution to proceed (breaks the loop)
console.log(JSON.stringify({ decision: 'allow' }));
} else {
// First call: block and clear context to trigger the retry
console.log(JSON.stringify({
decision: 'block',
reason: 'Security policy triggered',
hookSpecificOutput: {
hookEventName: 'AfterAgent',
clearContext: true
}
}));
}
`;
const afterAgentScriptPath = rig.createScript(
'after_agent_clear.cjs',
@@ -198,8 +206,10 @@ describe('Hooks Agent Flow', () => {
rig.setup('should process clearContext in AfterAgent hook output', {
settings: {
hooks: {
hooksConfig: {
enabled: true,
},
hooks: {
BeforeModel: [
{
hooks: [

View File

@@ -1,2 +1,3 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hi there!"}],"role":"model"},"finishReason":"STOP","index":0}]}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Clarification: I am a bot."}],"role":"model"},"finishReason":"STOP","index":0}]}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Security policy triggered"}],"role":"model"},"finishReason":"STOP","index":0}]}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I will read the content of the file to identify its"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7969,"candidatesTokenCount":11,"totalTokenCount":8061,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7969}],"thoughtsTokenCount":81}},{"candidates":[{"content":{"parts":[{"text":" language.\n"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7969,"candidatesTokenCount":14,"totalTokenCount":8064,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7969}],"thoughtsTokenCount":81}},{"candidates":[{"content":{"parts":[{"functionCall":{"name":"read_file","args":{"file_path":"test.txt"}},"thoughtSignature":"EvkCCvYCAb4+9vt8mJ/o45uuuAJtfjaZ3YzkJzqXHZBttRE+Om0ahcr1S5RDFp50KpgHtJtbAH1pwEXampOnDV3WKiWwA+e3Jnyk4CNQegz7ZMKsl55Nem2XDViP8BZKnJVqGmSFuMoKJLFmbVIxKejtWcblfn3httbGsrUUNbHwdPjPHo1qY043lF63g0kWx4v68gPSsJpNhxLrSugKKjiyRFN+J0rOIBHI2S9MdZoHEKhJxvGMtXiJquxmhPmKcNEsn+hMdXAZB39hmrRrGRHDQPVYVPhfJthVc73ufzbn+5KGJpaMQyKY5hqrc2ea8MHz+z6BSx+tFz4NZBff1tJQOiUp09/QndxQRZHSQZr1ALGy0O1Qw4JqsX94x81IxtXqYkSRo3zgm2vl/xPMC5lKlnK5xoKJmoWaHkUNeXs/sopu3/Waf1a5Csoh9ImnKQsW0rJ6GRyDQvky1FwR6Aa98bgfNdcXOPHml/BtghaqRMXTiG6vaPJ8UFs="}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7969,"candidatesTokenCount":64,"totalTokenCount":8114,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7969}],"thoughtsTokenCount":81}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7969,"candidatesTokenCount":64,"totalTokenCount":8114,"cachedContentTokenCount":6082,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7969}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":6082}],"thoughtsTokenCount":81}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The language of the file is Latin."}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":8054,"candidatesTokenCount":8,"totalTokenCount":8078,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8054}],"thoughtsTokenCount":16}},{"candidates":[{"content":{"parts":[{"text":"","thoughtSignature":"EnIKcAG+Pvb7vnRBJVz3khx1oArQQqTNvXOXkliNQS7NvYw94dq5m+wGKRmSj3egO3GVp7pacnAtLn9NT1ABKBGpa7MpRhiAe3bbPZfkqOuveeyC19LKQ9fzasCywiYqg5k5qSxfjs5okk+O0NLOvTjN/tg="}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":8135,"candidatesTokenCount":8,"totalTokenCount":8159,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8135}],"thoughtsTokenCount":16}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I will run the requested"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":5,"totalTokenCount":8092,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"thoughtsTokenCount":138}},{"candidates":[{"content":{"parts":[{"text":" shell command to verify the policy configuration.\n"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":14,"totalTokenCount":8101,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"thoughtsTokenCount":138}},{"candidates":[{"content":{"parts":[{"functionCall":{"name":"run_shell_command","args":{"command":"echo POLICY_TEST_ECHO_COMMAND","description":"Echo the test string to verify policy settings."}},"thoughtSignature":"EpwFCpkFAb4+9vulXgVj96CAm2eMFbDEGHz9B37GwI8N1KOvu9AHwdYWiita7yS4RKAdeBui22B5320XBaxOtZGnMo2E9pG0Pcus2WsBiecRaHUTxTmhx1BvURevrs+5m4UJeLRGMfP94+ncha4DeIQod3PKBnK8xeIJTyZBFB7+hmHbHvem2VwZh/v14e4fXlpEkkdntJbzrA1nUdctIGdEmdm0sL8PaFnMqWLUnkZvGdfq7ctFt9EYk2HW2SrHVhk3HdsyWhoxNz2MU0sRWzAgiSQY/heSSAbU7Jdgg0RjwB9o3SkCIHxqnVpkH8PQsARwnah5I5s7pW6EHr3D4f1/UVl0n26hyI2xBqF/n4aZKhtX55U4h/DIhxooZa2znstt6BS8vRcdzflFrX7OV86WQxHE4JHjQecP2ciBRimm8pL3Od3pXnRcx32L8JbrWm6dPyWlo5h5uCRy0qXye2+3SuHs5wtxOjD9NETR4TwzqFe+m0zThpxsR1ZKQeKlO7lN/s3pWih/TjbZQEQs9xr72UnlE8ZtJ4bOKj8GNbemvsrbYAO98NzJwvdil0FhblaXmReP1uYjucmLC0jCJHShqNz2KzAkDTvKs4tmio13IuCRjTZ3E5owqCUn7djDqOSDwrg235RIVJkiDIaPlHemOR15lbVQD1VOzytzT8TZLEzTV750oyHq/IhLMQHYixO8jJ2GkVvUp7bxz9oQ4UeTqT5lTF4s40H2Rlkb6trF4hKXoFhzILy1aOJTC9W3fCoop7VJLIMNulgHLWxiq65Uas6sIep87yiD4xLfbGfMm6HS4JTRhPlfxeckn/SzUfu1afg1nAvW3vBlR/YNREf0N28/PnRC08VYqA3mqCRiyPqPWsf3a0jyio0dD9A="}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":54,"totalTokenCount":8141,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"thoughtsTokenCount":138}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":54,"totalTokenCount":8141,"cachedContentTokenCount":6082,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":6082}],"thoughtsTokenCount":138}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"POLICY_TEST_"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":8042,"candidatesTokenCount":4,"totalTokenCount":8046,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8042}]}},{"candidates":[{"content":{"parts":[{"text":"ECHO_COMMAND"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":8042,"candidatesTokenCount":8,"totalTokenCount":8050,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8042}]}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":8180,"candidatesTokenCount":8,"totalTokenCount":8188,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8180}]}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"**Assessing Command Execution**\n\nOkay, I'm currently assessing the feasibility of executing `echo POLICY_TEST_ECHO_COMMAND` using the `run_shell_command` function. Restrictions are being evaluated; the prompt is specifically geared towards a successful command output: \"POLICY_TEST_ECHO_COMMAND\".\n\n\n","thought":true}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"totalTokenCount":7949,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}]}},{"candidates":[{"content":{"parts":[{"text":"I will execute the requested echo"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":6,"totalTokenCount":8161,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"thoughtsTokenCount":206}},{"candidates":[{"content":{"parts":[{"text":" command to verify the policy."}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":12,"totalTokenCount":8167,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"thoughtsTokenCount":206}},{"candidates":[{"content":{"parts":[{"functionCall":{"name":"run_shell_command","args":{"description":"Execute the echo command as requested.","command":"echo POLICY_TEST_ECHO_COMMAND"}},"thoughtSignature":"EvkGCvYGAb4+9vucYbmJ8DrNCca9c0C8o4qKQ6V2WnzmT4mbCw8V7s0+2I/PoxrgnsxZJIIRM8y5E4bW7Jbs46GjbJ2cefY9Q3iC45eiGS5Gqvq0eAG04N3GZRwizyDOp+wJlBsaPu1cNB1t6CnMk/ZHDAHEIQUpYfYWmPudbHOQMspGMu3bX23YSI1+Q5vPVdOtM16J3EFbk3dCp+RnPa/8tVC+5AqFlLveuDbJXtrLN9wAyf4SjnPhn9BPfD0bgas3+gF03qRJvWoNcnnJiYxL3DNQtjsAYJ7IWRzciYYZSTm99blD730bn3NzvSObhlHDtb3hFpApYvG396+3prsgJg0Yjef54B4KxHfZaQbE2ndSP5zGrwLtVD5y7XJAYskvhiUqwPFHNVykqroEMzPn8wWQSGvonNR6ezcMIsUV5xwnxZDaPhvrDdIwF4NR1F5DeriJRu27+fwtCApeYkx9mPx4LqnyxOuVsILjzdSPHE6Bqf690VJSXpo67lCN4F3DRRYIuCD4UOlf8V3dvUO6BKjvChDDWnIq7KPoByDQT9VhVlZvS3/nYlkeDuhi0rk2jpByN1NdgD2YSvOlpJcka8JqKQ+lnO/7Swunij2ISUfpL2hkx6TEHjebPU2dBQkub5nSl9J1EhZn4sUGG5r6Zdv1lYcpIcO4ZYeMqZZ4uNvTvSpGdT4Jj1+qS88taKgYq7uN1RgQSTsT5wcpmlubIpgIycNwAIRFvN+DjkQjiUC6hSqdeOx3dc7LWgC/O/+PRog7kuFrD2nzih+oIP0YxXrLA9CMVPlzeAgPUi9b75HAJQ92GRHxfQ163tjZY+4bWmJtcU4NBqGH0x/jLEU9xCojTeh+mZoUDGsb3N+bVcGJftRIet7IBYveD29Z+XHtKhf7s/YIkFW8lgsG8Q0EtNchCxqIQxf9UjYEO52RhCx7i7zScB1knovt2HAotACKqDdPqg18PmpDv8Frw6Y66XeCCJzBCmNcSUTETq3K05gwkU8nyANQtjbJT0wF4LS9h5vPE+Vc7/dGH6pi1TgxWB/n4q1IXfNqilo/h2Pyw01VPsHKthNtKKq1/nSW/WuEU0rimqu7wHplMqU2nwRDCTNE9pPO59RtTHMfUxxd8yEgKBj9L8MiQGM5isIYl/lJtvucee4HD9iLpbYADlrQAlUCd0rg/z+5sQ=="}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":50,"totalTokenCount":8205,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"thoughtsTokenCount":206}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7949,"candidatesTokenCount":50,"totalTokenCount":8205,"cachedContentTokenCount":6082,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7949}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":6082}],"thoughtsTokenCount":206}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"AR NAR"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":8020,"candidatesTokenCount":2,"totalTokenCount":8049,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8020}],"thoughtsTokenCount":27}},{"candidates":[{"content":{"parts":[{"text":"","thoughtSignature":"Er8BCrwBAb4+9vv6KGeMf6yopmPBE/az7Kjdp+Pe5a/R6wgXcyCZzGNwkwKFW3i3ro0j26bRrVeHD1zRfWFTIGdOSZKV6OMPWLqFC/RU6CNJ88B1xY7hbCVwA7EchYPzgd3YZRVNwmFu52j86/9qXf/zaqTFN+WQ0mUESJXh2O2YX8E7imAvxhmRdobVkxvEt4ZX3dW5skDhXHMDZOxbLpX0nkK7cWWS7iEc+qBFP0yinlA/eiG2ZdKpuTiDl76a9ik="}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":8226,"candidatesTokenCount":2,"totalTokenCount":8255,"promptTokensDetails":[{"modality":"TEXT","tokenCount":8226}],"thoughtsTokenCount":27}}]}

View File

@@ -0,0 +1,205 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { join } from 'node:path';
import { TestRig } from './test-helper.js';
interface PromptCommand {
prompt: (testFile: string) => string;
tool: string;
command: string;
expectedSuccessResult: string;
expectedFailureResult: string;
}
const ECHO_PROMPT: PromptCommand = {
command: 'echo',
prompt: () =>
`Use the \`echo POLICY_TEST_ECHO_COMMAND\` shell command. On success, ` +
`your final response must ONLY be "POLICY_TEST_ECHO_COMMAND". If the ` +
`command fails output AR NAR and stop.`,
tool: 'run_shell_command',
expectedSuccessResult: 'POLICY_TEST_ECHO_COMMAND',
expectedFailureResult: 'AR NAR',
};
const READ_FILE_PROMPT: PromptCommand = {
prompt: (testFile: string) =>
`Read the file ${testFile} and tell me what language it is, if the ` +
`read_file tool fails output AR NAR and stop.`,
tool: 'read_file',
command: '',
expectedSuccessResult: 'Latin',
expectedFailureResult: 'AR NAR',
};
async function waitForToolCallLog(
rig: TestRig,
tool: string,
command: string,
timeout: number = 15000,
) {
const foundToolCall = await rig.waitForToolCall(tool, timeout, (args) =>
args.toLowerCase().includes(command.toLowerCase()),
);
expect(foundToolCall).toBe(true);
const toolLogs = rig
.readToolLogs()
.filter((toolLog) => toolLog.toolRequest.name === tool);
const log = toolLogs.find(
(toolLog) =>
!command ||
toolLog.toolRequest.args.toLowerCase().includes(command.toLowerCase()),
);
// The policy engine should have logged the tool call
expect(log).toBeTruthy();
return log;
}
async function verifyToolExecution(
rig: TestRig,
promptCommand: PromptCommand,
result: string,
expectAllowed: boolean,
expectedDenialString?: string,
) {
const log = await waitForToolCallLog(
rig,
promptCommand.tool,
promptCommand.command,
);
if (expectAllowed) {
expect(log!.toolRequest.success).toBe(true);
expect(result).not.toContain('Tool execution denied by policy');
expect(result).not.toContain(`Tool "${promptCommand.tool}" not found`);
expect(result).toContain(promptCommand.expectedSuccessResult);
} else {
expect(log!.toolRequest.success).toBe(false);
expect(result).toContain(
expectedDenialString || 'Tool execution denied by policy',
);
expect(result).toContain(promptCommand.expectedFailureResult);
}
}
interface TestCase {
name: string;
responsesFile: string;
promptCommand: PromptCommand;
policyContent?: string;
expectAllowed: boolean;
expectedDenialString?: string;
}
describe('Policy Engine Headless Mode', () => {
let rig: TestRig;
let testFile: string;
beforeEach(() => {
rig = new TestRig();
});
afterEach(async () => {
if (rig) {
await rig.cleanup();
}
});
const runTestCase = async (tc: TestCase) => {
const fakeResponsesPath = join(import.meta.dirname, tc.responsesFile);
rig.setup(tc.name, { fakeResponsesPath });
testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
const args = ['-p', tc.promptCommand.prompt(testFile)];
if (tc.policyContent) {
const policyPath = rig.createFile('test-policy.toml', tc.policyContent);
args.push('--policy', policyPath);
}
const result = await rig.run({
args,
approvalMode: 'default',
});
await verifyToolExecution(
rig,
tc.promptCommand,
result,
tc.expectAllowed,
tc.expectedDenialString,
);
};
const testCases = [
{
name: 'should deny ASK_USER tools by default in headless mode',
responsesFile: 'policy-headless-shell-denied.responses',
promptCommand: ECHO_PROMPT,
expectAllowed: false,
expectedDenialString: 'Tool "run_shell_command" not found',
},
{
name: 'should allow ASK_USER tools in headless mode if explicitly allowed via policy file',
responsesFile: 'policy-headless-shell-allowed.responses',
promptCommand: ECHO_PROMPT,
policyContent: `
[[rule]]
toolName = "run_shell_command"
decision = "allow"
priority = 100
`,
expectAllowed: true,
},
{
name: 'should allow read-only tools by default in headless mode',
responsesFile: 'policy-headless-readonly.responses',
promptCommand: READ_FILE_PROMPT,
expectAllowed: true,
},
{
name: 'should allow specific shell commands in policy file',
responsesFile: 'policy-headless-shell-allowed.responses',
promptCommand: ECHO_PROMPT,
policyContent: `
[[rule]]
toolName = "run_shell_command"
commandPrefix = "${ECHO_PROMPT.command}"
decision = "allow"
priority = 100
`,
expectAllowed: true,
},
{
name: 'should deny other shell commands in policy file',
responsesFile: 'policy-headless-shell-denied.responses',
promptCommand: ECHO_PROMPT,
policyContent: `
[[rule]]
toolName = "run_shell_command"
commandPrefix = "node"
decision = "allow"
priority = 100
`,
expectAllowed: false,
expectedDenialString: 'Tool execution denied by policy',
},
];
it.each(testCases)(
'$name',
async (tc) => {
await runTestCase(tc);
},
// Large timeout for regeneration
process.env['REGENERATE_MODEL_GOLDENS'] === 'true' ? 120000 : undefined,
);
});