mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-13 05:12:55 -07:00
feat(headless): surface diagnostic monitoring data in non-interactive output
When running Gemini CLI in headless mode (-p), critical diagnostic data like auth method, API retry attempts, loop detection, and request stats was invisible despite being tracked internally. This change surfaces that data across all three output formats (stream-json, json, text). Changes: - Add RETRY and LOOP_DETECTED event types to stream-json output - Include auth_method and user_tier in init events and JSON output - Add api_requests, api_errors, and retry_count to result stats - Track and expose detected loop type (tool call, chanting, LLM-detected) - Emit [RETRY] and [WARNING] messages to stderr in text mode - Listen to CoreEvent.RetryAttempt in non-interactive CLI - Add test script (scripts/test_gemini.sh) for manual verification
This commit is contained in:
@@ -3,8 +3,8 @@
|
|||||||
exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'loop detected' 1`] = `
|
exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'loop detected' 1`] = `
|
||||||
"{"type":"init","timestamp":"<TIMESTAMP>","session_id":"test-session-id","model":"test-model"}
|
"{"type":"init","timestamp":"<TIMESTAMP>","session_id":"test-session-id","model":"test-model"}
|
||||||
{"type":"message","timestamp":"<TIMESTAMP>","role":"user","content":"Loop test"}
|
{"type":"message","timestamp":"<TIMESTAMP>","role":"user","content":"Loop test"}
|
||||||
{"type":"error","timestamp":"<TIMESTAMP>","severity":"warning","message":"Loop detected, stopping execution"}
|
{"type":"loop_detected","timestamp":"<TIMESTAMP>"}
|
||||||
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0}}
|
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0,"api_requests":0,"api_errors":0}}
|
||||||
"
|
"
|
||||||
`;
|
`;
|
||||||
|
|
||||||
@@ -12,7 +12,7 @@ exports[`runNonInteractive > should emit appropriate error event in streaming JS
|
|||||||
"{"type":"init","timestamp":"<TIMESTAMP>","session_id":"test-session-id","model":"test-model"}
|
"{"type":"init","timestamp":"<TIMESTAMP>","session_id":"test-session-id","model":"test-model"}
|
||||||
{"type":"message","timestamp":"<TIMESTAMP>","role":"user","content":"Max turns test"}
|
{"type":"message","timestamp":"<TIMESTAMP>","role":"user","content":"Max turns test"}
|
||||||
{"type":"error","timestamp":"<TIMESTAMP>","severity":"error","message":"Maximum session turns exceeded"}
|
{"type":"error","timestamp":"<TIMESTAMP>","severity":"error","message":"Maximum session turns exceeded"}
|
||||||
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0}}
|
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0,"api_requests":0,"api_errors":0}}
|
||||||
"
|
"
|
||||||
`;
|
`;
|
||||||
|
|
||||||
@@ -23,7 +23,7 @@ exports[`runNonInteractive > should emit appropriate events for streaming JSON o
|
|||||||
{"type":"tool_use","timestamp":"<TIMESTAMP>","tool_name":"testTool","tool_id":"tool-1","parameters":{"arg1":"value1"}}
|
{"type":"tool_use","timestamp":"<TIMESTAMP>","tool_name":"testTool","tool_id":"tool-1","parameters":{"arg1":"value1"}}
|
||||||
{"type":"tool_result","timestamp":"<TIMESTAMP>","tool_id":"tool-1","status":"success","output":"Tool executed successfully"}
|
{"type":"tool_result","timestamp":"<TIMESTAMP>","tool_id":"tool-1","status":"success","output":"Tool executed successfully"}
|
||||||
{"type":"message","timestamp":"<TIMESTAMP>","role":"assistant","content":"Final answer","delta":true}
|
{"type":"message","timestamp":"<TIMESTAMP>","role":"assistant","content":"Final answer","delta":true}
|
||||||
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0}}
|
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0,"api_requests":0,"api_errors":0}}
|
||||||
"
|
"
|
||||||
`;
|
`;
|
||||||
|
|
||||||
|
|||||||
@@ -183,6 +183,7 @@ describe('runNonInteractive', () => {
|
|||||||
getIdeMode: vi.fn().mockReturnValue(false),
|
getIdeMode: vi.fn().mockReturnValue(false),
|
||||||
|
|
||||||
getContentGeneratorConfig: vi.fn().mockReturnValue({}),
|
getContentGeneratorConfig: vi.fn().mockReturnValue({}),
|
||||||
|
getUserTierName: vi.fn().mockReturnValue(undefined),
|
||||||
getDebugMode: vi.fn().mockReturnValue(false),
|
getDebugMode: vi.fn().mockReturnValue(false),
|
||||||
getOutputFormat: vi.fn().mockReturnValue('text'),
|
getOutputFormat: vi.fn().mockReturnValue('text'),
|
||||||
getModel: vi.fn().mockReturnValue('test-model'),
|
getModel: vi.fn().mockReturnValue('test-model'),
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import type {
|
|||||||
ToolCallRequestInfo,
|
ToolCallRequestInfo,
|
||||||
ResumedSessionData,
|
ResumedSessionData,
|
||||||
UserFeedbackPayload,
|
UserFeedbackPayload,
|
||||||
|
RetryAttemptPayload,
|
||||||
} from '@google/gemini-cli-core';
|
} from '@google/gemini-cli-core';
|
||||||
import { isSlashCommand } from './ui/utils/commandUtils.js';
|
import { isSlashCommand } from './ui/utils/commandUtils.js';
|
||||||
import type { LoadedSettings } from './config/settings.js';
|
import type { LoadedSettings } from './config/settings.js';
|
||||||
@@ -94,6 +95,7 @@ export async function runNonInteractive({
|
|||||||
};
|
};
|
||||||
|
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
let retryCount = 0;
|
||||||
const streamFormatter =
|
const streamFormatter =
|
||||||
config.getOutputFormat() === OutputFormat.STREAM_JSON
|
config.getOutputFormat() === OutputFormat.STREAM_JSON
|
||||||
? new StreamJsonFormatter()
|
? new StreamJsonFormatter()
|
||||||
@@ -181,6 +183,26 @@ export async function runNonInteractive({
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const handleRetryAttempt = (payload: RetryAttemptPayload) => {
|
||||||
|
retryCount++;
|
||||||
|
if (streamFormatter) {
|
||||||
|
streamFormatter.emitEvent({
|
||||||
|
type: JsonStreamEventType.RETRY,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
attempt: payload.attempt,
|
||||||
|
max_attempts: payload.maxAttempts,
|
||||||
|
delay_ms: payload.delayMs,
|
||||||
|
error: payload.error,
|
||||||
|
model: payload.model,
|
||||||
|
});
|
||||||
|
} else if (config.getOutputFormat() === OutputFormat.TEXT) {
|
||||||
|
const errorSuffix = payload.error ? `: ${payload.error}` : '';
|
||||||
|
process.stderr.write(
|
||||||
|
`[RETRY] Attempt ${payload.attempt}/${payload.maxAttempts} for model ${payload.model} (delay: ${payload.delayMs}ms)${errorSuffix}\n`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let errorToHandle: unknown | undefined;
|
let errorToHandle: unknown | undefined;
|
||||||
try {
|
try {
|
||||||
consolePatcher.patch();
|
consolePatcher.patch();
|
||||||
@@ -199,6 +221,8 @@ export async function runNonInteractive({
|
|||||||
setupStdinCancellation();
|
setupStdinCancellation();
|
||||||
|
|
||||||
coreEvents.on(CoreEvent.UserFeedback, handleUserFeedback);
|
coreEvents.on(CoreEvent.UserFeedback, handleUserFeedback);
|
||||||
|
coreEvents.on(CoreEvent.RetryAttempt, handleRetryAttempt);
|
||||||
|
|
||||||
coreEvents.drainBacklogs();
|
coreEvents.drainBacklogs();
|
||||||
|
|
||||||
// Handle EPIPE errors when the output is piped to a command that closes early.
|
// Handle EPIPE errors when the output is piped to a command that closes early.
|
||||||
@@ -228,12 +252,16 @@ export async function runNonInteractive({
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Emit init event for streaming JSON
|
// Emit init event for streaming JSON
|
||||||
|
const authMethod = config.getContentGeneratorConfig()?.authType;
|
||||||
|
const userTier = config.getUserTierName();
|
||||||
if (streamFormatter) {
|
if (streamFormatter) {
|
||||||
streamFormatter.emitEvent({
|
streamFormatter.emitEvent({
|
||||||
type: JsonStreamEventType.INIT,
|
type: JsonStreamEventType.INIT,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
session_id: config.getSessionId(),
|
session_id: config.getSessionId(),
|
||||||
model: config.getModel(),
|
model: config.getModel(),
|
||||||
|
...(authMethod && { auth_method: authMethod }),
|
||||||
|
...(userTier && { user_tier: userTier }),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -345,13 +373,18 @@ export async function runNonInteractive({
|
|||||||
}
|
}
|
||||||
toolCallRequests.push(event.value);
|
toolCallRequests.push(event.value);
|
||||||
} else if (event.type === GeminiEventType.LoopDetected) {
|
} else if (event.type === GeminiEventType.LoopDetected) {
|
||||||
|
const loopType = event.value?.loopType;
|
||||||
if (streamFormatter) {
|
if (streamFormatter) {
|
||||||
streamFormatter.emitEvent({
|
streamFormatter.emitEvent({
|
||||||
type: JsonStreamEventType.ERROR,
|
type: JsonStreamEventType.LOOP_DETECTED,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
severity: 'warning',
|
...(loopType && { loop_type: loopType }),
|
||||||
message: 'Loop detected, stopping execution',
|
|
||||||
});
|
});
|
||||||
|
} else if (config.getOutputFormat() === OutputFormat.TEXT) {
|
||||||
|
const loopTypeStr = loopType ? ` (${loopType})` : '';
|
||||||
|
process.stderr.write(
|
||||||
|
`[WARNING] Loop detected${loopTypeStr}, stopping execution\n`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
} else if (event.type === GeminiEventType.MaxSessionTurns) {
|
} else if (event.type === GeminiEventType.MaxSessionTurns) {
|
||||||
if (streamFormatter) {
|
if (streamFormatter) {
|
||||||
@@ -380,6 +413,7 @@ export async function runNonInteractive({
|
|||||||
stats: streamFormatter.convertToStreamStats(
|
stats: streamFormatter.convertToStreamStats(
|
||||||
metrics,
|
metrics,
|
||||||
durationMs,
|
durationMs,
|
||||||
|
retryCount,
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -479,13 +513,21 @@ export async function runNonInteractive({
|
|||||||
stats: streamFormatter.convertToStreamStats(
|
stats: streamFormatter.convertToStreamStats(
|
||||||
metrics,
|
metrics,
|
||||||
durationMs,
|
durationMs,
|
||||||
|
retryCount,
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
} else if (config.getOutputFormat() === OutputFormat.JSON) {
|
} else if (config.getOutputFormat() === OutputFormat.JSON) {
|
||||||
const formatter = new JsonFormatter();
|
const formatter = new JsonFormatter();
|
||||||
const stats = uiTelemetryService.getMetrics();
|
const stats = uiTelemetryService.getMetrics();
|
||||||
textOutput.write(
|
textOutput.write(
|
||||||
formatter.format(config.getSessionId(), responseText, stats),
|
formatter.format(
|
||||||
|
config.getSessionId(),
|
||||||
|
responseText,
|
||||||
|
stats,
|
||||||
|
undefined,
|
||||||
|
authMethod,
|
||||||
|
userTier,
|
||||||
|
),
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
textOutput.ensureTrailingNewline(); // Ensure a final newline
|
textOutput.ensureTrailingNewline(); // Ensure a final newline
|
||||||
@@ -503,13 +545,24 @@ export async function runNonInteractive({
|
|||||||
type: JsonStreamEventType.RESULT,
|
type: JsonStreamEventType.RESULT,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'success',
|
status: 'success',
|
||||||
stats: streamFormatter.convertToStreamStats(metrics, durationMs),
|
stats: streamFormatter.convertToStreamStats(
|
||||||
|
metrics,
|
||||||
|
durationMs,
|
||||||
|
retryCount,
|
||||||
|
),
|
||||||
});
|
});
|
||||||
} else if (config.getOutputFormat() === OutputFormat.JSON) {
|
} else if (config.getOutputFormat() === OutputFormat.JSON) {
|
||||||
const formatter = new JsonFormatter();
|
const formatter = new JsonFormatter();
|
||||||
const stats = uiTelemetryService.getMetrics();
|
const stats = uiTelemetryService.getMetrics();
|
||||||
textOutput.write(
|
textOutput.write(
|
||||||
formatter.format(config.getSessionId(), responseText, stats),
|
formatter.format(
|
||||||
|
config.getSessionId(),
|
||||||
|
responseText,
|
||||||
|
stats,
|
||||||
|
undefined,
|
||||||
|
authMethod,
|
||||||
|
userTier,
|
||||||
|
),
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
textOutput.ensureTrailingNewline(); // Ensure a final newline
|
textOutput.ensureTrailingNewline(); // Ensure a final newline
|
||||||
@@ -524,6 +577,7 @@ export async function runNonInteractive({
|
|||||||
cleanupStdinCancellation();
|
cleanupStdinCancellation();
|
||||||
|
|
||||||
consolePatcher.cleanup();
|
consolePatcher.cleanup();
|
||||||
|
coreEvents.off(CoreEvent.RetryAttempt, handleRetryAttempt);
|
||||||
coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback);
|
coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -636,7 +636,11 @@ export class GeminiClient {
|
|||||||
|
|
||||||
const loopDetected = await this.loopDetector.turnStarted(signal);
|
const loopDetected = await this.loopDetector.turnStarted(signal);
|
||||||
if (loopDetected) {
|
if (loopDetected) {
|
||||||
yield { type: GeminiEventType.LoopDetected };
|
const loopType = this.loopDetector.getDetectedLoopType();
|
||||||
|
yield {
|
||||||
|
type: GeminiEventType.LoopDetected,
|
||||||
|
value: loopType ? { loopType } : undefined,
|
||||||
|
};
|
||||||
return turn;
|
return turn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -689,7 +693,11 @@ export class GeminiClient {
|
|||||||
|
|
||||||
for await (const event of resultStream) {
|
for await (const event of resultStream) {
|
||||||
if (this.loopDetector.addAndCheck(event)) {
|
if (this.loopDetector.addAndCheck(event)) {
|
||||||
yield { type: GeminiEventType.LoopDetected };
|
const loopType = this.loopDetector.getDetectedLoopType();
|
||||||
|
yield {
|
||||||
|
type: GeminiEventType.LoopDetected,
|
||||||
|
value: loopType ? { loopType } : undefined,
|
||||||
|
};
|
||||||
controller.abort();
|
controller.abort();
|
||||||
return turn;
|
return turn;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -207,6 +207,7 @@ export type ServerGeminiFinishedEvent = {
|
|||||||
|
|
||||||
export type ServerGeminiLoopDetectedEvent = {
|
export type ServerGeminiLoopDetectedEvent = {
|
||||||
type: GeminiEventType.LoopDetected;
|
type: GeminiEventType.LoopDetected;
|
||||||
|
value?: { loopType: string };
|
||||||
};
|
};
|
||||||
|
|
||||||
export type ServerGeminiCitationEvent = {
|
export type ServerGeminiCitationEvent = {
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ export class JsonFormatter {
|
|||||||
response?: string,
|
response?: string,
|
||||||
stats?: SessionMetrics,
|
stats?: SessionMetrics,
|
||||||
error?: JsonError,
|
error?: JsonError,
|
||||||
|
authMethod?: string,
|
||||||
|
userTier?: string,
|
||||||
): string {
|
): string {
|
||||||
const output: JsonOutput = {};
|
const output: JsonOutput = {};
|
||||||
|
|
||||||
@@ -21,6 +23,14 @@ export class JsonFormatter {
|
|||||||
output.session_id = sessionId;
|
output.session_id = sessionId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (authMethod) {
|
||||||
|
output.auth_method = authMethod;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (userTier) {
|
||||||
|
output.user_tier = userTier;
|
||||||
|
}
|
||||||
|
|
||||||
if (response !== undefined) {
|
if (response !== undefined) {
|
||||||
output.response = stripAnsi(response);
|
output.response = stripAnsi(response);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,23 +39,28 @@ export class StreamJsonFormatter {
|
|||||||
convertToStreamStats(
|
convertToStreamStats(
|
||||||
metrics: SessionMetrics,
|
metrics: SessionMetrics,
|
||||||
durationMs: number,
|
durationMs: number,
|
||||||
|
retryCount?: number,
|
||||||
): StreamStats {
|
): StreamStats {
|
||||||
let totalTokens = 0;
|
let totalTokens = 0;
|
||||||
let inputTokens = 0;
|
let inputTokens = 0;
|
||||||
let outputTokens = 0;
|
let outputTokens = 0;
|
||||||
let cached = 0;
|
let cached = 0;
|
||||||
let input = 0;
|
let input = 0;
|
||||||
|
let apiRequests = 0;
|
||||||
|
let apiErrors = 0;
|
||||||
|
|
||||||
// Aggregate token counts across all models
|
// Aggregate token counts and API stats across all models
|
||||||
for (const modelMetrics of Object.values(metrics.models)) {
|
for (const modelMetrics of Object.values(metrics.models)) {
|
||||||
totalTokens += modelMetrics.tokens.total;
|
totalTokens += modelMetrics.tokens.total;
|
||||||
inputTokens += modelMetrics.tokens.prompt;
|
inputTokens += modelMetrics.tokens.prompt;
|
||||||
outputTokens += modelMetrics.tokens.candidates;
|
outputTokens += modelMetrics.tokens.candidates;
|
||||||
cached += modelMetrics.tokens.cached;
|
cached += modelMetrics.tokens.cached;
|
||||||
input += modelMetrics.tokens.input;
|
input += modelMetrics.tokens.input;
|
||||||
|
apiRequests += modelMetrics.api.totalRequests;
|
||||||
|
apiErrors += modelMetrics.api.totalErrors;
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
const stats: StreamStats = {
|
||||||
total_tokens: totalTokens,
|
total_tokens: totalTokens,
|
||||||
input_tokens: inputTokens,
|
input_tokens: inputTokens,
|
||||||
output_tokens: outputTokens,
|
output_tokens: outputTokens,
|
||||||
@@ -63,6 +68,14 @@ export class StreamJsonFormatter {
|
|||||||
input,
|
input,
|
||||||
duration_ms: durationMs,
|
duration_ms: durationMs,
|
||||||
tool_calls: metrics.tools.totalCalls,
|
tool_calls: metrics.tools.totalCalls,
|
||||||
|
api_requests: apiRequests,
|
||||||
|
api_errors: apiErrors,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (retryCount && retryCount > 0) {
|
||||||
|
stats.retry_count = retryCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ export interface JsonError {
|
|||||||
|
|
||||||
export interface JsonOutput {
|
export interface JsonOutput {
|
||||||
session_id?: string;
|
session_id?: string;
|
||||||
|
auth_method?: string;
|
||||||
|
user_tier?: string;
|
||||||
response?: string;
|
response?: string;
|
||||||
stats?: SessionMetrics;
|
stats?: SessionMetrics;
|
||||||
error?: JsonError;
|
error?: JsonError;
|
||||||
@@ -33,6 +35,8 @@ export enum JsonStreamEventType {
|
|||||||
TOOL_RESULT = 'tool_result',
|
TOOL_RESULT = 'tool_result',
|
||||||
ERROR = 'error',
|
ERROR = 'error',
|
||||||
RESULT = 'result',
|
RESULT = 'result',
|
||||||
|
RETRY = 'retry',
|
||||||
|
LOOP_DETECTED = 'loop_detected',
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface BaseJsonStreamEvent {
|
export interface BaseJsonStreamEvent {
|
||||||
@@ -44,6 +48,8 @@ export interface InitEvent extends BaseJsonStreamEvent {
|
|||||||
type: JsonStreamEventType.INIT;
|
type: JsonStreamEventType.INIT;
|
||||||
session_id: string;
|
session_id: string;
|
||||||
model: string;
|
model: string;
|
||||||
|
auth_method?: string;
|
||||||
|
user_tier?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface MessageEvent extends BaseJsonStreamEvent {
|
export interface MessageEvent extends BaseJsonStreamEvent {
|
||||||
@@ -86,6 +92,9 @@ export interface StreamStats {
|
|||||||
input: number;
|
input: number;
|
||||||
duration_ms: number;
|
duration_ms: number;
|
||||||
tool_calls: number;
|
tool_calls: number;
|
||||||
|
api_requests?: number;
|
||||||
|
api_errors?: number;
|
||||||
|
retry_count?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ResultEvent extends BaseJsonStreamEvent {
|
export interface ResultEvent extends BaseJsonStreamEvent {
|
||||||
@@ -98,10 +107,26 @@ export interface ResultEvent extends BaseJsonStreamEvent {
|
|||||||
stats?: StreamStats;
|
stats?: StreamStats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface RetryEvent extends BaseJsonStreamEvent {
|
||||||
|
type: JsonStreamEventType.RETRY;
|
||||||
|
attempt: number;
|
||||||
|
max_attempts: number;
|
||||||
|
delay_ms: number;
|
||||||
|
error?: string;
|
||||||
|
model: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface LoopDetectedStreamEvent extends BaseJsonStreamEvent {
|
||||||
|
type: JsonStreamEventType.LOOP_DETECTED;
|
||||||
|
loop_type?: string;
|
||||||
|
}
|
||||||
|
|
||||||
export type JsonStreamEvent =
|
export type JsonStreamEvent =
|
||||||
| InitEvent
|
| InitEvent
|
||||||
| MessageEvent
|
| MessageEvent
|
||||||
| ToolUseEvent
|
| ToolUseEvent
|
||||||
| ToolResultEvent
|
| ToolResultEvent
|
||||||
| ErrorEvent
|
| ErrorEvent
|
||||||
| ResultEvent;
|
| ResultEvent
|
||||||
|
| RetryEvent
|
||||||
|
| LoopDetectedStreamEvent;
|
||||||
|
|||||||
@@ -118,6 +118,9 @@ export class LoopDetectionService {
|
|||||||
private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL;
|
private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL;
|
||||||
private lastCheckTurn = 0;
|
private lastCheckTurn = 0;
|
||||||
|
|
||||||
|
// Detected loop type tracking
|
||||||
|
private detectedLoopType: string | null = null;
|
||||||
|
|
||||||
// Session-level disable flag
|
// Session-level disable flag
|
||||||
private disabledForSession = false;
|
private disabledForSession = false;
|
||||||
|
|
||||||
@@ -208,6 +211,7 @@ export class LoopDetectionService {
|
|||||||
this.toolCallRepetitionCount = 1;
|
this.toolCallRepetitionCount = 1;
|
||||||
}
|
}
|
||||||
if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) {
|
if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) {
|
||||||
|
this.detectedLoopType = 'consecutive_identical_tool_calls';
|
||||||
logLoopDetected(
|
logLoopDetected(
|
||||||
this.config,
|
this.config,
|
||||||
new LoopDetectedEvent(
|
new LoopDetectedEvent(
|
||||||
@@ -321,6 +325,7 @@ export class LoopDetectionService {
|
|||||||
const chunkHash = createHash('sha256').update(currentChunk).digest('hex');
|
const chunkHash = createHash('sha256').update(currentChunk).digest('hex');
|
||||||
|
|
||||||
if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) {
|
if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) {
|
||||||
|
this.detectedLoopType = 'chanting_identical_sentences';
|
||||||
logLoopDetected(
|
logLoopDetected(
|
||||||
this.config,
|
this.config,
|
||||||
new LoopDetectedEvent(
|
new LoopDetectedEvent(
|
||||||
@@ -575,6 +580,7 @@ export class LoopDetectionService {
|
|||||||
result: Record<string, unknown>,
|
result: Record<string, unknown>,
|
||||||
modelName: string,
|
modelName: string,
|
||||||
): void {
|
): void {
|
||||||
|
this.detectedLoopType = 'llm_detected_loop';
|
||||||
if (
|
if (
|
||||||
typeof result['unproductive_state_analysis'] === 'string' &&
|
typeof result['unproductive_state_analysis'] === 'string' &&
|
||||||
result['unproductive_state_analysis']
|
result['unproductive_state_analysis']
|
||||||
@@ -599,6 +605,13 @@ export class LoopDetectionService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the type of the most recently detected loop, or null if none.
|
||||||
|
*/
|
||||||
|
getDetectedLoopType(): string | null {
|
||||||
|
return this.detectedLoopType;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resets all loop detection state.
|
* Resets all loop detection state.
|
||||||
*/
|
*/
|
||||||
@@ -608,6 +621,7 @@ export class LoopDetectionService {
|
|||||||
this.resetContentTracking();
|
this.resetContentTracking();
|
||||||
this.resetLlmCheckTracking();
|
this.resetLlmCheckTracking();
|
||||||
this.loopDetected = false;
|
this.loopDetected = false;
|
||||||
|
this.detectedLoopType = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private resetToolCallCount(): void {
|
private resetToolCallCount(): void {
|
||||||
|
|||||||
Executable
+223
@@ -0,0 +1,223 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Gemini CLI Headless Mode Monitoring Test Script
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Purpose:
|
||||||
|
# Runs the Gemini CLI in headless mode across multiple models and output
|
||||||
|
# formats, then displays the monitoring data (auth method, API stats, retries,
|
||||||
|
# loop detection) in a readable summary.
|
||||||
|
#
|
||||||
|
# Prerequisites:
|
||||||
|
# Authentication must already be configured (API key, OAuth, or Vertex AI).
|
||||||
|
# Build the project first: npm run build
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]
|
||||||
|
#
|
||||||
|
# Options:
|
||||||
|
# --prompt <text> Override the default test prompt
|
||||||
|
# --models <list> Space-separated list of models to test (quoted)
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# ./scripts/test_gemini.sh
|
||||||
|
# ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
|
CLI="$REPO_ROOT/packages/cli/dist/index.js"
|
||||||
|
|
||||||
|
# Defaults
|
||||||
|
PROMPT="count how many files are in the current folder"
|
||||||
|
MODELS=(
|
||||||
|
"gemini-2.5-pro"
|
||||||
|
"gemini-2.5-flash"
|
||||||
|
"gemini-3.1-pro-preview"
|
||||||
|
"gemini-3-flash-preview"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse args
|
||||||
|
while [[ "$#" -gt 0 ]]; do
|
||||||
|
case "$1" in
|
||||||
|
--prompt) PROMPT="$2"; shift ;;
|
||||||
|
--models) IFS=' ' read -ra MODELS <<< "$2"; shift ;;
|
||||||
|
*) echo "Unknown option: $1"; exit 1 ;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
|
# Colors
|
||||||
|
BOLD='\033[1m'
|
||||||
|
DIM='\033[2m'
|
||||||
|
GREEN='\033[32m'
|
||||||
|
YELLOW='\033[33m'
|
||||||
|
RED='\033[31m'
|
||||||
|
CYAN='\033[36m'
|
||||||
|
RESET='\033[0m'
|
||||||
|
|
||||||
|
# Check prerequisites
|
||||||
|
if [[ ! -f "$CLI" ]]; then
|
||||||
|
echo -e "${RED}CLI not found at $CLI${RESET}"
|
||||||
|
echo "Run 'npm run build' from the repo root first."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v jq &>/dev/null; then
|
||||||
|
echo -e "${RED}jq is required but not installed.${RESET}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
separator() {
|
||||||
|
echo -e "${DIM}$(printf '%.0s─' {1..72})${RESET}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Header
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}Gemini CLI Headless Monitoring Test${RESET}"
|
||||||
|
separator
|
||||||
|
echo -e "${DIM}Prompt:${RESET} $PROMPT"
|
||||||
|
echo -e "${DIM}Models:${RESET} ${MODELS[*]}"
|
||||||
|
echo -e "${DIM}CLI:${RESET} $CLI"
|
||||||
|
separator
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
total_models=${#MODELS[@]}
|
||||||
|
pass_count=0
|
||||||
|
fail_count=0
|
||||||
|
|
||||||
|
for model in "${MODELS[@]}"; do
|
||||||
|
echo -e "${BOLD}${CYAN}[$model]${RESET}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ── stream-json run ──────────────────────────────────────────────────
|
||||||
|
TMPFILE=$(mktemp)
|
||||||
|
STDERRFILE=$(mktemp)
|
||||||
|
exit_code=0
|
||||||
|
|
||||||
|
echo -e " ${DIM}Running with -o stream-json ...${RESET}"
|
||||||
|
node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json \
|
||||||
|
>"$TMPFILE" 2>"$STDERRFILE" || exit_code=$?
|
||||||
|
|
||||||
|
if [[ $exit_code -ne 0 ]]; then
|
||||||
|
echo -e " ${RED}FAILED${RESET} (exit code $exit_code)"
|
||||||
|
echo ""
|
||||||
|
if [[ -s "$STDERRFILE" ]]; then
|
||||||
|
echo -e " ${DIM}stderr:${RESET}"
|
||||||
|
sed 's/^/ /' "$STDERRFILE"
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
((fail_count++))
|
||||||
|
rm -f "$TMPFILE" "$STDERRFILE"
|
||||||
|
separator
|
||||||
|
echo ""
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
((pass_count++))
|
||||||
|
|
||||||
|
# Parse init event
|
||||||
|
init_line=$(jq -c 'select(.type=="init")' "$TMPFILE" 2>/dev/null | head -1)
|
||||||
|
auth_method=$(echo "$init_line" | jq -r '.auth_method // "not set"' 2>/dev/null)
|
||||||
|
user_tier=$(echo "$init_line" | jq -r '.user_tier // "not set"' 2>/dev/null)
|
||||||
|
session_id=$(echo "$init_line" | jq -r '.session_id // "?"' 2>/dev/null)
|
||||||
|
|
||||||
|
# Parse result event
|
||||||
|
result_line=$(jq -c 'select(.type=="result")' "$TMPFILE" 2>/dev/null | tail -1)
|
||||||
|
status=$(echo "$result_line" | jq -r '.status // "?"' 2>/dev/null)
|
||||||
|
api_requests=$(echo "$result_line" | jq -r '.stats.api_requests // "?"' 2>/dev/null)
|
||||||
|
api_errors=$(echo "$result_line" | jq -r '.stats.api_errors // "?"' 2>/dev/null)
|
||||||
|
retry_count=$(echo "$result_line" | jq -r '.stats.retry_count // 0' 2>/dev/null)
|
||||||
|
total_tokens=$(echo "$result_line" | jq -r '.stats.total_tokens // "?"' 2>/dev/null)
|
||||||
|
input_tokens=$(echo "$result_line" | jq -r '.stats.input_tokens // "?"' 2>/dev/null)
|
||||||
|
output_tokens=$(echo "$result_line" | jq -r '.stats.output_tokens // "?"' 2>/dev/null)
|
||||||
|
cached=$(echo "$result_line" | jq -r '.stats.cached // "?"' 2>/dev/null)
|
||||||
|
tool_calls=$(echo "$result_line" | jq -r '.stats.tool_calls // 0' 2>/dev/null)
|
||||||
|
duration_ms=$(echo "$result_line" | jq -r '.stats.duration_ms // "?"' 2>/dev/null)
|
||||||
|
|
||||||
|
# Count retries and loop events
|
||||||
|
retry_events=$(jq -c 'select(.type=="retry")' "$TMPFILE" 2>/dev/null | wc -l | tr -d ' ')
|
||||||
|
loop_events=$(jq -c 'select(.type=="loop_detected")' "$TMPFILE" 2>/dev/null)
|
||||||
|
if [[ -n "$loop_events" ]]; then
|
||||||
|
loop_count=$(echo "$loop_events" | wc -l | tr -d ' ')
|
||||||
|
loop_type=$(echo "$loop_events" | jq -r '.loop_type // empty' 2>/dev/null | head -1)
|
||||||
|
else
|
||||||
|
loop_count=0
|
||||||
|
loop_type=""
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract assistant response (concatenate deltas)
|
||||||
|
response=$(jq -r 'select(.type=="message" and .role=="assistant") | .content' "$TMPFILE" 2>/dev/null | tr -d '\n')
|
||||||
|
# Truncate for display
|
||||||
|
if [[ ${#response} -gt 120 ]]; then
|
||||||
|
response="${response:0:120}..."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Format duration
|
||||||
|
if [[ "$duration_ms" != "?" ]]; then
|
||||||
|
duration_s=$(echo "scale=1; $duration_ms / 1000" | bc 2>/dev/null || echo "$duration_ms ms")
|
||||||
|
duration_display="${duration_s}s"
|
||||||
|
else
|
||||||
|
duration_display="?"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Display
|
||||||
|
echo -e " ${BOLD}Auth & Session${RESET}"
|
||||||
|
echo -e " auth_method: ${GREEN}$auth_method${RESET}"
|
||||||
|
echo -e " user_tier: $user_tier"
|
||||||
|
echo -e " session_id: ${DIM}$session_id${RESET}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo -e " ${BOLD}API Stats${RESET}"
|
||||||
|
echo -e " status: $([ "$status" = "success" ] && echo "${GREEN}$status${RESET}" || echo "${RED}$status${RESET}")"
|
||||||
|
echo -e " api_requests: $api_requests"
|
||||||
|
echo -e " api_errors: $([ "$api_errors" = "0" ] && echo "$api_errors" || echo "${RED}$api_errors${RESET}")"
|
||||||
|
echo -e " retry_count: $([ "$retry_count" = "0" ] && echo "$retry_count" || echo "${YELLOW}$retry_count${RESET}")"
|
||||||
|
echo -e " duration: $duration_display"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo -e " ${BOLD}Tokens${RESET}"
|
||||||
|
echo -e " total: $total_tokens (in: $input_tokens, out: $output_tokens, cached: $cached)"
|
||||||
|
echo -e " tools: $tool_calls calls"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if [[ "$retry_events" -gt 0 ]]; then
|
||||||
|
echo -e " ${BOLD}${YELLOW}Retries ($retry_events)${RESET}"
|
||||||
|
jq -r 'select(.type=="retry") | " attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' "$TMPFILE" 2>/dev/null
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "$loop_count" -gt 0 ]]; then
|
||||||
|
echo -e " ${BOLD}${RED}Loop Detected${RESET}"
|
||||||
|
echo -e " type: ${loop_type:-unknown}"
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo -e " ${BOLD}Response${RESET}"
|
||||||
|
echo -e " ${DIM}$response${RESET}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Show stderr if any
|
||||||
|
stderr_content=$(cat "$STDERRFILE")
|
||||||
|
if [[ -n "$stderr_content" ]]; then
|
||||||
|
echo -e " ${BOLD}Stderr${RESET}"
|
||||||
|
echo "$stderr_content" | sed 's/^/ /'
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f "$TMPFILE" "$STDERRFILE"
|
||||||
|
separator
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
echo -e "${BOLD}Summary${RESET}"
|
||||||
|
echo -e " Models tested: $total_models"
|
||||||
|
echo -e " Passed: ${GREEN}$pass_count${RESET}"
|
||||||
|
if [[ $fail_count -gt 0 ]]; then
|
||||||
|
echo -e " Failed: ${RED}$fail_count${RESET}"
|
||||||
|
else
|
||||||
|
echo -e " Failed: $fail_count"
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
Reference in New Issue
Block a user