feat(headless): surface diagnostic monitoring data in non-interactive output

When running Gemini CLI in headless mode (-p), critical diagnostic data
like auth method, API retry attempts, loop detection, and request stats
was invisible despite being tracked internally. This change surfaces
that data across all three output formats (stream-json, json, text).

Changes:
- Add RETRY and LOOP_DETECTED event types to stream-json output
- Include auth_method and user_tier in init events and JSON output
- Add api_requests, api_errors, and retry_count to result stats
- Track and expose detected loop type (tool call, chanting, LLM-detected)
- Emit [RETRY] and [WARNING] messages to stderr in text mode
- Listen to CoreEvent.RetryAttempt in non-interactive CLI
- Add test script (scripts/test_gemini.sh) for manual verification
This commit is contained in:
Dmitry Lyalin
2026-02-24 23:37:39 -08:00
parent bf278ef2b0
commit a4b3229513
10 changed files with 364 additions and 15 deletions
@@ -3,8 +3,8 @@
exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'loop detected' 1`] = `
"{"type":"init","timestamp":"<TIMESTAMP>","session_id":"test-session-id","model":"test-model"}
{"type":"message","timestamp":"<TIMESTAMP>","role":"user","content":"Loop test"}
{"type":"error","timestamp":"<TIMESTAMP>","severity":"warning","message":"Loop detected, stopping execution"}
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0}}
{"type":"loop_detected","timestamp":"<TIMESTAMP>"}
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0,"api_requests":0,"api_errors":0}}
"
`;
@@ -12,7 +12,7 @@ exports[`runNonInteractive > should emit appropriate error event in streaming JS
"{"type":"init","timestamp":"<TIMESTAMP>","session_id":"test-session-id","model":"test-model"}
{"type":"message","timestamp":"<TIMESTAMP>","role":"user","content":"Max turns test"}
{"type":"error","timestamp":"<TIMESTAMP>","severity":"error","message":"Maximum session turns exceeded"}
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0}}
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0,"api_requests":0,"api_errors":0}}
"
`;
@@ -23,7 +23,7 @@ exports[`runNonInteractive > should emit appropriate events for streaming JSON o
{"type":"tool_use","timestamp":"<TIMESTAMP>","tool_name":"testTool","tool_id":"tool-1","parameters":{"arg1":"value1"}}
{"type":"tool_result","timestamp":"<TIMESTAMP>","tool_id":"tool-1","status":"success","output":"Tool executed successfully"}
{"type":"message","timestamp":"<TIMESTAMP>","role":"assistant","content":"Final answer","delta":true}
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0}}
{"type":"result","timestamp":"<TIMESTAMP>","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":<DURATION>,"tool_calls":0,"api_requests":0,"api_errors":0}}
"
`;
@@ -183,6 +183,7 @@ describe('runNonInteractive', () => {
getIdeMode: vi.fn().mockReturnValue(false),
getContentGeneratorConfig: vi.fn().mockReturnValue({}),
getUserTierName: vi.fn().mockReturnValue(undefined),
getDebugMode: vi.fn().mockReturnValue(false),
getOutputFormat: vi.fn().mockReturnValue('text'),
getModel: vi.fn().mockReturnValue('test-model'),
+60 -6
View File
@@ -9,6 +9,7 @@ import type {
ToolCallRequestInfo,
ResumedSessionData,
UserFeedbackPayload,
RetryAttemptPayload,
} from '@google/gemini-cli-core';
import { isSlashCommand } from './ui/utils/commandUtils.js';
import type { LoadedSettings } from './config/settings.js';
@@ -94,6 +95,7 @@ export async function runNonInteractive({
};
const startTime = Date.now();
let retryCount = 0;
const streamFormatter =
config.getOutputFormat() === OutputFormat.STREAM_JSON
? new StreamJsonFormatter()
@@ -181,6 +183,26 @@ export async function runNonInteractive({
}
};
// Listener for CoreEvent.RetryAttempt (registered/unregistered around the
// non-interactive run). Tallies retries and surfaces each attempt to the
// user in the active output format:
//   - stream-json: structured RETRY event via streamFormatter
//   - text: human-readable [RETRY] line on stderr
//   - json: silent here; retryCount still feeds the final stats
//     (NOTE(review): confirm JSON mode intentionally gets no per-attempt output)
const handleRetryAttempt = (payload: RetryAttemptPayload) => {
// Count every attempt so the final result stats can report retry_count.
retryCount++;
if (streamFormatter) {
// stream-json mode: emit one machine-readable event per retry attempt.
streamFormatter.emitEvent({
type: JsonStreamEventType.RETRY,
timestamp: new Date().toISOString(),
attempt: payload.attempt,
max_attempts: payload.maxAttempts,
delay_ms: payload.delayMs,
error: payload.error,
model: payload.model,
});
} else if (config.getOutputFormat() === OutputFormat.TEXT) {
// Text mode: write to stderr so retries never pollute piped stdout.
const errorSuffix = payload.error ? `: ${payload.error}` : '';
process.stderr.write(
`[RETRY] Attempt ${payload.attempt}/${payload.maxAttempts} for model ${payload.model} (delay: ${payload.delayMs}ms)${errorSuffix}\n`,
);
}
};
let errorToHandle: unknown | undefined;
try {
consolePatcher.patch();
@@ -199,6 +221,8 @@ export async function runNonInteractive({
setupStdinCancellation();
coreEvents.on(CoreEvent.UserFeedback, handleUserFeedback);
coreEvents.on(CoreEvent.RetryAttempt, handleRetryAttempt);
coreEvents.drainBacklogs();
// Handle EPIPE errors when the output is piped to a command that closes early.
@@ -228,12 +252,16 @@ export async function runNonInteractive({
}
// Emit init event for streaming JSON
const authMethod = config.getContentGeneratorConfig()?.authType;
const userTier = config.getUserTierName();
if (streamFormatter) {
streamFormatter.emitEvent({
type: JsonStreamEventType.INIT,
timestamp: new Date().toISOString(),
session_id: config.getSessionId(),
model: config.getModel(),
...(authMethod && { auth_method: authMethod }),
...(userTier && { user_tier: userTier }),
});
}
@@ -345,13 +373,18 @@ export async function runNonInteractive({
}
toolCallRequests.push(event.value);
} else if (event.type === GeminiEventType.LoopDetected) {
const loopType = event.value?.loopType;
if (streamFormatter) {
streamFormatter.emitEvent({
type: JsonStreamEventType.ERROR,
type: JsonStreamEventType.LOOP_DETECTED,
timestamp: new Date().toISOString(),
severity: 'warning',
message: 'Loop detected, stopping execution',
...(loopType && { loop_type: loopType }),
});
} else if (config.getOutputFormat() === OutputFormat.TEXT) {
const loopTypeStr = loopType ? ` (${loopType})` : '';
process.stderr.write(
`[WARNING] Loop detected${loopTypeStr}, stopping execution\n`,
);
}
} else if (event.type === GeminiEventType.MaxSessionTurns) {
if (streamFormatter) {
@@ -380,6 +413,7 @@ export async function runNonInteractive({
stats: streamFormatter.convertToStreamStats(
metrics,
durationMs,
retryCount,
),
});
}
@@ -479,13 +513,21 @@ export async function runNonInteractive({
stats: streamFormatter.convertToStreamStats(
metrics,
durationMs,
retryCount,
),
});
} else if (config.getOutputFormat() === OutputFormat.JSON) {
const formatter = new JsonFormatter();
const stats = uiTelemetryService.getMetrics();
textOutput.write(
formatter.format(config.getSessionId(), responseText, stats),
formatter.format(
config.getSessionId(),
responseText,
stats,
undefined,
authMethod,
userTier,
),
);
} else {
textOutput.ensureTrailingNewline(); // Ensure a final newline
@@ -503,13 +545,24 @@ export async function runNonInteractive({
type: JsonStreamEventType.RESULT,
timestamp: new Date().toISOString(),
status: 'success',
stats: streamFormatter.convertToStreamStats(metrics, durationMs),
stats: streamFormatter.convertToStreamStats(
metrics,
durationMs,
retryCount,
),
});
} else if (config.getOutputFormat() === OutputFormat.JSON) {
const formatter = new JsonFormatter();
const stats = uiTelemetryService.getMetrics();
textOutput.write(
formatter.format(config.getSessionId(), responseText, stats),
formatter.format(
config.getSessionId(),
responseText,
stats,
undefined,
authMethod,
userTier,
),
);
} else {
textOutput.ensureTrailingNewline(); // Ensure a final newline
@@ -524,6 +577,7 @@ export async function runNonInteractive({
cleanupStdinCancellation();
consolePatcher.cleanup();
coreEvents.off(CoreEvent.RetryAttempt, handleRetryAttempt);
coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback);
}
+10 -2
View File
@@ -636,7 +636,11 @@ export class GeminiClient {
const loopDetected = await this.loopDetector.turnStarted(signal);
if (loopDetected) {
yield { type: GeminiEventType.LoopDetected };
const loopType = this.loopDetector.getDetectedLoopType();
yield {
type: GeminiEventType.LoopDetected,
value: loopType ? { loopType } : undefined,
};
return turn;
}
@@ -689,7 +693,11 @@ export class GeminiClient {
for await (const event of resultStream) {
if (this.loopDetector.addAndCheck(event)) {
yield { type: GeminiEventType.LoopDetected };
const loopType = this.loopDetector.getDetectedLoopType();
yield {
type: GeminiEventType.LoopDetected,
value: loopType ? { loopType } : undefined,
};
controller.abort();
return turn;
}
+1
View File
@@ -207,6 +207,7 @@ export type ServerGeminiFinishedEvent = {
/**
 * Server stream event emitted when the loop detector halts generation.
 * `value.loopType`, when present, names the detected loop category
 * (e.g. 'consecutive_identical_tool_calls', 'chanting_identical_sentences',
 * 'llm_detected_loop'); it is omitted when the type is unknown.
 */
export type ServerGeminiLoopDetectedEvent = {
type: GeminiEventType.LoopDetected;
value?: { loopType: string };
};
export type ServerGeminiCitationEvent = {
@@ -14,6 +14,8 @@ export class JsonFormatter {
response?: string,
stats?: SessionMetrics,
error?: JsonError,
authMethod?: string,
userTier?: string,
): string {
const output: JsonOutput = {};
@@ -21,6 +23,14 @@ export class JsonFormatter {
output.session_id = sessionId;
}
if (authMethod) {
output.auth_method = authMethod;
}
if (userTier) {
output.user_tier = userTier;
}
if (response !== undefined) {
output.response = stripAnsi(response);
}
@@ -39,23 +39,28 @@ export class StreamJsonFormatter {
convertToStreamStats(
metrics: SessionMetrics,
durationMs: number,
retryCount?: number,
): StreamStats {
let totalTokens = 0;
let inputTokens = 0;
let outputTokens = 0;
let cached = 0;
let input = 0;
let apiRequests = 0;
let apiErrors = 0;
// Aggregate token counts across all models
// Aggregate token counts and API stats across all models
for (const modelMetrics of Object.values(metrics.models)) {
totalTokens += modelMetrics.tokens.total;
inputTokens += modelMetrics.tokens.prompt;
outputTokens += modelMetrics.tokens.candidates;
cached += modelMetrics.tokens.cached;
input += modelMetrics.tokens.input;
apiRequests += modelMetrics.api.totalRequests;
apiErrors += modelMetrics.api.totalErrors;
}
return {
const stats: StreamStats = {
total_tokens: totalTokens,
input_tokens: inputTokens,
output_tokens: outputTokens,
@@ -63,6 +68,14 @@ export class StreamJsonFormatter {
input,
duration_ms: durationMs,
tool_calls: metrics.tools.totalCalls,
api_requests: apiRequests,
api_errors: apiErrors,
};
if (retryCount && retryCount > 0) {
stats.retry_count = retryCount;
}
return stats;
}
}
+26 -1
View File
@@ -20,6 +20,8 @@ export interface JsonError {
export interface JsonOutput {
session_id?: string;
auth_method?: string;
user_tier?: string;
response?: string;
stats?: SessionMetrics;
error?: JsonError;
@@ -33,6 +35,8 @@ export enum JsonStreamEventType {
TOOL_RESULT = 'tool_result',
ERROR = 'error',
RESULT = 'result',
RETRY = 'retry',
LOOP_DETECTED = 'loop_detected',
}
export interface BaseJsonStreamEvent {
@@ -44,6 +48,8 @@ export interface InitEvent extends BaseJsonStreamEvent {
type: JsonStreamEventType.INIT;
session_id: string;
model: string;
auth_method?: string;
user_tier?: string;
}
export interface MessageEvent extends BaseJsonStreamEvent {
@@ -86,6 +92,9 @@ export interface StreamStats {
input: number;
duration_ms: number;
tool_calls: number;
api_requests?: number;
api_errors?: number;
retry_count?: number;
}
export interface ResultEvent extends BaseJsonStreamEvent {
@@ -98,10 +107,26 @@ export interface ResultEvent extends BaseJsonStreamEvent {
stats?: StreamStats;
}
/**
 * Stream-JSON event emitted once per API retry attempt, so headless
 * consumers can observe transient failures as they happen.
 */
export interface RetryEvent extends BaseJsonStreamEvent {
type: JsonStreamEventType.RETRY;
// 1-based index of this attempt.
attempt: number;
// Total attempts the retry policy allows.
max_attempts: number;
// Backoff delay before this attempt, in milliseconds.
delay_ms: number;
// Message from the error that triggered the retry, when available.
error?: string;
// Model the retried request targets.
model: string;
}
/**
 * Stream-JSON event emitted when loop detection stops execution.
 * `loop_type` carries the detected category when known (e.g.
 * 'consecutive_identical_tool_calls'); omitted otherwise.
 */
export interface LoopDetectedStreamEvent extends BaseJsonStreamEvent {
type: JsonStreamEventType.LOOP_DETECTED;
loop_type?: string;
}
export type JsonStreamEvent =
| InitEvent
| MessageEvent
| ToolUseEvent
| ToolResultEvent
| ErrorEvent
| ResultEvent;
| ResultEvent
| RetryEvent
| LoopDetectedStreamEvent;
@@ -118,6 +118,9 @@ export class LoopDetectionService {
private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL;
private lastCheckTurn = 0;
// Detected loop type tracking
private detectedLoopType: string | null = null;
// Session-level disable flag
private disabledForSession = false;
@@ -208,6 +211,7 @@ export class LoopDetectionService {
this.toolCallRepetitionCount = 1;
}
if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) {
this.detectedLoopType = 'consecutive_identical_tool_calls';
logLoopDetected(
this.config,
new LoopDetectedEvent(
@@ -321,6 +325,7 @@ export class LoopDetectionService {
const chunkHash = createHash('sha256').update(currentChunk).digest('hex');
if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) {
this.detectedLoopType = 'chanting_identical_sentences';
logLoopDetected(
this.config,
new LoopDetectedEvent(
@@ -575,6 +580,7 @@ export class LoopDetectionService {
result: Record<string, unknown>,
modelName: string,
): void {
this.detectedLoopType = 'llm_detected_loop';
if (
typeof result['unproductive_state_analysis'] === 'string' &&
result['unproductive_state_analysis']
@@ -599,6 +605,13 @@ export class LoopDetectionService {
);
}
/**
* Returns the type of the most recently detected loop, or null if none.
*
* Possible values set elsewhere in this service:
* 'consecutive_identical_tool_calls', 'chanting_identical_sentences',
* 'llm_detected_loop'. Cleared back to null by the reset routine.
*/
getDetectedLoopType(): string | null {
return this.detectedLoopType;
}
/**
* Resets all loop detection state.
*/
@@ -608,6 +621,7 @@ export class LoopDetectionService {
this.resetContentTracking();
this.resetLlmCheckTracking();
this.loopDetected = false;
this.detectedLoopType = null;
}
private resetToolCallCount(): void {