diff --git a/docs/cli/headless.md b/docs/cli/headless.md index 7de3287639..692a32fb64 100644 --- a/docs/cli/headless.md +++ b/docs/cli/headless.md @@ -17,8 +17,22 @@ You can specify the output format using the `--output-format` flag. Returns a single JSON object containing the response and usage statistics. - **Schema:** + - `session_id`: (string, optional) Session ID. + - `auth_method`: (string, optional) Authentication method. Emitted with + `--debug`. + - `user_tier`: (string, optional) User tier name. Emitted with `--debug`. - `response`: (string) The model's final answer. - `stats`: (object) Token usage and API latency metrics. + - `stats.api_requests`: (number, optional) Total API requests. Emitted with + `--debug`. + - `stats.api_errors`: (number, optional) Total API errors. Emitted with + `--debug`. + - `stats.retry_count`: (number, optional) Total retries. Emitted with + `--debug`. + - `stats.loop_detected`: (boolean, optional) Whether a loop was detected. + Emitted with `--debug`. + - `stats.loop_type`: (string, optional) Loop classification. Emitted with + `--debug`. - `error`: (object, optional) Error details if the request failed. #### Streaming JSON output @@ -26,13 +40,19 @@ Returns a single JSON object containing the response and usage statistics. Returns a stream of newline-delimited JSON (JSONL) events. - **Event types:** - - `init`: Session metadata (session ID, model). + - `init`: Session metadata (session ID, model). Includes `auth_method` and + `user_tier` with `--debug`. - `message`: User and assistant message chunks. - `tool_use`: Tool call requests with arguments. - `tool_result`: Output from executed tools. - `error`: Non-fatal warnings and system errors. + - `retry`: Retry attempt diagnostics. Emitted with `--debug`. + - `loop_detected`: Loop detection diagnostics. Emitted with `--debug`. - `result`: Final outcome with aggregated statistics. +In debug mode (`--debug`), `result.stats` also includes `api_requests`, +`api_errors`, and `retry_count`. + ## Exit codes The CLI returns standard exit codes to indicate the result of the headless diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index ea7b9e19d4..e7346d5bc7 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -734,6 +734,79 @@ describe('runNonInteractive', () => { ); }); + it('should include debug diagnostics in JSON output when debug mode is enabled', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello World' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + const debugMetrics: SessionMetrics = { + models: { + 'gemini-2.5-pro': { + api: { + totalRequests: 2, + totalErrors: 1, + totalLatencyMs: 1234, + }, + tokens: { + input: 10, + prompt: 10, + candidates: 5, + total: 15, + cached: 0, + thoughts: 0, + tool: 0, + }, + roles: {}, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { + accept: 0, + reject: 0, + modify: 0, + auto_accept: 0, + }, + byName: {}, + }, + files: { + totalLinesAdded: 0, + totalLinesRemoved: 0, + }, + }; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(mockConfig.getDebugMode).mockReturnValue(true); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + authType: 'gemini-api-key', + }); + vi.mocked(mockConfig.getUserTierName).mockReturnValue('free'); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue(debugMetrics); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-json-debug', + }); + + const parsed = JSON.parse(getWrittenOutput()); + expect(parsed.auth_method).toBe('gemini-api-key'); + expect(parsed.user_tier).toBe('free'); + expect(parsed.stats.api_requests).toBe(2); + expect(parsed.stats.api_errors).toBe(1); + expect(parsed.stats.retry_count).toBe(0); + }); + it('should write JSON output with stats for tool-only commands (no text response)', async () => { // Test the scenario where a command completes successfully with only tool calls // but no text response - this would have caught the original bug @@ -1416,6 +1489,10 @@ describe('runNonInteractive', () => { CoreEvent.UserFeedback, expect.any(Function), ); + expect(mockCoreEvents.on).toHaveBeenCalledWith( + CoreEvent.RetryAttempt, + expect.any(Function), + ); expect(mockCoreEvents.drainBacklogs).toHaveBeenCalledTimes(1); }); @@ -1441,6 +1518,10 @@ describe('runNonInteractive', () => { CoreEvent.UserFeedback, expect.any(Function), ); + expect(mockCoreEvents.off).toHaveBeenCalledWith( + CoreEvent.RetryAttempt, + expect.any(Function), + ); }); it('logs to process.stderr when UserFeedback event is received', async () => { @@ -1724,6 +1805,75 @@ describe('runNonInteractive', () => { }, ); + it('should emit loop_detected and legacy warning error events in debug stream-json mode', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(mockConfig.getDebugMode).mockReturnValue(true); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + authType: 'oauth-personal', + }); + vi.mocked(mockConfig.getUserTierName).mockReturnValue('pro'); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.LoopDetected, + value: { loopType: 'llm_detected_loop' }, + }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Loop debug test', + prompt_id: 'prompt-id-loop-debug', + }); + + const outputLines = getWrittenOutput() + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + + expect(outputLines[0]).toMatchObject({ + type: 'init', + auth_method: 'oauth-personal', + user_tier: 'pro', + }); + expect(outputLines).toContainEqual( + expect.objectContaining({ + type: 'loop_detected', + loop_type: 'llm_detected_loop', + }), + ); + expect(outputLines).toContainEqual( + expect.objectContaining({ + type: 'error', + severity: 'warning', + message: 'Loop detected, stopping execution', + }), + ); + expect(outputLines).toContainEqual( + expect.objectContaining({ + type: 'result', + stats: expect.objectContaining({ + api_requests: 0, + api_errors: 0, + retry_count: 0, + }), + }), + ); + }); + it('should log error when tool recording fails', async () => { const toolCallEvent: ServerGeminiStreamEvent = { type: GeminiEventType.ToolCallRequest, diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 669e945269..9f63cf8e15 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -96,6 +96,8 @@ export async function runNonInteractive({ const startTime = Date.now(); let retryCount = 0; + let loopDetected = false; + let detectedLoopType: string | undefined; const debugMode = config.getDebugMode(); const streamFormatter = config.getOutputFormat() === OutputFormat.STREAM_JSON @@ -377,14 +379,26 @@ export async function runNonInteractive({ } toolCallRequests.push(event.value); } else if (event.type === GeminiEventType.LoopDetected) { + const loopType = event.value?.loopType; + loopDetected = true; + if (loopType) { + detectedLoopType = loopType; + } + if (debugMode) { - const loopType = event.value?.loopType; if (streamFormatter) { streamFormatter.emitEvent({ type: JsonStreamEventType.LOOP_DETECTED, timestamp: new Date().toISOString(), ...(loopType && { loop_type: loopType }), }); + // Keep emitting the legacy warning event for existing parsers. + streamFormatter.emitEvent({ + type: JsonStreamEventType.ERROR, + timestamp: new Date().toISOString(), + severity: 'warning', + message: 'Loop detected, stopping execution', + }); } else if (config.getOutputFormat() === OutputFormat.TEXT) { const loopTypeStr = loopType ? ` (${loopType})` : ''; process.stderr.write( @@ -540,6 +554,12 @@ export async function runNonInteractive({ undefined, authMethod, userTier, + { + includeDiagnostics: debugMode, + retryCount, + loopDetected, + loopType: detectedLoopType, + }, ), ); } else { @@ -574,6 +594,12 @@ export async function runNonInteractive({ undefined, authMethod, userTier, + { + includeDiagnostics: debugMode, + retryCount, + loopDetected, + loopType: detectedLoopType, + }, ), ); } else { diff --git a/packages/core/src/output/json-formatter.test.ts b/packages/core/src/output/json-formatter.test.ts index 13321fae77..55f2211c40 100644 --- a/packages/core/src/output/json-formatter.test.ts +++ b/packages/core/src/output/json-formatter.test.ts @@ -41,6 +41,25 @@ describe('JsonFormatter', () => { expect(parsed.response).toBe('Red text and Green text'); }); + it('should include auth method and user tier when provided', () => { + const formatter = new JsonFormatter(); + const formatted = formatter.format( + 'test-session-id', + 'hello', + undefined, + undefined, + 'gemini-api-key', + 'free', + ); + + expect(JSON.parse(formatted)).toEqual({ + session_id: 'test-session-id', + auth_method: 'gemini-api-key', + user_tier: 'free', + response: 'hello', + }); + }); + it('should strip control characters from response text', () => { const formatter = new JsonFormatter(); const responseWithControlChars = @@ -138,6 +157,87 @@ describe('JsonFormatter', () => { expect(JSON.parse(formatted)).toEqual(expected); }); + it('should include debug diagnostic stats when enabled', () => { + const formatter = new JsonFormatter(); + const stats: SessionMetrics = { + models: { + 'gemini-2.5-pro': { + api: { + totalRequests: 2, + totalErrors: 1, + totalLatencyMs: 1234, + }, + tokens: { + input: 10, + prompt: 10, + candidates: 5, + total: 15, + cached: 0, + thoughts: 0, + tool: 0, + }, + roles: {}, + }, + 'gemini-2.5-flash': { + api: { + totalRequests: 3, + totalErrors: 0, + totalLatencyMs: 2345, + }, + tokens: { + input: 10, + prompt: 10, + candidates: 5, + total: 15, + cached: 0, + thoughts: 0, + tool: 0, + }, + roles: {}, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { + accept: 0, + reject: 0, + modify: 0, + auto_accept: 0, + }, + byName: {}, + }, + files: { + totalLinesAdded: 0, + totalLinesRemoved: 0, + }, + }; + + const formatted = formatter.format( + 'test-session-id', + 'hello', + stats, + undefined, + 'oauth-personal', + 'pro', + { + includeDiagnostics: true, + retryCount: 0, + loopDetected: true, + loopType: 'llm_detected_loop', + }, + ); + + const parsed = JSON.parse(formatted); + expect(parsed.stats.api_requests).toBe(5); + expect(parsed.stats.api_errors).toBe(1); + expect(parsed.stats.retry_count).toBe(0); + expect(parsed.stats.loop_detected).toBe(true); + expect(parsed.stats.loop_type).toBe('llm_detected_loop'); + }); + it('should format error as JSON', () => { const formatter = new JsonFormatter(); const error: JsonError = { diff --git a/packages/core/src/output/json-formatter.ts b/packages/core/src/output/json-formatter.ts index 040c5ad7c1..5becbdc409 100644 --- a/packages/core/src/output/json-formatter.ts +++ b/packages/core/src/output/json-formatter.ts @@ -6,7 +6,14 @@ import stripAnsi from 'strip-ansi'; import type { SessionMetrics } from '../telemetry/uiTelemetry.js'; -import type { JsonError, JsonOutput } from './types.js'; +import type { JsonError, JsonOutput, JsonOutputStats } from './types.js'; + +type JsonFormatDiagnostics = { + includeDiagnostics?: boolean; + retryCount?: number; + loopDetected?: boolean; + loopType?: string; +}; export class JsonFormatter { format( @@ -16,6 +23,7 @@ export class JsonFormatter { error?: JsonError, authMethod?: string, userTier?: string, + diagnostics?: JsonFormatDiagnostics, ): string { const output: JsonOutput = {}; @@ -36,7 +44,28 @@ export class JsonFormatter { } if (stats) { - output.stats = stats; + const outputStats: JsonOutputStats = { ...stats }; + if (diagnostics?.includeDiagnostics) { + let apiRequests = 0; + let apiErrors = 0; + for (const modelMetrics of Object.values(stats.models)) { + apiRequests += modelMetrics.api.totalRequests; + apiErrors += modelMetrics.api.totalErrors; + } + + outputStats.api_requests = apiRequests; + outputStats.api_errors = apiErrors; + outputStats.retry_count = diagnostics.retryCount ?? 0; + + if (diagnostics.loopDetected) { + outputStats.loop_detected = true; + } + + if (diagnostics.loopType) { + outputStats.loop_type = diagnostics.loopType; + } + } + output.stats = outputStats; } if (error) { diff --git a/packages/core/src/output/stream-json-formatter.test.ts b/packages/core/src/output/stream-json-formatter.test.ts index 69dbaac23b..08499790bc 100644 --- a/packages/core/src/output/stream-json-formatter.test.ts +++ b/packages/core/src/output/stream-json-formatter.test.ts @@ -473,6 +473,68 @@ describe('StreamJsonFormatter', () => { expect(result.duration_ms).toBe(5000); }); + + it('should include diagnostic stats when enabled', () => { + const metrics = createMockMetrics(); + metrics.models['gemini-pro'] = { + api: { totalRequests: 2, totalErrors: 1, totalLatencyMs: 1000 }, + tokens: { + input: 10, + prompt: 10, + candidates: 5, + total: 15, + cached: 0, + thoughts: 0, + tool: 0, + }, + roles: {}, + }; + metrics.models['gemini-flash'] = { + api: { totalRequests: 3, totalErrors: 0, totalLatencyMs: 2000 }, + tokens: { + input: 20, + prompt: 20, + candidates: 10, + total: 30, + cached: 0, + thoughts: 0, + tool: 0, + }, + roles: {}, + }; + + const result = formatter.convertToStreamStats(metrics, 750, { + includeDiagnostics: true, + retryCount: 0, + }); + + expect(result.api_requests).toBe(5); + expect(result.api_errors).toBe(1); + expect(result.retry_count).toBe(0); + }); + + it('should not include diagnostic stats when disabled', () => { + const metrics = createMockMetrics(); + metrics.models['gemini-pro'] = { + api: { totalRequests: 2, totalErrors: 1, totalLatencyMs: 1000 }, + tokens: { + input: 10, + prompt: 10, + candidates: 5, + total: 15, + cached: 0, + thoughts: 0, + tool: 0, + }, + roles: {}, + }; + + const result = formatter.convertToStreamStats(metrics, 750); + + expect(result.api_requests).toBeUndefined(); + expect(result.api_errors).toBeUndefined(); + expect(result.retry_count).toBeUndefined(); + }); }); describe('JSON validity', () => { diff --git a/packages/core/src/output/stream-json-formatter.ts b/packages/core/src/output/stream-json-formatter.ts index fb57154f6d..82aaf805d0 100644 --- a/packages/core/src/output/stream-json-formatter.ts +++ b/packages/core/src/output/stream-json-formatter.ts @@ -75,10 +75,7 @@ export class StreamJsonFormatter { } stats.api_requests = apiRequests; stats.api_errors = apiErrors; - - if (options.retryCount && options.retryCount > 0) { - stats.retry_count = options.retryCount; - } + stats.retry_count = options.retryCount ?? 0; } return stats; diff --git a/packages/core/src/output/types.ts b/packages/core/src/output/types.ts index 91a2d9269b..8ad4a25d86 100644 --- a/packages/core/src/output/types.ts +++ b/packages/core/src/output/types.ts @@ -18,12 +18,20 @@ export interface JsonError { code?: string | number; } +export interface JsonOutputStats extends SessionMetrics { + api_requests?: number; + api_errors?: number; + retry_count?: number; + loop_detected?: boolean; + loop_type?: string; +} + export interface JsonOutput { session_id?: string; auth_method?: string; user_tier?: string; response?: string; - stats?: SessionMetrics; + stats?: JsonOutputStats; error?: JsonError; }