fix(headless): complete debug diagnostics parity for json and stream-json

This commit is contained in:
Dmitry Lyalin
2026-02-25 10:23:16 -08:00
parent 8fb2f1e7f8
commit 96aa6004fb
8 changed files with 401 additions and 9 deletions
+21 -1
View File
@@ -17,8 +17,22 @@ You can specify the output format using the `--output-format` flag.
Returns a single JSON object containing the response and usage statistics.
- **Schema:**
- `session_id`: (string, optional) Session ID.
- `auth_method`: (string, optional) Authentication method. Emitted with
`--debug`.
- `user_tier`: (string, optional) User tier name. Emitted with `--debug`.
- `response`: (string) The model's final answer.
- `stats`: (object) Token usage and API latency metrics.
- `stats.api_requests`: (number, optional) Total API requests. Emitted with
`--debug`.
- `stats.api_errors`: (number, optional) Total API errors. Emitted with
`--debug`.
- `stats.retry_count`: (number, optional) Total retries. Emitted with
`--debug`.
- `stats.loop_detected`: (boolean, optional) Whether a loop was detected.
Emitted with `--debug`.
- `stats.loop_type`: (string, optional) Loop classification. Emitted with
`--debug`.
- `error`: (object, optional) Error details if the request failed.
#### Streaming JSON output
@@ -26,13 +40,19 @@ Returns a single JSON object containing the response and usage statistics.
Returns a stream of newline-delimited JSON (JSONL) events.
- **Event types:**
- `init`: Session metadata (session ID, model). Includes `auth_method` and
`user_tier` with `--debug`.
- `message`: User and assistant message chunks.
- `tool_use`: Tool call requests with arguments.
- `tool_result`: Output from executed tools.
- `error`: Non-fatal warnings and system errors.
- `retry`: Retry attempt diagnostics. Emitted with `--debug`.
- `loop_detected`: Loop detection diagnostics. Emitted with `--debug`.
- `result`: Final outcome with aggregated statistics.
In debug mode (`--debug`), `result.stats` also includes `api_requests`,
`api_errors`, and `retry_count`.
## Exit codes
The CLI returns standard exit codes to indicate the result of the headless
+150
View File
@@ -734,6 +734,79 @@ describe('runNonInteractive', () => {
);
});
it('should include debug diagnostics in JSON output when debug mode is enabled', async () => {
const events: ServerGeminiStreamEvent[] = [
{ type: GeminiEventType.Content, value: 'Hello World' },
{
type: GeminiEventType.Finished,
value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } },
},
];
const debugMetrics: SessionMetrics = {
models: {
'gemini-2.5-pro': {
api: {
totalRequests: 2,
totalErrors: 1,
totalLatencyMs: 1234,
},
tokens: {
input: 10,
prompt: 10,
candidates: 5,
total: 15,
cached: 0,
thoughts: 0,
tool: 0,
},
roles: {},
},
},
tools: {
totalCalls: 0,
totalSuccess: 0,
totalFail: 0,
totalDurationMs: 0,
totalDecisions: {
accept: 0,
reject: 0,
modify: 0,
auto_accept: 0,
},
byName: {},
},
files: {
totalLinesAdded: 0,
totalLinesRemoved: 0,
},
};
mockGeminiClient.sendMessageStream.mockReturnValue(
createStreamFromEvents(events),
);
vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON);
vi.mocked(mockConfig.getDebugMode).mockReturnValue(true);
vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
authType: 'gemini-api-key',
});
vi.mocked(mockConfig.getUserTierName).mockReturnValue('free');
vi.mocked(uiTelemetryService.getMetrics).mockReturnValue(debugMetrics);
await runNonInteractive({
config: mockConfig,
settings: mockSettings,
input: 'Test input',
prompt_id: 'prompt-id-json-debug',
});
const parsed = JSON.parse(getWrittenOutput());
expect(parsed.auth_method).toBe('gemini-api-key');
expect(parsed.user_tier).toBe('free');
expect(parsed.stats.api_requests).toBe(2);
expect(parsed.stats.api_errors).toBe(1);
expect(parsed.stats.retry_count).toBe(0);
});
it('should write JSON output with stats for tool-only commands (no text response)', async () => {
// Test the scenario where a command completes successfully with only tool calls
// but no text response - this would have caught the original bug
@@ -1416,6 +1489,10 @@ describe('runNonInteractive', () => {
CoreEvent.UserFeedback,
expect.any(Function),
);
expect(mockCoreEvents.on).toHaveBeenCalledWith(
CoreEvent.RetryAttempt,
expect.any(Function),
);
expect(mockCoreEvents.drainBacklogs).toHaveBeenCalledTimes(1);
});
@@ -1441,6 +1518,10 @@ describe('runNonInteractive', () => {
CoreEvent.UserFeedback,
expect.any(Function),
);
expect(mockCoreEvents.off).toHaveBeenCalledWith(
CoreEvent.RetryAttempt,
expect.any(Function),
);
});
it('logs to process.stderr when UserFeedback event is received', async () => {
@@ -1724,6 +1805,75 @@ describe('runNonInteractive', () => {
},
);
it('should emit loop_detected and legacy warning error events in debug stream-json mode', async () => {
vi.mocked(mockConfig.getOutputFormat).mockReturnValue(
OutputFormat.STREAM_JSON,
);
vi.mocked(mockConfig.getDebugMode).mockReturnValue(true);
vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
authType: 'oauth-personal',
});
vi.mocked(mockConfig.getUserTierName).mockReturnValue('pro');
vi.mocked(uiTelemetryService.getMetrics).mockReturnValue(
MOCK_SESSION_METRICS,
);
const events: ServerGeminiStreamEvent[] = [
{
type: GeminiEventType.LoopDetected,
value: { loopType: 'llm_detected_loop' },
},
{
type: GeminiEventType.Finished,
value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } },
},
];
mockGeminiClient.sendMessageStream.mockReturnValue(
createStreamFromEvents(events),
);
await runNonInteractive({
config: mockConfig,
settings: mockSettings,
input: 'Loop debug test',
prompt_id: 'prompt-id-loop-debug',
});
const outputLines = getWrittenOutput()
.trim()
.split('\n')
.map((line) => JSON.parse(line));
expect(outputLines[0]).toMatchObject({
type: 'init',
auth_method: 'oauth-personal',
user_tier: 'pro',
});
expect(outputLines).toContainEqual(
expect.objectContaining({
type: 'loop_detected',
loop_type: 'llm_detected_loop',
}),
);
expect(outputLines).toContainEqual(
expect.objectContaining({
type: 'error',
severity: 'warning',
message: 'Loop detected, stopping execution',
}),
);
expect(outputLines).toContainEqual(
expect.objectContaining({
type: 'result',
stats: expect.objectContaining({
api_requests: 0,
api_errors: 0,
retry_count: 0,
}),
}),
);
});
it('should log error when tool recording fails', async () => {
const toolCallEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.ToolCallRequest,
+27 -1
View File
@@ -96,6 +96,8 @@ export async function runNonInteractive({
const startTime = Date.now();
let retryCount = 0;
let loopDetected = false;
let detectedLoopType: string | undefined;
const debugMode = config.getDebugMode();
const streamFormatter =
config.getOutputFormat() === OutputFormat.STREAM_JSON
@@ -377,14 +379,26 @@ export async function runNonInteractive({
}
toolCallRequests.push(event.value);
} else if (event.type === GeminiEventType.LoopDetected) {
const loopType = event.value?.loopType;
loopDetected = true;
if (loopType) {
detectedLoopType = loopType;
}
if (debugMode) {
const loopType = event.value?.loopType;
if (streamFormatter) {
streamFormatter.emitEvent({
type: JsonStreamEventType.LOOP_DETECTED,
timestamp: new Date().toISOString(),
...(loopType && { loop_type: loopType }),
});
// Keep emitting the legacy warning event for existing parsers.
streamFormatter.emitEvent({
type: JsonStreamEventType.ERROR,
timestamp: new Date().toISOString(),
severity: 'warning',
message: 'Loop detected, stopping execution',
});
} else if (config.getOutputFormat() === OutputFormat.TEXT) {
const loopTypeStr = loopType ? ` (${loopType})` : '';
process.stderr.write(
@@ -540,6 +554,12 @@ export async function runNonInteractive({
undefined,
authMethod,
userTier,
{
includeDiagnostics: debugMode,
retryCount,
loopDetected,
loopType: detectedLoopType,
},
),
);
} else {
@@ -574,6 +594,12 @@ export async function runNonInteractive({
undefined,
authMethod,
userTier,
{
includeDiagnostics: debugMode,
retryCount,
loopDetected,
loopType: detectedLoopType,
},
),
);
} else {
@@ -41,6 +41,25 @@ describe('JsonFormatter', () => {
expect(parsed.response).toBe('Red text and Green text');
});
it('should include auth method and user tier when provided', () => {
const formatter = new JsonFormatter();
const formatted = formatter.format(
'test-session-id',
'hello',
undefined,
undefined,
'gemini-api-key',
'free',
);
expect(JSON.parse(formatted)).toEqual({
session_id: 'test-session-id',
auth_method: 'gemini-api-key',
user_tier: 'free',
response: 'hello',
});
});
it('should strip control characters from response text', () => {
const formatter = new JsonFormatter();
const responseWithControlChars =
@@ -138,6 +157,87 @@ describe('JsonFormatter', () => {
expect(JSON.parse(formatted)).toEqual(expected);
});
it('should include debug diagnostic stats when enabled', () => {
const formatter = new JsonFormatter();
const stats: SessionMetrics = {
models: {
'gemini-2.5-pro': {
api: {
totalRequests: 2,
totalErrors: 1,
totalLatencyMs: 1234,
},
tokens: {
input: 10,
prompt: 10,
candidates: 5,
total: 15,
cached: 0,
thoughts: 0,
tool: 0,
},
roles: {},
},
'gemini-2.5-flash': {
api: {
totalRequests: 3,
totalErrors: 0,
totalLatencyMs: 2345,
},
tokens: {
input: 10,
prompt: 10,
candidates: 5,
total: 15,
cached: 0,
thoughts: 0,
tool: 0,
},
roles: {},
},
},
tools: {
totalCalls: 0,
totalSuccess: 0,
totalFail: 0,
totalDurationMs: 0,
totalDecisions: {
accept: 0,
reject: 0,
modify: 0,
auto_accept: 0,
},
byName: {},
},
files: {
totalLinesAdded: 0,
totalLinesRemoved: 0,
},
};
const formatted = formatter.format(
'test-session-id',
'hello',
stats,
undefined,
'oauth-personal',
'pro',
{
includeDiagnostics: true,
retryCount: 0,
loopDetected: true,
loopType: 'llm_detected_loop',
},
);
const parsed = JSON.parse(formatted);
expect(parsed.stats.api_requests).toBe(5);
expect(parsed.stats.api_errors).toBe(1);
expect(parsed.stats.retry_count).toBe(0);
expect(parsed.stats.loop_detected).toBe(true);
expect(parsed.stats.loop_type).toBe('llm_detected_loop');
});
it('should format error as JSON', () => {
const formatter = new JsonFormatter();
const error: JsonError = {
+31 -2
View File
@@ -6,7 +6,14 @@
import stripAnsi from 'strip-ansi';
import type { SessionMetrics } from '../telemetry/uiTelemetry.js';
import type { JsonError, JsonOutput } from './types.js';
import type { JsonError, JsonOutput, JsonOutputStats } from './types.js';
type JsonFormatDiagnostics = {
includeDiagnostics?: boolean;
retryCount?: number;
loopDetected?: boolean;
loopType?: string;
};
export class JsonFormatter {
format(
@@ -16,6 +23,7 @@ export class JsonFormatter {
error?: JsonError,
authMethod?: string,
userTier?: string,
diagnostics?: JsonFormatDiagnostics,
): string {
const output: JsonOutput = {};
@@ -36,7 +44,28 @@ export class JsonFormatter {
}
if (stats) {
output.stats = stats;
const outputStats: JsonOutputStats = { ...stats };
if (diagnostics?.includeDiagnostics) {
let apiRequests = 0;
let apiErrors = 0;
for (const modelMetrics of Object.values(stats.models)) {
apiRequests += modelMetrics.api.totalRequests;
apiErrors += modelMetrics.api.totalErrors;
}
outputStats.api_requests = apiRequests;
outputStats.api_errors = apiErrors;
outputStats.retry_count = diagnostics.retryCount ?? 0;
if (diagnostics.loopDetected) {
outputStats.loop_detected = true;
}
if (diagnostics.loopType) {
outputStats.loop_type = diagnostics.loopType;
}
}
output.stats = outputStats;
}
if (error) {
@@ -473,6 +473,68 @@ describe('StreamJsonFormatter', () => {
expect(result.duration_ms).toBe(5000);
});
it('should include diagnostic stats when enabled', () => {
const metrics = createMockMetrics();
metrics.models['gemini-pro'] = {
api: { totalRequests: 2, totalErrors: 1, totalLatencyMs: 1000 },
tokens: {
input: 10,
prompt: 10,
candidates: 5,
total: 15,
cached: 0,
thoughts: 0,
tool: 0,
},
roles: {},
};
metrics.models['gemini-flash'] = {
api: { totalRequests: 3, totalErrors: 0, totalLatencyMs: 2000 },
tokens: {
input: 20,
prompt: 20,
candidates: 10,
total: 30,
cached: 0,
thoughts: 0,
tool: 0,
},
roles: {},
};
const result = formatter.convertToStreamStats(metrics, 750, {
includeDiagnostics: true,
retryCount: 0,
});
expect(result.api_requests).toBe(5);
expect(result.api_errors).toBe(1);
expect(result.retry_count).toBe(0);
});
it('should not include diagnostic stats when disabled', () => {
const metrics = createMockMetrics();
metrics.models['gemini-pro'] = {
api: { totalRequests: 2, totalErrors: 1, totalLatencyMs: 1000 },
tokens: {
input: 10,
prompt: 10,
candidates: 5,
total: 15,
cached: 0,
thoughts: 0,
tool: 0,
},
roles: {},
};
const result = formatter.convertToStreamStats(metrics, 750);
expect(result.api_requests).toBeUndefined();
expect(result.api_errors).toBeUndefined();
expect(result.retry_count).toBeUndefined();
});
});
describe('JSON validity', () => {
@@ -75,10 +75,7 @@ export class StreamJsonFormatter {
}
stats.api_requests = apiRequests;
stats.api_errors = apiErrors;
if (options.retryCount && options.retryCount > 0) {
stats.retry_count = options.retryCount;
}
stats.retry_count = options.retryCount ?? 0;
}
return stats;
+9 -1
View File
@@ -18,12 +18,20 @@ export interface JsonError {
code?: string | number;
}
export interface JsonOutputStats extends SessionMetrics {
api_requests?: number;
api_errors?: number;
retry_count?: number;
loop_detected?: boolean;
loop_type?: string;
}
export interface JsonOutput {
session_id?: string;
auth_method?: string;
user_tier?: string;
response?: string;
stats?: SessionMetrics;
stats?: JsonOutputStats;
error?: JsonError;
}