Log recovery events (nudges) that happens inside the subagent (#12408)

This commit is contained in:
Silvio Junior
2025-11-03 17:53:43 -05:00
committed by GitHub
parent be1dc13bb1
commit 7a515339ea
7 changed files with 307 additions and 23 deletions
@@ -35,6 +35,7 @@ import type {
SmartEditCorrectionEvent,
AgentStartEvent,
AgentFinishEvent,
RecoveryAttemptEvent,
WebFetchFallbackAttemptEvent,
ExtensionUpdateEvent,
} from '../types.js';
@@ -90,6 +91,7 @@ export enum EventNames {
SMART_EDIT_CORRECTION = 'smart_edit_correction',
AGENT_START = 'agent_start',
AGENT_FINISH = 'agent_finish',
RECOVERY_ATTEMPT = 'recovery_attempt',
WEB_FETCH_FALLBACK_ATTEMPT = 'web_fetch_fallback_attempt',
}
@@ -1230,6 +1232,40 @@ export class ClearcutLogger {
this.flushIfNeeded();
}
logRecoveryAttemptEvent(event: RecoveryAttemptEvent): void {
const data: EventValue[] = [
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AGENT_ID,
value: event.agent_id,
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AGENT_NAME,
value: event.agent_name,
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AGENT_RECOVERY_REASON,
value: event.reason,
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AGENT_RECOVERY_DURATION_MS,
value: event.duration_ms.toString(),
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AGENT_RECOVERY_SUCCESS,
value: event.success.toString(),
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AGENT_TURN_COUNT,
value: event.turn_count.toString(),
},
];
this.enqueueLogEvent(
this.createLogEvent(EventNames.RECOVERY_ATTEMPT, data),
);
this.flushIfNeeded();
}
logWebFetchFallbackAttemptEvent(event: WebFetchFallbackAttemptEvent): void {
const data: EventValue[] = [
{
@@ -464,4 +464,13 @@ export enum EventMetadataKey {
// Logs the reason for agent termination.
GEMINI_CLI_AGENT_TERMINATE_REASON = 115,
// Logs the reason for an agent recovery attempt.
GEMINI_CLI_AGENT_RECOVERY_REASON = 122,
// Logs the duration of an agent recovery attempt in milliseconds.
GEMINI_CLI_AGENT_RECOVERY_DURATION_MS = 123,
// Logs whether the agent recovery attempt was successful.
GEMINI_CLI_AGENT_RECOVERY_SUCCESS = 124,
}
+19
View File
@@ -46,6 +46,7 @@ import type {
SmartEditCorrectionEvent,
AgentStartEvent,
AgentFinishEvent,
RecoveryAttemptEvent,
WebFetchFallbackAttemptEvent,
ExtensionUpdateEvent,
} from './types.js';
@@ -63,6 +64,7 @@ import {
recordTokenUsageMetrics,
recordApiResponseMetrics,
recordAgentRunMetrics,
recordRecoveryAttemptMetrics,
recordLinesChanged,
} from './metrics.js';
import { isTelemetrySdkInitialized } from './sdk.js';
@@ -636,6 +638,23 @@ export function logAgentFinish(config: Config, event: AgentFinishEvent): void {
recordAgentRunMetrics(config, event);
}
export function logRecoveryAttempt(
config: Config,
event: RecoveryAttemptEvent,
): void {
ClearcutLogger.getInstance(config)?.logRecoveryAttemptEvent(event);
if (!isTelemetrySdkInitialized()) return;
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: event.toLogBody(),
attributes: event.toOpenTelemetryAttributes(config),
};
logger.emit(logRecord);
recordRecoveryAttemptMetrics(config, event);
}
export function logWebFetchFallbackAttempt(
config: Config,
event: WebFetchFallbackAttemptEvent,
+51
View File
@@ -12,6 +12,7 @@ import type {
ModelRoutingEvent,
ModelSlashCommandEvent,
AgentFinishEvent,
RecoveryAttemptEvent,
} from './types.js';
import { AuthType } from '../core/contentGenerator.js';
import { getCommonAttributes } from './telemetryAttributes.js';
@@ -38,6 +39,9 @@ const MODEL_SLASH_COMMAND_CALL_COUNT =
const AGENT_RUN_COUNT = 'gemini_cli.agent.run.count';
const AGENT_DURATION_MS = 'gemini_cli.agent.duration';
const AGENT_TURNS = 'gemini_cli.agent.turns';
const AGENT_RECOVERY_ATTEMPT_COUNT = 'gemini_cli.agent.recovery_attempt.count';
const AGENT_RECOVERY_ATTEMPT_DURATION =
'gemini_cli.agent.recovery_attempt.duration';
// OpenTelemetry GenAI Semantic Convention Metrics
const GEN_AI_CLIENT_TOKEN_USAGE = 'gen_ai.client.token.usage';
@@ -175,6 +179,16 @@ const COUNTER_DEFINITIONS = {
terminate_reason: string;
},
},
[AGENT_RECOVERY_ATTEMPT_COUNT]: {
description: 'Counts agent recovery attempts.',
valueType: ValueType.INT,
assign: (c: Counter) => (agentRecoveryAttemptCounter = c),
attributes: {} as {
agent_name: string;
reason: string;
success: boolean;
},
},
[FLICKER_FRAME_COUNT]: {
description:
'Counts UI frames that flicker (render taller than the terminal).',
@@ -244,6 +258,15 @@ const HISTOGRAM_DEFINITIONS = {
agent_name: string;
},
},
[AGENT_RECOVERY_ATTEMPT_DURATION]: {
description: 'Duration of agent recovery attempts in milliseconds.',
unit: 'ms',
valueType: ValueType.INT,
assign: (h: Histogram) => (agentRecoveryAttemptDurationHistogram = h),
attributes: {} as {
agent_name: string;
},
},
[GEN_AI_CLIENT_TOKEN_USAGE]: {
description: 'Number of input and output tokens used.',
unit: 'token',
@@ -478,6 +501,8 @@ let modelSlashCommandCallCounter: Counter | undefined;
let agentRunCounter: Counter | undefined;
let agentDurationHistogram: Histogram | undefined;
let agentTurnsHistogram: Histogram | undefined;
let agentRecoveryAttemptCounter: Counter | undefined;
let agentRecoveryAttemptDurationHistogram: Histogram | undefined;
let flickerFrameCounter: Counter | undefined;
let exitFailCounter: Counter | undefined;
let slowRenderHistogram: Histogram | undefined;
@@ -774,6 +799,32 @@ export function recordAgentRunMetrics(
});
}
export function recordRecoveryAttemptMetrics(
config: Config,
event: RecoveryAttemptEvent,
): void {
if (
!agentRecoveryAttemptCounter ||
!agentRecoveryAttemptDurationHistogram ||
!isMetricsInitialized
)
return;
const commonAttributes = baseMetricDefinition.getCommonAttributes(config);
agentRecoveryAttemptCounter.add(1, {
...commonAttributes,
agent_name: event.agent_name,
reason: event.reason,
success: event.success,
});
agentRecoveryAttemptDurationHistogram.record(event.duration_ms, {
...commonAttributes,
agent_name: event.agent_name,
});
}
// OpenTelemetry GenAI Semantic Convention Recording Functions
export function recordGenAiClientTokenUsage(
+68 -19
View File
@@ -1459,6 +1459,7 @@ export type TelemetryEvent =
| ModelSlashCommandEvent
| AgentStartEvent
| AgentFinishEvent
| RecoveryAttemptEvent
| WebFetchFallbackAttemptEvent;
export const EVENT_EXTENSION_DISABLE = 'gemini_cli.extension_disable';
@@ -1548,15 +1549,16 @@ export class SmartEditCorrectionEvent implements BaseTelemetryEvent {
}
}
export const EVENT_AGENT_START = 'gemini_cli.agent.start';
export class AgentStartEvent implements BaseTelemetryEvent {
'event.name': 'agent_start';
abstract class BaseAgentEvent implements BaseTelemetryEvent {
abstract 'event.name':
| 'agent_start'
| 'agent_finish'
| 'agent_recovery_attempt';
'event.timestamp': string;
agent_id: string;
agent_name: string;
constructor(agent_id: string, agent_name: string) {
this['event.name'] = 'agent_start';
this['event.timestamp'] = new Date().toISOString();
this.agent_id = agent_id;
this.agent_name = agent_name;
@@ -1565,24 +1567,38 @@ export class AgentStartEvent implements BaseTelemetryEvent {
toOpenTelemetryAttributes(config: Config): LogAttributes {
return {
...getCommonAttributes(config),
'event.name': EVENT_AGENT_START,
'event.timestamp': this['event.timestamp'],
agent_id: this.agent_id,
agent_name: this.agent_name,
};
}
abstract toLogBody(): string;
}
export const EVENT_AGENT_START = 'gemini_cli.agent.start';
export class AgentStartEvent extends BaseAgentEvent {
'event.name' = 'agent_start' as const;
constructor(agent_id: string, agent_name: string) {
super(agent_id, agent_name);
}
override toOpenTelemetryAttributes(config: Config): LogAttributes {
return {
...super.toOpenTelemetryAttributes(config),
'event.name': EVENT_AGENT_START,
};
}
toLogBody(): string {
return `Agent ${this.agent_name} started. ID: ${this.agent_id}`;
}
}
export const EVENT_AGENT_FINISH = 'gemini_cli.agent.finish';
export class AgentFinishEvent implements BaseTelemetryEvent {
'event.name': 'agent_finish';
'event.timestamp': string;
agent_id: string;
agent_name: string;
export class AgentFinishEvent extends BaseAgentEvent {
'event.name' = 'agent_finish' as const;
duration_ms: number;
turn_count: number;
terminate_reason: AgentTerminateMode;
@@ -1594,22 +1610,16 @@ export class AgentFinishEvent implements BaseTelemetryEvent {
turn_count: number,
terminate_reason: AgentTerminateMode,
) {
this['event.name'] = 'agent_finish';
this['event.timestamp'] = new Date().toISOString();
this.agent_id = agent_id;
this.agent_name = agent_name;
super(agent_id, agent_name);
this.duration_ms = duration_ms;
this.turn_count = turn_count;
this.terminate_reason = terminate_reason;
}
toOpenTelemetryAttributes(config: Config): LogAttributes {
override toOpenTelemetryAttributes(config: Config): LogAttributes {
return {
...getCommonAttributes(config),
...super.toOpenTelemetryAttributes(config),
'event.name': EVENT_AGENT_FINISH,
'event.timestamp': this['event.timestamp'],
agent_id: this.agent_id,
agent_name: this.agent_name,
duration_ms: this.duration_ms,
turn_count: this.turn_count,
terminate_reason: this.terminate_reason,
@@ -1621,6 +1631,45 @@ export class AgentFinishEvent implements BaseTelemetryEvent {
}
}
export const EVENT_AGENT_RECOVERY_ATTEMPT = 'gemini_cli.agent.recovery_attempt';
export class RecoveryAttemptEvent extends BaseAgentEvent {
'event.name' = 'agent_recovery_attempt' as const;
reason: AgentTerminateMode;
duration_ms: number;
success: boolean;
turn_count: number;
constructor(
agent_id: string,
agent_name: string,
reason: AgentTerminateMode,
duration_ms: number,
success: boolean,
turn_count: number,
) {
super(agent_id, agent_name);
this.reason = reason;
this.duration_ms = duration_ms;
this.success = success;
this.turn_count = turn_count;
}
override toOpenTelemetryAttributes(config: Config): LogAttributes {
return {
...super.toOpenTelemetryAttributes(config),
'event.name': EVENT_AGENT_RECOVERY_ATTEMPT,
reason: this.reason,
duration_ms: this.duration_ms,
success: this.success,
turn_count: this.turn_count,
};
}
toLogBody(): string {
return `Agent ${this.agent_name} recovery attempt. Reason: ${this.reason}. Success: ${this.success}. Duration: ${this.duration_ms}ms.`;
}
}
export const EVENT_WEB_FETCH_FALLBACK_ATTEMPT =
'gemini_cli.web_fetch_fallback_attempt';
export class WebFetchFallbackAttemptEvent implements BaseTelemetryEvent {