feat(headless): surface diagnostic monitoring data in non-interactive output

When running Gemini CLI in headless mode (-p), critical diagnostic data
like auth method, API retry attempts, loop detection, and request stats
was invisible despite being tracked internally. This change surfaces
that data across all three output formats (stream-json, json, text).

Changes:
- Add RETRY and LOOP_DETECTED event types to stream-json output
- Include auth_method and user_tier in init events and JSON output
- Add api_requests, api_errors, and retry_count to result stats
- Track and expose detected loop type (tool call, chanting, LLM-detected)
- Emit [RETRY] and [WARNING] messages to stderr in text mode
- Listen to CoreEvent.RetryAttempt in non-interactive CLI
- Add test script (scripts/test_gemini.sh) for manual verification
This commit is contained in:
Dmitry Lyalin
2026-02-24 23:37:39 -08:00
parent bf278ef2b0
commit a4b3229513
10 changed files with 364 additions and 15 deletions
+223
View File
@@ -0,0 +1,223 @@
#!/bin/bash
# -----------------------------------------------------------------------------
# Gemini CLI Headless Mode Monitoring Test Script
# -----------------------------------------------------------------------------
# Purpose:
# Runs the Gemini CLI in headless mode across multiple models and output
# formats, then displays the monitoring data (auth method, API stats, retries,
# loop detection) in a readable summary.
#
# Prerequisites:
# Authentication must already be configured (API key, OAuth, or Vertex AI).
# Build the project first: npm run build
# Node.js (node) and jq must be available on PATH.
#
# Usage:
# ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]
#
# Options:
# --prompt <text> Override the default test prompt
# --models <list> Space-separated list of models to test (quoted)
#
# Example:
# ./scripts/test_gemini.sh
# ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"
# -----------------------------------------------------------------------------
set -euo pipefail

# Resolve everything relative to this script's location so the script can be
# launched from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
CLI="$REPO_ROOT/packages/cli/dist/index.js"

# Defaults (overridable with --prompt / --models)
PROMPT="count how many files are in the current folder"
MODELS=(
  "gemini-2.5-pro"
  "gemini-2.5-flash"
  "gemini-3.1-pro-preview"
  "gemini-3-flash-preview"
)

#######################################
# Parses command-line options into the PROMPT and MODELS globals.
# Globals:   PROMPT (written), MODELS (written)
# Arguments: the script's arguments:
#              --prompt <text>   replaces PROMPT
#              --models <list>   space-separated list, split into MODELS
# Outputs:   usage errors on stderr
# Returns:   0 on success; exits the shell with status 1 on an unknown
#            option or an option that is missing its value
#######################################
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --prompt | --models)
        # Without this check a trailing option would hit `set -u` and die
        # with an unhelpful "$2: unbound variable" message.
        if [[ "$#" -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        if [[ "$1" == "--prompt" ]]; then
          PROMPT="$2"
        else
          IFS=' ' read -ra MODELS <<<"$2"
        fi
        shift
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    shift
  done
}

# Parse args
parse_args "$@"
# Colors
# ANSI SGR sequences kept as literal backslash text (single-quoted); they only
# become real escape bytes when printed with `echo -e`.
# -- text attributes
RESET='\033[0m'
BOLD='\033[1m'
DIM='\033[2m'
# -- foreground colours
RED='\033[31m'
GREEN='\033[32m'
YELLOW='\033[33m'
CYAN='\033[36m'
# Check prerequisites
# All failures here are fatal and are reported on stderr so they are not mixed
# into any stdout the caller may be capturing.
if [[ ! -f "$CLI" ]]; then
  echo -e "${RED}CLI not found at $CLI${RESET}" >&2
  echo "Run 'npm run build' from the repo root first." >&2
  exit 1
fi
# node runs the CLI and jq parses its output; fail once, up front, rather than
# once per model.
for required_tool in node jq; do
  if ! command -v "$required_tool" &>/dev/null; then
    echo -e "${RED}${required_tool} is required but not installed.${RESET}" >&2
    exit 1
  fi
done
#######################################
# Prints a dimmed horizontal rule made of 72 box-drawing characters.
# Globals:   DIM (read), RESET (read)
# Arguments: none
# Outputs:   one line on stdout
#######################################
separator() {
  local rule=""
  local i
  for i in {1..72}; do
    rule+="─"
  done
  echo -e "${DIM}${rule}${RESET}"
}
#######################################
# Prints the run banner: title, prompt, model list and CLI path.
# Globals:   BOLD, DIM, RESET, PROMPT, MODELS, CLI (all read)
# Arguments: none
# Outputs:   banner on stdout
#######################################
print_header() {
  echo ""
  echo -e "${BOLD}Gemini CLI Headless Monitoring Test${RESET}"
  separator
  # The colour codes need escape expansion (%b); the user-supplied values are
  # printed verbatim (%s) so a prompt or path containing backslashes
  # (e.g. "C:\new") is not mangled into control characters.
  printf '%b %s\n' "${DIM}Prompt:${RESET}" "$PROMPT"
  printf '%b %s\n' "${DIM}Models:${RESET}" "${MODELS[*]}"
  printf '%b %s\n' "${DIM}CLI:${RESET}" "$CLI"
  separator
  echo ""
}

# Header
print_header

# Counters reported in the final summary.
total_models=${#MODELS[@]}
pass_count=0
fail_count=0
#######################################
# Evaluates a jq filter against one JSON document held in a string.
# Arguments: $1 - JSON text (may be empty)
#            $2 - jq filter
# Outputs:   raw (-r) filter output on stdout; nothing if jq fails
# Returns:   always 0, so a parse failure cannot abort the script via set -e
#######################################
_json_field() {
  jq -r "$2" <<<"$1" 2>/dev/null || true
}

#######################################
# Formats a millisecond count as seconds with one (truncated) decimal.
# Uses shell arithmetic only, so no external calculator is required.
# Arguments: $1 - duration in milliseconds; any fractional part is dropped
# Outputs:   e.g. "1.2s" on stdout, or "?" when $1 is not a number
#######################################
_format_duration() {
  local ms=${1%%.*}
  if [[ "$ms" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a leading zero is not read as octal.
    printf '%d.%ds\n' "$((10#$ms / 1000))" "$((10#$ms % 1000 / 100))"
  else
    printf '?\n'
  fi
}

#######################################
# Runs the CLI once for a model with -o stream-json and prints a report of
# the init/result/retry/loop_detected events found in its output.
# Globals:   CLI, PROMPT, colour variables (read);
#            pass_count, fail_count (incremented)
# Arguments: $1 - model name passed to the CLI with -m
# Outputs:   human-readable report on stdout
# Returns:   0, whether the CLI run itself passed or failed
#######################################
run_model_test() {
  local model=$1
  local out_file err_file
  local exit_code=0

  echo -e "${BOLD}${CYAN}[$model]${RESET}"
  echo ""

  # ── stream-json run ──────────────────────────────────────────────────
  out_file=$(mktemp)
  err_file=$(mktemp)
  echo -e " ${DIM}Running with -o stream-json ...${RESET}"
  # `|| exit_code=$?` records the status without tripping set -e.
  node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json \
    >"$out_file" 2>"$err_file" || exit_code=$?

  if [[ $exit_code -ne 0 ]]; then
    echo -e " ${RED}FAILED${RESET} (exit code $exit_code)"
    echo ""
    if [[ -s "$err_file" ]]; then
      echo -e " ${DIM}stderr:${RESET}"
      sed 's/^/ /' "$err_file"
      echo ""
    fi
    # Not ((fail_count++)): that form returns status 1 when the old value is
    # 0, which aborts the whole script under set -e.
    fail_count=$((fail_count + 1))
    rm -f "$out_file" "$err_file"
    separator
    echo ""
    return 0
  fi
  pass_count=$((pass_count + 1))

  # The jq pipelines below end in `|| true`: with pipefail, a jq parse error
  # or a SIGPIPE from head would otherwise terminate the script silently.

  # Parse init event
  local init_line auth_method user_tier session_id
  init_line=$(jq -c 'select(.type=="init")' "$out_file" 2>/dev/null | head -1) || true
  auth_method=$(_json_field "$init_line" '.auth_method // "not set"')
  user_tier=$(_json_field "$init_line" '.user_tier // "not set"')
  session_id=$(_json_field "$init_line" '.session_id // "?"')

  # Parse result event (the last one wins)
  local result_line status api_requests api_errors retry_count
  local total_tokens input_tokens output_tokens cached tool_calls duration_ms
  result_line=$(jq -c 'select(.type=="result")' "$out_file" 2>/dev/null | tail -1) || true
  status=$(_json_field "$result_line" '.status // "?"')
  api_requests=$(_json_field "$result_line" '.stats.api_requests // "?"')
  api_errors=$(_json_field "$result_line" '.stats.api_errors // "?"')
  retry_count=$(_json_field "$result_line" '.stats.retry_count // 0')
  total_tokens=$(_json_field "$result_line" '.stats.total_tokens // "?"')
  input_tokens=$(_json_field "$result_line" '.stats.input_tokens // "?"')
  output_tokens=$(_json_field "$result_line" '.stats.output_tokens // "?"')
  cached=$(_json_field "$result_line" '.stats.cached // "?"')
  tool_calls=$(_json_field "$result_line" '.stats.tool_calls // 0')
  duration_ms=$(_json_field "$result_line" '.stats.duration_ms // "?"')

  # Count retries and loop events
  local retry_events loop_events
  local loop_count=0
  local loop_type=""
  retry_events=$(jq -c 'select(.type=="retry")' "$out_file" 2>/dev/null | wc -l | tr -d ' ') || true
  loop_events=$(jq -c 'select(.type=="loop_detected")' "$out_file" 2>/dev/null) || true
  if [[ -n "$loop_events" ]]; then
    loop_count=$(wc -l <<<"$loop_events" | tr -d ' ')
    loop_type=$(_json_field "$loop_events" '.loop_type // empty' | head -1) || true
  fi

  # Extract assistant response (concatenate deltas)
  local response
  response=$(jq -r 'select(.type=="message" and .role=="assistant") | .content' "$out_file" 2>/dev/null | tr -d '\n') || true
  # Truncate for display
  if [[ ${#response} -gt 120 ]]; then
    response="${response:0:120}..."
  fi

  # Format duration
  local duration_display
  duration_display=$(_format_duration "$duration_ms")

  # Pick highlight colours: green/red status, red for any API errors,
  # yellow for any retries; zero counts are printed uncoloured.
  local status_color=$RED
  local errors_color=""
  local retries_color=""
  if [[ "$status" == "success" ]]; then
    status_color=$GREEN
  fi
  if [[ "$api_errors" != "0" ]]; then
    errors_color=$RED
  fi
  if [[ "$retry_count" != "0" ]]; then
    retries_color=$YELLOW
  fi

  # Display
  echo -e " ${BOLD}Auth & Session${RESET}"
  echo -e " auth_method: ${GREEN}$auth_method${RESET}"
  echo -e " user_tier: $user_tier"
  echo -e " session_id: ${DIM}$session_id${RESET}"
  echo ""
  echo -e " ${BOLD}API Stats${RESET}"
  echo -e " status: ${status_color}${status}${RESET}"
  echo -e " api_requests: $api_requests"
  echo -e " api_errors: ${errors_color}${api_errors}${errors_color:+$RESET}"
  echo -e " retry_count: ${retries_color}${retry_count}${retries_color:+$RESET}"
  echo -e " duration: $duration_display"
  echo ""
  echo -e " ${BOLD}Tokens${RESET}"
  echo -e " total: $total_tokens (in: $input_tokens, out: $output_tokens, cached: $cached)"
  echo -e " tools: $tool_calls calls"
  echo ""

  if [[ "$retry_events" -gt 0 ]]; then
    echo -e " ${BOLD}${YELLOW}Retries ($retry_events)${RESET}"
    jq -r 'select(.type=="retry") | " attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' "$out_file" 2>/dev/null || true
    echo ""
  fi

  if [[ "$loop_count" -gt 0 ]]; then
    echo -e " ${BOLD}${RED}Loop Detected${RESET}"
    echo -e " type: ${loop_type:-unknown}"
    echo ""
  fi

  echo -e " ${BOLD}Response${RESET}"
  # %s keeps model text verbatim; echo -e would expand any backslash
  # sequences the model happened to emit.
  printf ' %b%s%b\n' "$DIM" "$response" "$RESET"
  echo ""

  # Show stderr if any
  local stderr_content
  stderr_content=$(<"$err_file")
  if [[ -n "$stderr_content" ]]; then
    echo -e " ${BOLD}Stderr${RESET}"
    sed 's/^/ /' <<<"$stderr_content"
    echo ""
  fi

  rm -f "$out_file" "$err_file"
  separator
  echo ""
}

for model in "${MODELS[@]}"; do
  run_model_test "$model"
done
# Summary
# The failure count is highlighted in red only when at least one model failed.
failed_line=" Failed: $fail_count"
if [[ "$fail_count" -gt 0 ]]; then
  failed_line=" Failed: ${RED}$fail_count${RESET}"
fi
# printf reuses the format for every argument: one output line each, with the
# final empty argument producing the trailing blank line.
printf '%b\n' \
  "${BOLD}Summary${RESET}" \
  " Models tested: $total_models" \
  " Passed: ${GREEN}$pass_count${RESET}" \
  "$failed_line" \
  ""