#!/bin/bash
# -----------------------------------------------------------------------------
# Gemini CLI Headless Mode Monitoring Test Script
# -----------------------------------------------------------------------------
# Purpose:
#   Runs the Gemini CLI in headless mode once per model with
#   `-o stream-json -d`, then displays the monitoring data found in the event
#   stream (auth method, API stats, retries, loop detection) in a readable
#   summary.
#
# Prerequisites:
#   Authentication must already be configured (API key, OAuth, or Vertex AI).
#   Build the project first: npm run build
#   `node` and `jq` must be on PATH.
#
# Usage:
#   ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]
#
# Options:
#   --prompt   Override the default test prompt
#   --models   Space-separated list of models to test (quoted)
#
# Example:
#   ./scripts/test_gemini.sh
#   ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"
#
# Exit status:
#   1 on bad arguments or a missing prerequisite. Per-model failures are only
#   reported in the summary and do not change the exit status.
# -----------------------------------------------------------------------------

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
CLI="$REPO_ROOT/packages/cli/dist/index.js"

# Defaults
PROMPT="count how many files are in the current folder"
MODELS=(
  "gemini-2.5-pro"
  "gemini-2.5-flash"
  "gemini-3.1-pro-preview"
  "gemini-3-flash-preview"
)

# Colors (escape sequences; always emitted through printf '%b')
readonly BOLD='\033[1m'
readonly DIM='\033[2m'
readonly GREEN='\033[32m'
readonly YELLOW='\033[33m'
readonly RED='\033[31m'
readonly CYAN='\033[36m'
readonly RESET='\033[0m'

#######################################
# Prints a message to stderr and exits with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Prints a dimmed horizontal rule, 72 characters wide.
#######################################
separator() {
  printf '%b' "$DIM"
  printf '%.0s─' {1..72}
  printf '%b\n' "$RESET"
}

#######################################
# Prints text wrapped in a color escape, without a trailing newline.
# Arguments: $1 - color escape sequence, $2 - text (printed verbatim)
#######################################
paint() {
  printf '%b%s%b' "$1" "$2" "$RESET"
}

#######################################
# Prints every event of a given type from a JSON event stream, one compact
# object per line. jq errors (e.g. non-JSON noise in the stream) are
# tolerated: whatever was parsed before the error is still printed.
# Arguments: $1 - event type, $2 - path to the stream file
#######################################
events_of_type() {
  jq -c --arg t "$1" 'select(.type == $t)' "$2" 2>/dev/null || true
}

#######################################
# Prints one field of a JSON document, or a fallback when the document is
# empty, unparsable, or the field is null/false/missing.
# Arguments: $1 - JSON text, $2 - jq path (e.g. .stats.api_errors),
#            $3 - fallback text
#######################################
json_field() {
  local json=$1 path=$2 fallback=$3 value=""
  if [[ -n "$json" ]]; then
    value=$(jq -r --arg d "$fallback" "$path // \$d" <<< "$json" 2>/dev/null) \
      || value=""
  fi
  printf '%s\n' "${value:-$fallback}"
}

#######################################
# Prints the number of lines in a string (0 for the empty string).
# Arguments: $1 - text
#######################################
count_lines() {
  local text=$1
  if [[ -z "$text" ]]; then
    printf '0\n'
  else
    # The here-string appends the final newline that wc -l needs.
    wc -l <<< "$text" | tr -d ' '
  fi
}

#######################################
# Formats a millisecond count for display.
# Arguments: $1 - integer milliseconds, or "?" when unknown
# Outputs:   "<s>.<tenths>s" for integers (truncated, not rounded), "?" for
#            unknown, "<value> ms" for anything else.
#######################################
format_duration() {
  local ms=$1
  if [[ "$ms" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a leading zero is not read as octal.
    printf '%d.%ds\n' "$((10#$ms / 1000))" "$(((10#$ms % 1000) / 100))"
  elif [[ "$ms" == "?" ]]; then
    printf '?\n'
  else
    printf '%s ms\n' "$ms"
  fi
}

#######################################
# Reports a CLI run that exited non-zero.
# Arguments: $1 - exit code, $2 - path to captured stderr
#######################################
report_failure() {
  local code=$1 err_file=$2
  printf ' %bFAILED%b (exit code %s)\n\n' "$RED" "$RESET" "$code"
  if [[ -s "$err_file" ]]; then
    printf ' %bstderr:%b\n' "$DIM" "$RESET"
    sed 's/^/ /' "$err_file"
    echo ""
  fi
}

#######################################
# Parses the stream-json output of a successful run and prints the summary.
# Arguments: $1 - path to captured stdout (event stream),
#            $2 - path to captured stderr
#######################################
report_success() {
  local out_file=$1 err_file=$2
  local nl=$'\n'
  local events init_line result_line

  # Init event: the first one wins.
  events=$(events_of_type init "$out_file")
  init_line=${events%%"$nl"*}
  local auth_method user_tier session_id
  auth_method=$(json_field "$init_line" '.auth_method' 'not set')
  user_tier=$(json_field "$init_line" '.user_tier' 'not set')
  session_id=$(json_field "$init_line" '.session_id' '?')

  # Result event: the last one wins.
  events=$(events_of_type result "$out_file")
  result_line=${events##*"$nl"}
  local status api_requests api_errors retry_count total_tokens
  local input_tokens output_tokens cached tool_calls duration_ms
  status=$(json_field "$result_line" '.status' '?')
  api_requests=$(json_field "$result_line" '.stats.api_requests' '?')
  api_errors=$(json_field "$result_line" '.stats.api_errors' '?')
  retry_count=$(json_field "$result_line" '.stats.retry_count' '0')
  total_tokens=$(json_field "$result_line" '.stats.total_tokens' '?')
  input_tokens=$(json_field "$result_line" '.stats.input_tokens' '?')
  output_tokens=$(json_field "$result_line" '.stats.output_tokens' '?')
  cached=$(json_field "$result_line" '.stats.cached' '?')
  tool_calls=$(json_field "$result_line" '.stats.tool_calls' '0')
  duration_ms=$(json_field "$result_line" '.stats.duration_ms' '?')

  # Retry and loop events
  local retry_lines retry_events loop_lines loop_count loop_type=""
  retry_lines=$(events_of_type retry "$out_file")
  retry_events=$(count_lines "$retry_lines")
  loop_lines=$(events_of_type loop_detected "$out_file")
  loop_count=$(count_lines "$loop_lines")
  if [[ -n "$loop_lines" ]]; then
    # First non-empty loop_type across all loop events.
    loop_type=$(jq -r '.loop_type // empty' <<< "$loop_lines" 2>/dev/null) \
      || true
    loop_type=${loop_type%%"$nl"*}
  fi

  # Assistant response: concatenate the message deltas, truncate for display.
  local response
  response=$(
    jq -r 'select(.type=="message" and .role=="assistant") | .content' \
      "$out_file" 2>/dev/null | tr -d '\n'
  ) || true
  if [[ ${#response} -gt 120 ]]; then
    response="${response:0:120}..."
  fi

  # Values from the stream are printed with %s so that backslash sequences
  # in CLI/model output are never interpreted as escapes.
  local status_shown errors_shown=$api_errors retries_shown=$retry_count
  if [[ "$status" == "success" ]]; then
    status_shown=$(paint "$GREEN" "$status")
  else
    status_shown=$(paint "$RED" "$status")
  fi
  if [[ "$api_errors" != "0" ]]; then
    errors_shown=$(paint "$RED" "$api_errors")
  fi
  if [[ "$retry_count" != "0" ]]; then
    retries_shown=$(paint "$YELLOW" "$retry_count")
  fi

  printf ' %bAuth & Session%b\n' "$BOLD" "$RESET"
  printf ' auth_method: %b%s%b\n' "$GREEN" "$auth_method" "$RESET"
  printf ' user_tier: %s\n' "$user_tier"
  printf ' session_id: %b%s%b\n' "$DIM" "$session_id" "$RESET"
  echo ""

  printf ' %bAPI Stats%b\n' "$BOLD" "$RESET"
  printf ' status: %s\n' "$status_shown"
  printf ' api_requests: %s\n' "$api_requests"
  printf ' api_errors: %s\n' "$errors_shown"
  printf ' retry_count: %s\n' "$retries_shown"
  printf ' duration: %s\n' "$(format_duration "$duration_ms")"
  echo ""

  printf ' %bTokens%b\n' "$BOLD" "$RESET"
  printf ' total: %s (in: %s, out: %s, cached: %s)\n' \
    "$total_tokens" "$input_tokens" "$output_tokens" "$cached"
  printf ' tools: %s calls\n' "$tool_calls"
  echo ""

  if [[ "$retry_events" -gt 0 ]]; then
    printf ' %bRetries (%s)%b\n' "${BOLD}${YELLOW}" "$retry_events" "$RESET"
    jq -r '" attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' \
      <<< "$retry_lines" 2>/dev/null || true
    echo ""
  fi

  if [[ "$loop_count" -gt 0 ]]; then
    printf ' %bLoop Detected%b\n' "${BOLD}${RED}" "$RESET"
    printf ' type: %s\n' "${loop_type:-unknown}"
    echo ""
  fi

  printf ' %bResponse%b\n' "$BOLD" "$RESET"
  printf ' %b%s%b\n' "$DIM" "$response" "$RESET"
  echo ""

  # Show stderr if any
  if [[ -s "$err_file" ]]; then
    printf ' %bStderr%b\n' "$BOLD" "$RESET"
    sed 's/^/ /' "$err_file"
    echo ""
  fi
}

# Parse args
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --prompt)
      [[ "$#" -ge 2 ]] || die "Option --prompt requires a value"
      PROMPT="$2"
      shift
      ;;
    --models)
      [[ "$#" -ge 2 ]] || die "Option --models requires a value"
      IFS=' ' read -ra MODELS <<< "$2"
      shift
      ;;
    *)
      die "Unknown option: $1"
      ;;
  esac
  shift
done

if [[ "${#MODELS[@]}" -eq 0 ]]; then
  die "No models to test (--models was empty)"
fi

# Check prerequisites
if [[ ! -f "$CLI" ]]; then
  printf '%bCLI not found at %s%b\n' "$RED" "$CLI" "$RESET" >&2
  die "Run 'npm run build' from the repo root first."
fi

if ! command -v node &>/dev/null; then
  printf '%bnode is required but not installed.%b\n' "$RED" "$RESET" >&2
  exit 1
fi

if ! command -v jq &>/dev/null; then
  printf '%bjq is required but not installed.%b\n' "$RED" "$RESET" >&2
  exit 1
fi

# Scratch space for captured output; removed on every exit path.
WORK_DIR=$(mktemp -d) || die "mktemp failed"
trap 'rm -rf -- "${WORK_DIR:?}"' EXIT
TMPFILE="$WORK_DIR/stdout.jsonl"
STDERRFILE="$WORK_DIR/stderr.log"

# Header
echo ""
printf '%bGemini CLI Headless Monitoring Test%b\n' "$BOLD" "$RESET"
separator
printf '%bPrompt:%b %s\n' "$DIM" "$RESET" "$PROMPT"
printf '%bModels:%b %s\n' "$DIM" "$RESET" "${MODELS[*]}"
printf '%bCLI:%b %s\n' "$DIM" "$RESET" "$CLI"
separator
echo ""

total_models=${#MODELS[@]}
pass_count=0
fail_count=0

for model in "${MODELS[@]}"; do
  printf '%b[%s]%b\n' "${BOLD}${CYAN}" "$model" "$RESET"
  echo ""

  # ── stream-json run ──────────────────────────────────────────────────
  exit_code=0
  printf ' %bRunning with -o stream-json -d ...%b\n' "$DIM" "$RESET"
  node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json -d \
    >"$TMPFILE" 2>"$STDERRFILE" || exit_code=$?

  # Counters use $(( )) assignments: a bare ((n++)) returns status 1 when n
  # is 0, which would abort the script under `set -e`.
  if [[ "$exit_code" -ne 0 ]]; then
    report_failure "$exit_code" "$STDERRFILE"
    fail_count=$((fail_count + 1))
  else
    pass_count=$((pass_count + 1))
    report_success "$TMPFILE" "$STDERRFILE"
  fi

  separator
  echo ""
done

# Summary
printf '%bSummary%b\n' "$BOLD" "$RESET"
printf ' Models tested: %s\n' "$total_models"
printf ' Passed: %b%s%b\n' "$GREEN" "$pass_count" "$RESET"
if [[ "$fail_count" -gt 0 ]]; then
  printf ' Failed: %b%s%b\n' "$RED" "$fail_count" "$RESET"
else
  printf ' Failed: %s\n' "$fail_count"
fi
echo ""