scripts/test_gemini.sh

#!/bin/bash
# -----------------------------------------------------------------------------
# Gemini CLI Headless Mode Monitoring Test Script
# -----------------------------------------------------------------------------
# Purpose:
#   Runs the Gemini CLI in headless mode (`-o stream-json -d`) once per model,
#   then displays the monitoring data (auth method, API stats, retries,
#   loop detection) in a readable summary.
#
# Prerequisites:
#   Authentication must already be configured (API key, OAuth, or Vertex AI).
#   Build the project first: npm run build
#   `node` and `jq` must be available on PATH.
#
# Usage:
#   ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]
#
# Options:
#   --prompt <text>   Override the default test prompt
#   --models <list>   Space-separated list of models to test (quoted)
#
# Example:
#   ./scripts/test_gemini.sh
#   ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"
# -----------------------------------------------------------------------------

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Resolve the built CLI entry point relative to this script's own location;
# the repository root is taken to be the parent of the script's directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
CLI="${REPO_ROOT}/packages/cli/dist/index.js"

# Defaults, used unless overridden by --prompt / --models.
PROMPT='count how many files are in the current folder'
MODELS=("gemini-2.5-pro" "gemini-2.5-flash")
MODELS+=("gemini-3.1-pro-preview" "gemini-3-flash-preview")

# Parse args
#
# parse_args [--prompt <text>] [--models "<model1 model2 ...>"]
#   Overrides the PROMPT / MODELS defaults from the given arguments.
#   Globals: PROMPT (written), MODELS (written)
#   Exits 1 with a message on stderr for an unknown option, an option that is
#   missing its value, or a --models value that contains no model name.
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --prompt | --models)
        # Check for the value explicitly: with `set -u` active, expanding a
        # missing "$2" aborts with an opaque "unbound variable" error.
        if [[ "$#" -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        if [[ "$1" == "--prompt" ]]; then
          PROMPT="$2"
        else
          # Split the quoted list on spaces into the MODELS array.
          IFS=' ' read -ra MODELS <<< "$2"
          if [[ "${#MODELS[@]}" -eq 0 ]]; then
            echo "Option --models requires at least one model name" >&2
            exit 1
          fi
        fi
        shift # consume the option's value; the name is shifted below
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"

# ANSI escape sequences, stored as literal backslash text; `echo -e` turns
# them into real escape bytes at print time.

# Text attributes
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'

# Foreground colors
RED='\033[31m'
GREEN='\033[32m'
YELLOW='\033[33m'
CYAN='\033[36m'

# Check prerequisites: the built CLI bundle must exist, and the external tools
# this script invokes directly must be on PATH (node runs the CLI, jq parses
# its output). Diagnostics go to stderr so they stay visible when stdout is
# redirected.
if [[ ! -f "$CLI" ]]; then
  echo -e "${RED}CLI not found at $CLI${RESET}" >&2
  echo "Run 'npm run build' from the repo root first." >&2
  exit 1
fi

for required_tool in node jq; do
  if ! command -v "$required_tool" &>/dev/null; then
    echo -e "${RED}${required_tool} is required but not installed.${RESET}" >&2
    exit 1
  fi
done

# Print a dimmed horizontal rule made of 72 box-drawing characters.
separator() {
  local rule
  # Build 72 spaces, then swap each one for the rule glyph.
  printf -v rule '%72s' ''
  echo -e "${DIM}${rule// /─}${RESET}"
}

# Banner: show the effective prompt, model list and CLI path before any run.
header_rows=(
  "${DIM}Prompt:${RESET}  ${PROMPT}"
  "${DIM}Models:${RESET}  ${MODELS[*]}"
  "${DIM}CLI:${RESET}     ${CLI}"
)

echo
echo -e "${BOLD}Gemini CLI Headless Monitoring Test${RESET}"
separator
for header_row in "${header_rows[@]}"; do
  echo -e "$header_row"
done
separator
echo

# Run bookkeeping, reported in the final summary.
pass_count=0 fail_count=0
total_models="${#MODELS[@]}"

# ── Helpers for the per-model report ─────────────────────────────────────

# format_duration <milliseconds>
#   Prints the duration in seconds with one truncated decimal (1500 -> "1.5s").
#   Prints "?" when the value is missing or not a non-negative number.
#   Uses shell arithmetic only, so it does not depend on bc.
format_duration() {
  local ms="${1:-}"
  ms="${ms%%.*}" # ignore any fractional milliseconds
  if [[ "$ms" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a leading zero is not read as octal.
    printf '%d.%ds\n' "$((10#$ms / 1000))" "$((10#$ms % 1000 / 100))"
  else
    printf '?\n'
  fi
}

# tint <value> <expected> <color-if-equal> <color-otherwise>
#   Prints <value>, wrapped in the applicable color and RESET. An empty color
#   means "print the value plain".
tint() {
  local value="$1" color="$4"
  if [[ "$value" == "$2" ]]; then
    color="$3"
  fi
  if [[ -n "$color" ]]; then
    printf '%b%s%b\n' "$color" "$value" "${RESET:-}"
  else
    printf '%s\n' "$value"
  fi
}

# stream_events <type> <file>
#   Prints every event of the given type from a stream-json capture, one
#   compact JSON value per line. Best effort: a jq failure yields whatever was
#   emitted so far instead of aborting the script under errexit/pipefail.
stream_events() {
  jq -c --arg wanted "$1" 'select(.type == $wanted)' "$2" 2>/dev/null || true
}

# json_field <json> <jq-path> <fallback>
#   Prints the value at <jq-path>, or <fallback> when the value is null or
#   absent, the JSON text is empty, or jq fails.
json_field() {
  local value
  value=$(jq -r --arg fallback "$3" "($2) // \$fallback" <<< "$1" 2>/dev/null) || value=""
  printf '%s\n' "${value:-$3}"
}

# Captures for the current run live in one private directory that is removed
# when the script exits, so an aborted run leaves no temp files behind.
RUN_TMPDIR=$(mktemp -d)
trap 'rm -rf -- "$RUN_TMPDIR"' EXIT
TMPFILE="$RUN_TMPDIR/stdout.jsonl"
STDERRFILE="$RUN_TMPDIR/stderr.log"

for model in "${MODELS[@]}"; do
  echo -e "${BOLD}${CYAN}[$model]${RESET}"
  echo ""

  # ── stream-json run ──────────────────────────────────────────────────
  exit_code=0

  echo -e "  ${DIM}Running with -o stream-json -d ...${RESET}"
  # The redirections truncate both capture files, so each model starts clean.
  # `|| exit_code=$?` records a failure without tripping errexit.
  node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json -d \
    >"$TMPFILE" 2>"$STDERRFILE" || exit_code=$?

  if [[ $exit_code -ne 0 ]]; then
    echo -e "  ${RED}FAILED${RESET} (exit code $exit_code)"
    echo ""
    if [[ -s "$STDERRFILE" ]]; then
      echo -e "  ${DIM}stderr:${RESET}"
      sed 's/^/    /' "$STDERRFILE"
      echo ""
    fi
    # Not ((fail_count++)): that returns status 1 when the old value is 0,
    # which terminates the script under `set -e`.
    fail_count=$((fail_count + 1))
    separator
    echo ""
    continue
  fi

  pass_count=$((pass_count + 1))

  # Parse init event (first one wins)
  init_line=$(stream_events init "$TMPFILE" | head -1)
  auth_method=$(json_field "$init_line" '.auth_method' 'not set')
  user_tier=$(json_field "$init_line" '.user_tier' 'not set')
  session_id=$(json_field "$init_line" '.session_id' '?')

  # Parse result event (last one wins)
  result_line=$(stream_events result "$TMPFILE" | tail -1)
  status=$(json_field "$result_line" '.status' '?')
  api_requests=$(json_field "$result_line" '.stats.api_requests' '?')
  api_errors=$(json_field "$result_line" '.stats.api_errors' '?')
  retry_count=$(json_field "$result_line" '.stats.retry_count' '0')
  total_tokens=$(json_field "$result_line" '.stats.total_tokens' '?')
  input_tokens=$(json_field "$result_line" '.stats.input_tokens' '?')
  output_tokens=$(json_field "$result_line" '.stats.output_tokens' '?')
  cached=$(json_field "$result_line" '.stats.cached' '?')
  tool_calls=$(json_field "$result_line" '.stats.tool_calls' '0')
  duration_ms=$(json_field "$result_line" '.stats.duration_ms' '?')

  # Count retries and loop events (tr strips the padding some wc builds add)
  retry_events=$(stream_events retry "$TMPFILE" | wc -l | tr -d ' ')
  loop_events=$(stream_events loop_detected "$TMPFILE")
  if [[ -n "$loop_events" ]]; then
    loop_count=$(wc -l <<< "$loop_events" | tr -d ' ')
    loop_type=$({ jq -r '.loop_type // empty' <<< "$loop_events" 2>/dev/null || true; } | head -1)
  else
    loop_count=0
    loop_type=""
  fi

  # Extract assistant response (concatenate deltas)
  response=$({ jq -r 'select(.type=="message" and .role=="assistant") | .content' "$TMPFILE" 2>/dev/null || true; } | tr -d '\n')
  # Truncate for display
  if [[ ${#response} -gt 120 ]]; then
    response="${response:0:120}..."
  fi

  # Display. Values taken from the CLI output are printed with %s so that any
  # backslash sequences they contain are shown literally, not interpreted.
  echo -e "  ${BOLD}Auth & Session${RESET}"
  printf '    auth_method:  %b%s%b\n' "$GREEN" "$auth_method" "$RESET"
  printf '    user_tier:    %s\n' "$user_tier"
  printf '    session_id:   %b%s%b\n' "$DIM" "$session_id" "$RESET"
  echo ""

  echo -e "  ${BOLD}API Stats${RESET}"
  printf '    status:       %s\n' "$(tint "$status" success "$GREEN" "$RED")"
  printf '    api_requests: %s\n' "$api_requests"
  printf '    api_errors:   %s\n' "$(tint "$api_errors" 0 "" "$RED")"
  printf '    retry_count:  %s\n' "$(tint "$retry_count" 0 "" "$YELLOW")"
  printf '    duration:     %s\n' "$(format_duration "$duration_ms")"
  echo ""

  echo -e "  ${BOLD}Tokens${RESET}"
  printf '    total:   %s  (in: %s, out: %s, cached: %s)\n' \
    "$total_tokens" "$input_tokens" "$output_tokens" "$cached"
  printf '    tools:   %s calls\n' "$tool_calls"
  echo ""

  if [[ "$retry_events" -gt 0 ]]; then
    echo -e "  ${BOLD}${YELLOW}Retries ($retry_events)${RESET}"
    jq -r 'select(.type=="retry") | "    attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' "$TMPFILE" 2>/dev/null || true
    echo ""
  fi

  if [[ "$loop_count" -gt 0 ]]; then
    echo -e "  ${BOLD}${RED}Loop Detected${RESET}"
    printf '    type: %s\n' "${loop_type:-unknown}"
    echo ""
  fi

  echo -e "  ${BOLD}Response${RESET}"
  printf '    %b%s%b\n' "$DIM" "$response" "$RESET"
  echo ""

  # Show stderr if any
  if [[ -s "$STDERRFILE" ]]; then
    echo -e "  ${BOLD}Stderr${RESET}"
    sed 's/^/    /' "$STDERRFILE"
    echo ""
  fi

  separator
  echo ""
done

# Final tally across all models. The failure count is colored red only when
# at least one run failed.
failed_cell="$fail_count"
if [[ $fail_count -gt 0 ]]; then
  failed_cell="${RED}${fail_count}${RESET}"
fi

echo -e "${BOLD}Summary${RESET}"
echo -e "  Models tested: ${total_models}"
echo -e "  Passed:        ${GREEN}${pass_count}${RESET}"
echo -e "  Failed:        ${failed_cell}"
echo
feat(headless): surface diagnostic monitoring data in non-interactive output 2026-02-24 23:37:39 -08:00			`#!/bin/bash`
			`# -----------------------------------------------------------------------------`
			`# Gemini CLI Headless Mode Monitoring Test Script`
			`# -----------------------------------------------------------------------------`
			`# Purpose:`
			`# Runs the Gemini CLI in headless mode across multiple models and output`
			`# formats, then displays the monitoring data (auth method, API stats, retries,`
			`# loop detection) in a readable summary.`
			`#`
			`# Prerequisites:`
			`# Authentication must already be configured (API key, OAuth, or Vertex AI).`
			`# Build the project first: npm run build`
			`#`
			`# Usage:`
			`# ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]`
			`#`
			`# Options:`
			`# --prompt <text> Override the default test prompt`
			`# --models <list> Space-separated list of models to test (quoted)`
			`#`
			`# Example:`
			`# ./scripts/test_gemini.sh`
			`# ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"`
			`# -----------------------------------------------------------------------------`

			`set -euo pipefail`

			`SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"`
			`REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"`
			`CLI="$REPO_ROOT/packages/cli/dist/index.js"`

			`# Defaults`
			`PROMPT="count how many files are in the current folder"`
			`MODELS=(`
			`"gemini-2.5-pro"`
			`"gemini-2.5-flash"`
			`"gemini-3.1-pro-preview"`
			`"gemini-3-flash-preview"`
			`)`

			`# Parse args`
			`while [[ "$#" -gt 0 ]]; do`
			`case "$1" in`
			`--prompt) PROMPT="$2"; shift ;;`
			`--models) IFS=' ' read -ra MODELS <<< "$2"; shift ;;`
			`*) echo "Unknown option: $1"; exit 1 ;;`
			`esac`
			`shift`
			`done`

			`# Colors`
			`BOLD='\033[1m'`
			`DIM='\033[2m'`
			`GREEN='\033[32m'`
			`YELLOW='\033[33m'`
			`RED='\033[31m'`
			`CYAN='\033[36m'`
			`RESET='\033[0m'`

			`# Check prerequisites`
			`if [[ ! -f "$CLI" ]]; then`
			`echo -e "${RED}CLI not found at $CLI${RESET}"`
			`echo "Run 'npm run build' from the repo root first."`
			`exit 1`
			`fi`

			`if ! command -v jq &>/dev/null; then`
			`echo -e "${RED}jq is required but not installed.${RESET}"`
			`exit 1`
			`fi`

			`separator() {`
			`echo -e "${DIM}$(printf '%.0s─' {1..72})${RESET}"`
			`}`

			`# Header`
			`echo ""`
			`echo -e "${BOLD}Gemini CLI Headless Monitoring Test${RESET}"`
			`separator`
			`echo -e "${DIM}Prompt:${RESET} $PROMPT"`
			`echo -e "${DIM}Models:${RESET} ${MODELS[*]}"`
			`echo -e "${DIM}CLI:${RESET} $CLI"`
			`separator`
			`echo ""`

			`total_models=${#MODELS[@]}`
			`pass_count=0`
			`fail_count=0`

			`for model in "${MODELS[@]}"; do`
			`echo -e "${BOLD}${CYAN}[$model]${RESET}"`
			`echo ""`

			`# ── stream-json run ──────────────────────────────────────────────────`
			`TMPFILE=$(mktemp)`
			`STDERRFILE=$(mktemp)`
			`exit_code=0`

feat(headless): gate diagnostic output behind --debug flag 2026-02-25 09:52:14 -08:00			`echo -e " ${DIM}Running with -o stream-json -d ...${RESET}"`
			`node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json -d \`
feat(headless): surface diagnostic monitoring data in non-interactive output 2026-02-24 23:37:39 -08:00			`>"$TMPFILE" 2>"$STDERRFILE" \|\| exit_code=$?`

			`if [[ $exit_code -ne 0 ]]; then`
			`echo -e " ${RED}FAILED${RESET} (exit code $exit_code)"`
			`echo ""`
			`if [[ -s "$STDERRFILE" ]]; then`
			`echo -e " ${DIM}stderr:${RESET}"`
			`sed 's/^/ /' "$STDERRFILE"`
			`echo ""`
			`fi`
			`((fail_count++))`
			`rm -f "$TMPFILE" "$STDERRFILE"`
			`separator`
			`echo ""`
			`continue`
			`fi`

			`((pass_count++))`

			`# Parse init event`
			`init_line=$(jq -c 'select(.type=="init")' "$TMPFILE" 2>/dev/null \| head -1)`
			`auth_method=$(echo "$init_line" \| jq -r '.auth_method // "not set"' 2>/dev/null)`
			`user_tier=$(echo "$init_line" \| jq -r '.user_tier // "not set"' 2>/dev/null)`
			`session_id=$(echo "$init_line" \| jq -r '.session_id // "?"' 2>/dev/null)`

			`# Parse result event`
			`result_line=$(jq -c 'select(.type=="result")' "$TMPFILE" 2>/dev/null \| tail -1)`
			`status=$(echo "$result_line" \| jq -r '.status // "?"' 2>/dev/null)`
			`api_requests=$(echo "$result_line" \| jq -r '.stats.api_requests // "?"' 2>/dev/null)`
			`api_errors=$(echo "$result_line" \| jq -r '.stats.api_errors // "?"' 2>/dev/null)`
			`retry_count=$(echo "$result_line" \| jq -r '.stats.retry_count // 0' 2>/dev/null)`
			`total_tokens=$(echo "$result_line" \| jq -r '.stats.total_tokens // "?"' 2>/dev/null)`
			`input_tokens=$(echo "$result_line" \| jq -r '.stats.input_tokens // "?"' 2>/dev/null)`
			`output_tokens=$(echo "$result_line" \| jq -r '.stats.output_tokens // "?"' 2>/dev/null)`
			`cached=$(echo "$result_line" \| jq -r '.stats.cached // "?"' 2>/dev/null)`
			`tool_calls=$(echo "$result_line" \| jq -r '.stats.tool_calls // 0' 2>/dev/null)`
			`duration_ms=$(echo "$result_line" \| jq -r '.stats.duration_ms // "?"' 2>/dev/null)`

			`# Count retries and loop events`
			`retry_events=$(jq -c 'select(.type=="retry")' "$TMPFILE" 2>/dev/null \| wc -l \| tr -d ' ')`
			`loop_events=$(jq -c 'select(.type=="loop_detected")' "$TMPFILE" 2>/dev/null)`
			`if [[ -n "$loop_events" ]]; then`
			`loop_count=$(echo "$loop_events" \| wc -l \| tr -d ' ')`
			`loop_type=$(echo "$loop_events" \| jq -r '.loop_type // empty' 2>/dev/null \| head -1)`
			`else`
			`loop_count=0`
			`loop_type=""`
			`fi`

			`# Extract assistant response (concatenate deltas)`
			`response=$(jq -r 'select(.type=="message" and .role=="assistant") \| .content' "$TMPFILE" 2>/dev/null \| tr -d '\n')`
			`# Truncate for display`
			`if [[ ${#response} -gt 120 ]]; then`
			`response="${response:0:120}..."`
			`fi`

			`# Format duration`
			`if [[ "$duration_ms" != "?" ]]; then`
			`duration_s=$(echo "scale=1; $duration_ms / 1000" \| bc 2>/dev/null \|\| echo "$duration_ms ms")`
			`duration_display="${duration_s}s"`
			`else`
			`duration_display="?"`
			`fi`

			`# Display`
			`echo -e " ${BOLD}Auth & Session${RESET}"`
			`echo -e " auth_method: ${GREEN}$auth_method${RESET}"`
			`echo -e " user_tier: $user_tier"`
			`echo -e " session_id: ${DIM}$session_id${RESET}"`
			`echo ""`

			`echo -e " ${BOLD}API Stats${RESET}"`
			`echo -e " status: $([ "$status" = "success" ] && echo "${GREEN}$status${RESET}" \|\| echo "${RED}$status${RESET}")"`
			`echo -e " api_requests: $api_requests"`
			`echo -e " api_errors: $([ "$api_errors" = "0" ] && echo "$api_errors" \|\| echo "${RED}$api_errors${RESET}")"`
			`echo -e " retry_count: $([ "$retry_count" = "0" ] && echo "$retry_count" \|\| echo "${YELLOW}$retry_count${RESET}")"`
			`echo -e " duration: $duration_display"`
			`echo ""`

			`echo -e " ${BOLD}Tokens${RESET}"`
			`echo -e " total: $total_tokens (in: $input_tokens, out: $output_tokens, cached: $cached)"`
			`echo -e " tools: $tool_calls calls"`
			`echo ""`

			`if [[ "$retry_events" -gt 0 ]]; then`
			`echo -e " ${BOLD}${YELLOW}Retries ($retry_events)${RESET}"`
			`jq -r 'select(.type=="retry") \| " attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' "$TMPFILE" 2>/dev/null`
			`echo ""`
			`fi`

			`if [[ "$loop_count" -gt 0 ]]; then`
			`echo -e " ${BOLD}${RED}Loop Detected${RESET}"`
			`echo -e " type: ${loop_type:-unknown}"`
			`echo ""`
			`fi`

			`echo -e " ${BOLD}Response${RESET}"`
			`echo -e " ${DIM}$response${RESET}"`
			`echo ""`

			`# Show stderr if any`
			`stderr_content=$(cat "$STDERRFILE")`
			`if [[ -n "$stderr_content" ]]; then`
			`echo -e " ${BOLD}Stderr${RESET}"`
			`echo "$stderr_content" \| sed 's/^/ /'`
			`echo ""`
			`fi`

			`rm -f "$TMPFILE" "$STDERRFILE"`
			`separator`
			`echo ""`
			`done`

			`# Summary`
			`echo -e "${BOLD}Summary${RESET}"`
			`echo -e " Models tested: $total_models"`
			`echo -e " Passed: ${GREEN}$pass_count${RESET}"`
			`if [[ $fail_count -gt 0 ]]; then`
			`echo -e " Failed: ${RED}$fail_count${RESET}"`
			`else`
			`echo -e " Failed: $fail_count"`
			`fi`
			`echo ""`