Files
gemini-cli/scripts/test_gemini.sh
Dmitry Lyalin 941a479855 feat(headless): gate diagnostic output behind --debug flag
Diagnostic monitoring data (auth_method, user_tier, api_requests,
api_errors, retry_count, RETRY events, LOOP_DETECTED events, and
stderr warnings) is now only emitted when --debug / -d is passed.

Without the flag, headless output is identical to before — no new
fields, no new events, no stderr noise. This keeps default output
clean for piped workflows while making diagnostics available on demand.
2026-02-25 09:52:14 -08:00

224 lines
7.2 KiB
Bash
Executable File

#!/bin/bash
# -----------------------------------------------------------------------------
# Gemini CLI Headless Mode Monitoring Test Script
# -----------------------------------------------------------------------------
# Purpose:
# Runs the Gemini CLI in headless mode (stream-json output with the -d debug
# flag) once per model, then displays the monitoring data (auth method, API
# stats, retries, loop detection) in a readable summary.
#
# Prerequisites:
# Authentication must already be configured (API key, OAuth, or Vertex AI).
# Build the project first: npm run build
# jq must be installed (it is used to parse the stream-json output).
#
# Usage:
# ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]
#
# Options:
# --prompt <text> Override the default test prompt
# --models <list> Space-separated list of models to test (quoted)
#
# Example:
# ./scripts/test_gemini.sh
# ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"
# -----------------------------------------------------------------------------
set -euo pipefail

# Resolve the repository root relative to this script so it can be invoked
# from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
CLI="$REPO_ROOT/packages/cli/dist/index.js"

# Defaults (overridable with --prompt / --models)
PROMPT="count how many files are in the current folder"
MODELS=(
  "gemini-2.5-pro"
  "gemini-2.5-flash"
  "gemini-3.1-pro-preview"
  "gemini-3-flash-preview"
)

# usage_error <message>
# Prints <message> to stderr and terminates the script with status 1.
usage_error() {
  printf '%s\n' "$1" >&2
  exit 1
}

# parse_args [--prompt <text>] [--models "<m1> <m2> ..."]
# Updates the PROMPT and MODELS globals from the given arguments.
# Exits with status 1 on an unknown option, on an option that is missing its
# value, or on a --models value that contains no model names.
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --prompt)
        # Without this check, a trailing --prompt dies under `set -u` with a
        # bare "unbound variable" message.
        [[ "$#" -ge 2 ]] || usage_error "Option --prompt requires a value"
        PROMPT="$2"
        shift
        ;;
      --models)
        [[ "$#" -ge 2 ]] || usage_error "Option --models requires a value"
        IFS=' ' read -ra MODELS <<< "$2"
        # A blank list would make the run loop a silent no-op.
        if [[ "${#MODELS[@]}" -eq 0 ]]; then
          usage_error "Option --models requires at least one model name"
        fi
        shift
        ;;
      *)
        usage_error "Unknown option: $1"
        ;;
    esac
    shift
  done
}

parse_args "$@"
# ANSI styling sequences. They are stored in backslash-escaped form, so they
# only take effect when printed through `echo -e` (or an equivalent).
# Text attributes
RESET='\033[0m'
BOLD='\033[1m'
DIM='\033[2m'
# Foreground colors
RED='\033[31m'
GREEN='\033[32m'
YELLOW='\033[33m'
CYAN='\033[36m'
# Check prerequisites: the built CLI entry point must exist and jq must be on
# PATH; otherwise report the problem and stop with status 1.
[[ -f "$CLI" ]] || {
  printf '%b\n' "${RED}CLI not found at $CLI${RESET}"
  printf '%s\n' "Run 'npm run build' from the repo root first."
  exit 1
}

command -v jq >/dev/null 2>&1 || {
  printf '%b\n' "${RED}jq is required but not installed.${RESET}"
  exit 1
}
# Print a horizontal rule of 72 box-drawing characters, wrapped in the DIM
# and RESET styling sequences.
separator() {
  local rule
  # Build 72 spaces, then swap each one for the rule character.
  printf -v rule '%72s' ''
  printf '%b\n' "${DIM}${rule// /─}${RESET}"
}
# Banner: show the title and the effective prompt, model list and CLI path
# between two separator rules.
printf '\n%b\n' "${BOLD}Gemini CLI Headless Monitoring Test${RESET}"
separator
printf '%b\n' \
  "${DIM}Prompt:${RESET} $PROMPT" \
  "${DIM}Models:${RESET} ${MODELS[*]}" \
  "${DIM}CLI:${RESET} $CLI"
separator
printf '\n'

# Run tallies, reported in the final summary.
fail_count=0
pass_count=0
total_models=${#MODELS[@]}
# ── Helpers for the per-model loop ───────────────────────────────────────────

# events_of_type <type> <file>
# Prints, one compact JSON object per line, every event in <file> whose
# .type equals <type>. Always succeeds: a jq error (for example a non-JSON
# line in the stream) only truncates the output instead of aborting the
# script through `set -e` / `pipefail`.
events_of_type() {
  jq -c --arg wanted "$1" 'select(.type == $wanted)' "$2" 2>/dev/null || true
}

# json_field <json> <jq-path> <fallback>
# Prints the value at <jq-path> inside <json>. Prints <fallback> instead when
# <json> is empty, when the value is missing/null/false/empty, or when jq
# fails.
json_field() {
  local json=$1 path=$2 fallback=$3 value=""
  if [[ -n "$json" ]]; then
    value=$(jq -r "$path // empty" <<<"$json" 2>/dev/null) || value=""
  fi
  printf '%s\n' "${value:-$fallback}"
}

# format_duration <milliseconds>
# Prints whole-number milliseconds as seconds with one (truncated) decimal,
# e.g. 1500 -> "1.5s". Prints "?" for an empty or "?" input and "<value> ms"
# for anything that is not a plain non-negative integer.
format_duration() {
  local ms=$1
  if [[ "$ms" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    printf '%d.%ds\n' "$((10#$ms / 1000))" "$(((10#$ms % 1000) / 100))"
  elif [[ -n "$ms" && "$ms" != "?" ]]; then
    printf '%s ms\n' "$ms"
  else
    printf '?\n'
  fi
}

# print_field <label> <color> <value>
# Prints " <label> <value>", wrapping the value in <color>/RESET when <color>
# is non-empty. Only the color sequences are escape-interpreted; <value> is
# printed verbatim so backslashes in parsed data cannot inject escapes.
print_field() {
  local label=$1 color=$2 value=$3
  if [[ -n "$color" ]]; then
    printf ' %s %b%s%b\n' "$label" "$color" "$value" "$RESET"
  else
    printf ' %s %s\n' "$label" "$value"
  fi
}

# cleanup_tmpfiles
# Removes the temp files of the current model run, if any. Also installed as
# the EXIT trap so an early exit does not leave them behind.
TMPFILE=""
STDERRFILE=""
cleanup_tmpfiles() {
  if [[ -n "${TMPFILE:-}" ]]; then rm -f -- "$TMPFILE"; fi
  if [[ -n "${STDERRFILE:-}" ]]; then rm -f -- "$STDERRFILE"; fi
  TMPFILE=""
  STDERRFILE=""
}
trap cleanup_tmpfiles EXIT

# ── Per-model run ────────────────────────────────────────────────────────────
# For each model: run the CLI once, then parse and display the event stream.
for model in "${MODELS[@]}"; do
  echo -e "${BOLD}${CYAN}[$model]${RESET}"
  echo ""

  # ── stream-json run ──────────────────────────────────────────────────
  TMPFILE=$(mktemp)
  STDERRFILE=$(mktemp)
  exit_code=0
  echo -e " ${DIM}Running with -o stream-json -d ...${RESET}"
  # `|| exit_code=$?` records a failure without tripping `set -e`.
  node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json -d \
    >"$TMPFILE" 2>"$STDERRFILE" || exit_code=$?

  if [[ $exit_code -ne 0 ]]; then
    echo -e " ${RED}FAILED${RESET} (exit code $exit_code)"
    echo ""
    if [[ -s "$STDERRFILE" ]]; then
      echo -e " ${DIM}stderr:${RESET}"
      sed 's/^/ /' "$STDERRFILE"
      echo ""
    fi
    # Not ((fail_count++)): that returns status 1 when the counter is 0,
    # which terminates the script under `set -e`.
    fail_count=$((fail_count + 1))
    cleanup_tmpfiles
    separator
    echo ""
    continue
  fi
  pass_count=$((pass_count + 1))

  # Parse init event (first one wins)
  init_line=$(events_of_type init "$TMPFILE" | head -n 1)
  auth_method=$(json_field "$init_line" '.auth_method' 'not set')
  user_tier=$(json_field "$init_line" '.user_tier' 'not set')
  session_id=$(json_field "$init_line" '.session_id' '?')

  # Parse result event (last one wins)
  result_line=$(events_of_type result "$TMPFILE" | tail -n 1)
  status=$(json_field "$result_line" '.status' '?')
  api_requests=$(json_field "$result_line" '.stats.api_requests' '?')
  api_errors=$(json_field "$result_line" '.stats.api_errors' '?')
  retry_count=$(json_field "$result_line" '.stats.retry_count' '0')
  total_tokens=$(json_field "$result_line" '.stats.total_tokens' '?')
  input_tokens=$(json_field "$result_line" '.stats.input_tokens' '?')
  output_tokens=$(json_field "$result_line" '.stats.output_tokens' '?')
  cached=$(json_field "$result_line" '.stats.cached' '?')
  tool_calls=$(json_field "$result_line" '.stats.tool_calls' '0')
  duration_ms=$(json_field "$result_line" '.stats.duration_ms' '?')

  # Count retries and loop events
  retry_events=$(events_of_type retry "$TMPFILE" | wc -l | tr -d ' ')
  loop_events=$(events_of_type loop_detected "$TMPFILE")
  if [[ -n "$loop_events" ]]; then
    loop_count=$(wc -l <<<"$loop_events" | tr -d ' ')
    loop_type=$( { jq -r '.loop_type // empty' <<<"$loop_events" 2>/dev/null || true; } | head -n 1)
  else
    loop_count=0
    loop_type=""
  fi

  # Extract assistant response (concatenate deltas)
  response=$( { jq -r 'select(.type=="message" and .role=="assistant") | .content' "$TMPFILE" 2>/dev/null || true; } | tr -d '\n')
  # Truncate for display
  if [[ ${#response} -gt 120 ]]; then
    response="${response:0:120}..."
  fi

  duration_display=$(format_duration "$duration_ms")

  # Display
  echo -e " ${BOLD}Auth & Session${RESET}"
  print_field "auth_method:" "$GREEN" "$auth_method"
  print_field "user_tier:" "" "$user_tier"
  print_field "session_id:" "$DIM" "$session_id"
  echo ""

  echo -e " ${BOLD}API Stats${RESET}"
  status_color=$RED
  if [[ "$status" == "success" ]]; then status_color=$GREEN; fi
  print_field "status:" "$status_color" "$status"
  print_field "api_requests:" "" "$api_requests"
  # Highlight only non-zero error and retry counts.
  errors_color=""
  if [[ "$api_errors" != "0" ]]; then errors_color=$RED; fi
  print_field "api_errors:" "$errors_color" "$api_errors"
  retries_color=""
  if [[ "$retry_count" != "0" ]]; then retries_color=$YELLOW; fi
  print_field "retry_count:" "$retries_color" "$retry_count"
  print_field "duration:" "" "$duration_display"
  echo ""

  echo -e " ${BOLD}Tokens${RESET}"
  printf ' total: %s (in: %s, out: %s, cached: %s)\n' \
    "$total_tokens" "$input_tokens" "$output_tokens" "$cached"
  printf ' tools: %s calls\n' "$tool_calls"
  echo ""

  if [[ "$retry_events" -gt 0 ]]; then
    echo -e " ${BOLD}${YELLOW}Retries ($retry_events)${RESET}"
    jq -r 'select(.type=="retry") | " attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' "$TMPFILE" 2>/dev/null || true
    echo ""
  fi

  if [[ "$loop_count" -gt 0 ]]; then
    echo -e " ${BOLD}${RED}Loop Detected${RESET}"
    printf ' type: %s\n' "${loop_type:-unknown}"
    echo ""
  fi

  echo -e " ${BOLD}Response${RESET}"
  # The response is model output: print it verbatim, never through echo -e.
  printf ' %b%s%b\n' "$DIM" "$response" "$RESET"
  echo ""

  # Show stderr if any
  stderr_content=$(cat "$STDERRFILE")
  if [[ -n "$stderr_content" ]]; then
    echo -e " ${BOLD}Stderr${RESET}"
    printf '%s\n' "$stderr_content" | sed 's/^/ /'
    echo ""
  fi

  cleanup_tmpfiles
  separator
  echo ""
done
# Summary: totals for the whole run; the failure count is shown in red only
# when at least one model failed.
failed_display=$fail_count
if ((fail_count > 0)); then
  failed_display="${RED}${fail_count}${RESET}"
fi
printf '%b\n' \
  "${BOLD}Summary${RESET}" \
  " Models tested: $total_models" \
  " Passed: ${GREEN}$pass_count${RESET}" \
  " Failed: $failed_display" \
  ""