mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-23 03:24:42 -07:00
941a479855
Diagnostic monitoring data (auth_method, user_tier, api_requests, api_errors, retry_count, RETRY events, LOOP_DETECTED events, and stderr warnings) is now only emitted when --debug / -d is passed. Without the flag, headless output is identical to before — no new fields, no new events, no stderr noise. This keeps default output clean for piped workflows while making diagnostics available on demand.
224 lines
7.2 KiB
Bash
Executable File
224 lines
7.2 KiB
Bash
Executable File
#!/bin/bash
# -----------------------------------------------------------------------------
# Gemini CLI Headless Mode Monitoring Test Script
# -----------------------------------------------------------------------------
# Purpose:
#   Runs the Gemini CLI in headless mode (stream-json output with the -d debug
#   flag) once per model, then displays the monitoring data (auth method, API
#   stats, retries, loop detection) in a readable summary.
#
# Prerequisites:
#   Authentication must already be configured (API key, OAuth, or Vertex AI).
#   Build the project first: npm run build
#   jq must be installed (it is used to parse the CLI's JSON events).
#
# Usage:
#   ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"]
#
# Options:
#   --prompt <text>   Override the default test prompt
#   --models <list>   Space-separated list of models to test (quoted)
#
# Example:
#   ./scripts/test_gemini.sh
#   ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash"
# -----------------------------------------------------------------------------

set -euo pipefail

# Resolve paths relative to this script's own location so the script can be
# invoked from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Entry point of the built CLI; its existence is checked before any run.
CLI="$REPO_ROOT/packages/cli/dist/index.js"

# Defaults (both can be overridden on the command line)
PROMPT="count how many files are in the current folder"
MODELS=(
  "gemini-2.5-pro"
  "gemini-2.5-flash"
  "gemini-3.1-pro-preview"
  "gemini-3-flash-preview"
)

# Parse args
#
# parse_args [--prompt <text>] [--models "<model1 model2 ...>"]
#   Overrides the PROMPT and MODELS globals from command-line options.
#   Globals:  PROMPT, MODELS (written)
#   Outputs:  an error message on stderr for invalid input
#   Exits:    status 1 on an unknown option or an option without a value
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --prompt | --models)
        # Check for the value explicitly: under `set -u` a bare "$2" would
        # abort with an opaque "unbound variable" error instead.
        if [[ "$#" -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        if [[ "$1" == "--prompt" ]]; then
          PROMPT="$2"
        else
          # Split the quoted, space-separated list into the MODELS array.
          IFS=' ' read -ra MODELS <<< "$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Colors
# ANSI escape sequences kept as literal backslash strings (single quotes), so
# they only take effect when printed through `echo -e` or printf's %b.
readonly BOLD='\033[1m'
readonly DIM='\033[2m'
readonly GREEN='\033[32m'
readonly YELLOW='\033[33m'
readonly RED='\033[31m'
readonly CYAN='\033[36m'
readonly RESET='\033[0m'

# Check prerequisites
# The built CLI entry point must exist before anything can be run.
if [[ ! -f "$CLI" ]]; then
  echo -e "${RED}CLI not found at $CLI${RESET}" >&2
  echo "Run 'npm run build' from the repo root first." >&2
  exit 1
fi

# jq is used to parse the JSON events the CLI writes to stdout.
if ! command -v jq &>/dev/null; then
  echo -e "${RED}jq is required but not installed.${RESET}" >&2
  exit 1
fi

# separator
#   Prints a horizontal rule made of 72 '─' characters, wrapped in the DIM and
#   RESET colour sequences.
#   Globals:  DIM, RESET (read)
#   Outputs:  one line on stdout
separator() {
  # '%.0s' consumes each argument produced by {1..72} while printing none of
  # it, so the literal '─' in the format string is emitted once per argument.
  echo -e "${DIM}$(printf '%.0s─' {1..72})${RESET}"
}

# Header
# Shows the effective prompt, model list and CLI path before any run starts.
echo ""
echo -e "${BOLD}Gemini CLI Headless Monitoring Test${RESET}"
separator
# The values are printed with %s (not `echo -e`) so that backslashes in a
# user-supplied prompt or path are displayed literally instead of being
# interpreted as escape sequences.
printf '%b %s\n' "${DIM}Prompt:${RESET}" "$PROMPT"
printf '%b %s\n' "${DIM}Models:${RESET}" "${MODELS[*]}"
printf '%b %s\n' "${DIM}CLI:${RESET}" "$CLI"
separator
echo ""

total_models=${#MODELS[@]}
pass_count=0
fail_count=0

# Scratch files of the run in progress. They are globals so the EXIT trap can
# delete them when the script is interrupted or aborts in the middle of a run.
TMPFILE=""
STDERRFILE=""

# cleanup_tmp
#   Deletes the current scratch files, if any, and forgets their names.
#   Globals:  TMPFILE, STDERRFILE (read, reset to empty)
cleanup_tmp() {
  local f
  for f in "$TMPFILE" "$STDERRFILE"; do
    if [[ -n "$f" ]]; then
      rm -f -- "$f"
    fi
  done
  TMPFILE=""
  STDERRFILE=""
}
trap cleanup_tmp EXIT

# events_of_type <type> <file>
#   Prints, one compact JSON object per line, every event in <file> whose
#   .type equals <type>. Always succeeds: a jq failure (for example a
#   non-JSON line in the stream) must not abort the script under
#   `set -e` / `pipefail`.
events_of_type() {
  jq -c --arg t "$1" 'select(.type == $t)' "$2" 2>/dev/null || true
}

# json_field <json> <jq-filter>
#   Prints the raw result of applying <jq-filter> to <json>. Prints nothing
#   for empty or invalid input, and always succeeds.
json_field() {
  jq -r "$2" <<< "$1" 2>/dev/null || true
}

# count_lines <text>
#   Prints the number of lines in <text>; 0 for an empty string.
count_lines() {
  if [[ -z "$1" ]]; then
    echo 0
  else
    printf '%s\n' "$1" | wc -l | tr -d ' '
  fi
}

# format_duration <milliseconds>
#   Prints the duration in seconds, truncated to one decimal ("1.2s").
#   Prints "?" when the value is unknown ("?" or empty), and the raw value
#   followed by "ms" when it is not a non-negative number.
format_duration() {
  local ms=$1
  local whole
  if [[ -z "$ms" || "$ms" == "?" ]]; then
    printf '?'
  elif [[ "$ms" =~ ^([0-9]+)(\.[0-9]+)?$ ]]; then
    whole=${BASH_REMATCH[1]}
    # 10# forces base 10 so that leading zeros are not parsed as octal.
    printf '%d.%ds' "$((10#$whole / 1000))" "$((10#$whole % 1000 / 100))"
  else
    printf '%sms' "$ms"
  fi
}

# run_model <model>
#   Runs the CLI once for <model> with `-o stream-json -d`, then prints the
#   auth/session data, API stats, token counts, retry and loop-detection
#   events, a truncated response and any stderr output.
#   Globals:  CLI, PROMPT and the colour variables (read);
#             pass_count, fail_count, TMPFILE, STDERRFILE (written)
#   Returns:  0 in all cases; a failed CLI run is reported and counted in
#             fail_count rather than propagated.
run_model() {
  local model=$1
  local exit_code=0
  local init_line result_line
  local auth_method user_tier session_id
  local status api_requests api_errors retry_count
  local total_tokens input_tokens output_tokens cached tool_calls duration_ms
  local retry_lines retry_events loop_events loop_count loop_type
  local response duration_display stderr_content
  local status_color errors_display retries_display

  echo -e "${BOLD}${CYAN}[$model]${RESET}"
  echo ""

  # ── stream-json run ──────────────────────────────────────────────────
  TMPFILE=$(mktemp)
  STDERRFILE=$(mktemp)

  echo -e " ${DIM}Running with -o stream-json -d ...${RESET}"
  node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json -d \
    > "$TMPFILE" 2> "$STDERRFILE" || exit_code=$?

  if [[ $exit_code -ne 0 ]]; then
    echo -e " ${RED}FAILED${RESET} (exit code $exit_code)"
    echo ""
    if [[ -s "$STDERRFILE" ]]; then
      echo -e " ${DIM}stderr:${RESET}"
      sed 's/^/ /' "$STDERRFILE"
      echo ""
    fi
    # Not ((fail_count++)): that returns status 1 while the counter is 0,
    # which makes `set -e` abort the whole script.
    fail_count=$((fail_count + 1))
    cleanup_tmp
    separator
    echo ""
    return 0
  fi

  pass_count=$((pass_count + 1))

  # Parse init event (the first one in the stream)
  init_line=$(events_of_type init "$TMPFILE" | head -n 1)
  auth_method=$(json_field "$init_line" '.auth_method // "not set"')
  user_tier=$(json_field "$init_line" '.user_tier // "not set"')
  session_id=$(json_field "$init_line" '.session_id // "?"')

  # Parse result event (the last one in the stream)
  result_line=$(events_of_type result "$TMPFILE" | tail -n 1)
  status=$(json_field "$result_line" '.status // "?"')
  api_requests=$(json_field "$result_line" '.stats.api_requests // "?"')
  api_errors=$(json_field "$result_line" '.stats.api_errors // "?"')
  retry_count=$(json_field "$result_line" '.stats.retry_count // 0')
  total_tokens=$(json_field "$result_line" '.stats.total_tokens // "?"')
  input_tokens=$(json_field "$result_line" '.stats.input_tokens // "?"')
  output_tokens=$(json_field "$result_line" '.stats.output_tokens // "?"')
  cached=$(json_field "$result_line" '.stats.cached // "?"')
  tool_calls=$(json_field "$result_line" '.stats.tool_calls // 0')
  duration_ms=$(json_field "$result_line" '.stats.duration_ms // "?"')

  # Count retries and loop events
  retry_lines=$(events_of_type retry "$TMPFILE")
  retry_events=$(count_lines "$retry_lines")
  loop_events=$(events_of_type loop_detected "$TMPFILE")
  loop_count=$(count_lines "$loop_events")
  loop_type=""
  if [[ -n "$loop_events" ]]; then
    loop_type=$(json_field "$loop_events" '.loop_type // empty' | head -n 1)
  fi

  # Extract assistant response (concatenate deltas)
  response=$(
    jq -r 'select(.type=="message" and .role=="assistant") | .content' \
      "$TMPFILE" 2>/dev/null | tr -d '\n'
  ) || true
  # Truncate for display
  if [[ ${#response} -gt 120 ]]; then
    response="${response:0:120}..."
  fi

  # Format duration
  duration_display=$(format_duration "$duration_ms")

  # Display
  echo -e " ${BOLD}Auth & Session${RESET}"
  echo -e " auth_method: ${GREEN}$auth_method${RESET}"
  echo -e " user_tier: $user_tier"
  echo -e " session_id: ${DIM}$session_id${RESET}"
  echo ""

  # Green only for an explicit "success"; everything else is shown in red.
  status_color=$RED
  if [[ "$status" == "success" ]]; then
    status_color=$GREEN
  fi
  # Error and retry counters are only highlighted when they are not "0".
  errors_display=$api_errors
  if [[ "$api_errors" != "0" ]]; then
    errors_display="${RED}${api_errors}${RESET}"
  fi
  retries_display=$retry_count
  if [[ "$retry_count" != "0" ]]; then
    retries_display="${YELLOW}${retry_count}${RESET}"
  fi

  echo -e " ${BOLD}API Stats${RESET}"
  echo -e " status: ${status_color}${status}${RESET}"
  echo -e " api_requests: $api_requests"
  echo -e " api_errors: $errors_display"
  echo -e " retry_count: $retries_display"
  echo -e " duration: $duration_display"
  echo ""

  echo -e " ${BOLD}Tokens${RESET}"
  echo -e " total: $total_tokens (in: $input_tokens, out: $output_tokens, cached: $cached)"
  echo -e " tools: $tool_calls calls"
  echo ""

  if [[ "$retry_events" -gt 0 ]]; then
    echo -e " ${BOLD}${YELLOW}Retries ($retry_events)${RESET}"
    jq -r 'select(.type=="retry") | " attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' \
      "$TMPFILE" 2>/dev/null || true
    echo ""
  fi

  if [[ "$loop_count" -gt 0 ]]; then
    echo -e " ${BOLD}${RED}Loop Detected${RESET}"
    echo -e " type: ${loop_type:-unknown}"
    echo ""
  fi

  echo -e " ${BOLD}Response${RESET}"
  # %s keeps backslashes in the model's answer literal; only the colour
  # sequences are passed through %b.
  printf ' %b%s%b\n' "$DIM" "$response" "$RESET"
  echo ""

  # Show stderr if any
  stderr_content=$(cat "$STDERRFILE")
  if [[ -n "$stderr_content" ]]; then
    echo -e " ${BOLD}Stderr${RESET}"
    printf '%s\n' "$stderr_content" | sed 's/^/ /'
    echo ""
  fi

  cleanup_tmp
  separator
  echo ""
  return 0
}

for model in "${MODELS[@]}"; do
  run_model "$model"
done

# Summary
#
# print_summary
#   Prints how many models were tested and how many runs passed and failed.
#   The failure count is shown in red only when it is greater than zero.
#   Globals:  total_models, pass_count, fail_count, BOLD, GREEN, RED, RESET
#             (all read)
#   Outputs:  the summary on stdout, followed by an empty line
print_summary() {
  echo -e "${BOLD}Summary${RESET}"
  echo -e " Models tested: $total_models"
  echo -e " Passed: ${GREEN}$pass_count${RESET}"
  if [[ "$fail_count" -gt 0 ]]; then
    echo -e " Failed: ${RED}$fail_count${RESET}"
  else
    echo -e " Failed: $fail_count"
  fi
  echo ""
}

print_summary