diff --git a/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap b/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap index 8c1a85cdd7..dcf6268194 100644 --- a/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap +++ b/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap @@ -3,8 +3,8 @@ exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'loop detected' 1`] = ` "{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} {"type":"message","timestamp":"","role":"user","content":"Loop test"} -{"type":"error","timestamp":"","severity":"warning","message":"Loop detected, stopping execution"} -{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0}} +{"type":"loop_detected","timestamp":""} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"api_requests":0,"api_errors":0}} " `; @@ -12,7 +12,7 @@ exports[`runNonInteractive > should emit appropriate error event in streaming JS "{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} {"type":"message","timestamp":"","role":"user","content":"Max turns test"} {"type":"error","timestamp":"","severity":"error","message":"Maximum session turns exceeded"} -{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0}} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"api_requests":0,"api_errors":0}} " `; @@ -23,7 +23,7 @@ exports[`runNonInteractive > should emit appropriate events for streaming JSON o {"type":"tool_use","timestamp":"","tool_name":"testTool","tool_id":"tool-1","parameters":{"arg1":"value1"}} {"type":"tool_result","timestamp":"","tool_id":"tool-1","status":"success","output":"Tool executed successfully"} {"type":"message","timestamp":"","role":"assistant","content":"Final answer","delta":true} -{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0}} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"api_requests":0,"api_errors":0}} " `; diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 206d011e63..ea7b9e19d4 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -183,6 +183,7 @@ describe('runNonInteractive', () => { getIdeMode: vi.fn().mockReturnValue(false), getContentGeneratorConfig: vi.fn().mockReturnValue({}), + getUserTierName: vi.fn().mockReturnValue(undefined), getDebugMode: vi.fn().mockReturnValue(false), getOutputFormat: vi.fn().mockReturnValue('text'), getModel: vi.fn().mockReturnValue('test-model'), diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index c2cab72353..097f7a22a5 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -9,6 +9,7 @@ import type { ToolCallRequestInfo, ResumedSessionData, UserFeedbackPayload, + RetryAttemptPayload, } from '@google/gemini-cli-core'; import { isSlashCommand } from './ui/utils/commandUtils.js'; import type { LoadedSettings } from './config/settings.js'; @@ -94,6 +95,7 @@ export async function runNonInteractive({ }; const startTime = Date.now(); + let retryCount = 0; const streamFormatter = config.getOutputFormat() === OutputFormat.STREAM_JSON ? new StreamJsonFormatter() @@ -181,6 +183,26 @@ export async function runNonInteractive({ } }; + const handleRetryAttempt = (payload: RetryAttemptPayload) => { + retryCount++; + if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.RETRY, + timestamp: new Date().toISOString(), + attempt: payload.attempt, + max_attempts: payload.maxAttempts, + delay_ms: payload.delayMs, + error: payload.error, + model: payload.model, + }); + } else if (config.getOutputFormat() === OutputFormat.TEXT) { + const errorSuffix = payload.error ? `: ${payload.error}` : ''; + process.stderr.write( + `[RETRY] Attempt ${payload.attempt}/${payload.maxAttempts} for model ${payload.model} (delay: ${payload.delayMs}ms)${errorSuffix}\n`, + ); + } + }; + let errorToHandle: unknown | undefined; try { consolePatcher.patch(); @@ -199,6 +221,8 @@ export async function runNonInteractive({ setupStdinCancellation(); coreEvents.on(CoreEvent.UserFeedback, handleUserFeedback); + coreEvents.on(CoreEvent.RetryAttempt, handleRetryAttempt); + coreEvents.drainBacklogs(); // Handle EPIPE errors when the output is piped to a command that closes early. @@ -228,12 +252,16 @@ export async function runNonInteractive({ } // Emit init event for streaming JSON + const authMethod = config.getContentGeneratorConfig()?.authType; + const userTier = config.getUserTierName(); if (streamFormatter) { streamFormatter.emitEvent({ type: JsonStreamEventType.INIT, timestamp: new Date().toISOString(), session_id: config.getSessionId(), model: config.getModel(), + ...(authMethod && { auth_method: authMethod }), + ...(userTier && { user_tier: userTier }), }); } @@ -345,13 +373,18 @@ export async function runNonInteractive({ } toolCallRequests.push(event.value); } else if (event.type === GeminiEventType.LoopDetected) { + const loopType = event.value?.loopType; if (streamFormatter) { streamFormatter.emitEvent({ - type: JsonStreamEventType.ERROR, + type: JsonStreamEventType.LOOP_DETECTED, timestamp: new Date().toISOString(), - severity: 'warning', - message: 'Loop detected, stopping execution', + ...(loopType && { loop_type: loopType }), }); + } else if (config.getOutputFormat() === OutputFormat.TEXT) { + const loopTypeStr = loopType ? ` (${loopType})` : ''; + process.stderr.write( + `[WARNING] Loop detected${loopTypeStr}, stopping execution\n`, + ); } } else if (event.type === GeminiEventType.MaxSessionTurns) { if (streamFormatter) { @@ -380,6 +413,7 @@ export async function runNonInteractive({ stats: streamFormatter.convertToStreamStats( metrics, durationMs, + retryCount, ), }); } @@ -479,13 +513,21 @@ export async function runNonInteractive({ stats: streamFormatter.convertToStreamStats( metrics, durationMs, + retryCount, ), }); } else if (config.getOutputFormat() === OutputFormat.JSON) { const formatter = new JsonFormatter(); const stats = uiTelemetryService.getMetrics(); textOutput.write( - formatter.format(config.getSessionId(), responseText, stats), + formatter.format( + config.getSessionId(), + responseText, + stats, + undefined, + authMethod, + userTier, + ), ); } else { textOutput.ensureTrailingNewline(); // Ensure a final newline @@ -503,13 +545,24 @@ export async function runNonInteractive({ type: JsonStreamEventType.RESULT, timestamp: new Date().toISOString(), status: 'success', - stats: streamFormatter.convertToStreamStats(metrics, durationMs), + stats: streamFormatter.convertToStreamStats( + metrics, + durationMs, + retryCount, + ), }); } else if (config.getOutputFormat() === OutputFormat.JSON) { const formatter = new JsonFormatter(); const stats = uiTelemetryService.getMetrics(); textOutput.write( - formatter.format(config.getSessionId(), responseText, stats), + formatter.format( + config.getSessionId(), + responseText, + stats, + undefined, + authMethod, + userTier, + ), ); } else { textOutput.ensureTrailingNewline(); // Ensure a final newline @@ -524,6 +577,7 @@ export async function runNonInteractive({ cleanupStdinCancellation(); consolePatcher.cleanup(); + coreEvents.off(CoreEvent.RetryAttempt, handleRetryAttempt); coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback); } diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index c94dd5c04d..72b7cb4639 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -636,7 +636,11 @@ export class GeminiClient { const loopDetected = await this.loopDetector.turnStarted(signal); if (loopDetected) { - yield { type: GeminiEventType.LoopDetected }; + const loopType = this.loopDetector.getDetectedLoopType(); + yield { + type: GeminiEventType.LoopDetected, + value: loopType ? { loopType } : undefined, + }; return turn; } @@ -689,7 +693,11 @@ export class GeminiClient { for await (const event of resultStream) { if (this.loopDetector.addAndCheck(event)) { - yield { type: GeminiEventType.LoopDetected }; + const loopType = this.loopDetector.getDetectedLoopType(); + yield { + type: GeminiEventType.LoopDetected, + value: loopType ? { loopType } : undefined, + }; controller.abort(); return turn; } diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 23b55afe29..a66110bfb5 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -207,6 +207,7 @@ export type ServerGeminiFinishedEvent = { export type ServerGeminiLoopDetectedEvent = { type: GeminiEventType.LoopDetected; + value?: { loopType: string }; }; export type ServerGeminiCitationEvent = { diff --git a/packages/core/src/output/json-formatter.ts b/packages/core/src/output/json-formatter.ts index dd3e558a6f..040c5ad7c1 100644 --- a/packages/core/src/output/json-formatter.ts +++ b/packages/core/src/output/json-formatter.ts @@ -14,6 +14,8 @@ export class JsonFormatter { response?: string, stats?: SessionMetrics, error?: JsonError, + authMethod?: string, + userTier?: string, ): string { const output: JsonOutput = {}; @@ -21,6 +23,14 @@ export class JsonFormatter { output.session_id = sessionId; } + if (authMethod) { + output.auth_method = authMethod; + } + + if (userTier) { + output.user_tier = userTier; + } + if (response !== undefined) { output.response = stripAnsi(response); } diff --git a/packages/core/src/output/stream-json-formatter.ts b/packages/core/src/output/stream-json-formatter.ts index 585dbb0789..0029c8209a 100644 --- a/packages/core/src/output/stream-json-formatter.ts +++ b/packages/core/src/output/stream-json-formatter.ts @@ -39,23 +39,28 @@ export class StreamJsonFormatter { convertToStreamStats( metrics: SessionMetrics, durationMs: number, + retryCount?: number, ): StreamStats { let totalTokens = 0; let inputTokens = 0; let outputTokens = 0; let cached = 0; let input = 0; + let apiRequests = 0; + let apiErrors = 0; - // Aggregate token counts across all models + // Aggregate token counts and API stats across all models for (const modelMetrics of Object.values(metrics.models)) { totalTokens += modelMetrics.tokens.total; inputTokens += modelMetrics.tokens.prompt; outputTokens += modelMetrics.tokens.candidates; cached += modelMetrics.tokens.cached; input += modelMetrics.tokens.input; + apiRequests += modelMetrics.api.totalRequests; + apiErrors += modelMetrics.api.totalErrors; } - return { + const stats: StreamStats = { total_tokens: totalTokens, input_tokens: inputTokens, output_tokens: outputTokens, @@ -63,6 +68,14 @@ export class StreamJsonFormatter { input, duration_ms: durationMs, tool_calls: metrics.tools.totalCalls, + api_requests: apiRequests, + api_errors: apiErrors, }; + + if (retryCount && retryCount > 0) { + stats.retry_count = retryCount; + } + + return stats; } } diff --git a/packages/core/src/output/types.ts b/packages/core/src/output/types.ts index 0c129eac93..91a2d9269b 100644 --- a/packages/core/src/output/types.ts +++ b/packages/core/src/output/types.ts @@ -20,6 +20,8 @@ export interface JsonError { export interface JsonOutput { session_id?: string; + auth_method?: string; + user_tier?: string; response?: string; stats?: SessionMetrics; error?: JsonError; @@ -33,6 +35,8 @@ export enum JsonStreamEventType { TOOL_RESULT = 'tool_result', ERROR = 'error', RESULT = 'result', + RETRY = 'retry', + LOOP_DETECTED = 'loop_detected', } export interface BaseJsonStreamEvent { @@ -44,6 +48,8 @@ export interface InitEvent extends BaseJsonStreamEvent { type: JsonStreamEventType.INIT; session_id: string; model: string; + auth_method?: string; + user_tier?: string; } export interface MessageEvent extends BaseJsonStreamEvent { @@ -86,6 +92,9 @@ export interface StreamStats { input: number; duration_ms: number; tool_calls: number; + api_requests?: number; + api_errors?: number; + retry_count?: number; } export interface ResultEvent extends BaseJsonStreamEvent { @@ -98,10 +107,26 @@ export interface ResultEvent extends BaseJsonStreamEvent { stats?: StreamStats; } +export interface RetryEvent extends BaseJsonStreamEvent { + type: JsonStreamEventType.RETRY; + attempt: number; + max_attempts: number; + delay_ms: number; + error?: string; + model: string; +} + +export interface LoopDetectedStreamEvent extends BaseJsonStreamEvent { + type: JsonStreamEventType.LOOP_DETECTED; + loop_type?: string; +} + export type JsonStreamEvent = | InitEvent | MessageEvent | ToolUseEvent | ToolResultEvent | ErrorEvent - | ResultEvent; + | ResultEvent + | RetryEvent + | LoopDetectedStreamEvent; diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts index 247b1dacf4..59b1fa2142 100644 --- a/packages/core/src/services/loopDetectionService.ts +++ b/packages/core/src/services/loopDetectionService.ts @@ -118,6 +118,9 @@ export class LoopDetectionService { private llmCheckInterval = DEFAULT_LLM_CHECK_INTERVAL; private lastCheckTurn = 0; + // Detected loop type tracking + private detectedLoopType: string | null = null; + // Session-level disable flag private disabledForSession = false; @@ -208,6 +211,7 @@ export class LoopDetectionService { this.toolCallRepetitionCount = 1; } if (this.toolCallRepetitionCount >= TOOL_CALL_LOOP_THRESHOLD) { + this.detectedLoopType = 'consecutive_identical_tool_calls'; logLoopDetected( this.config, new LoopDetectedEvent( @@ -321,6 +325,7 @@ export class LoopDetectionService { const chunkHash = createHash('sha256').update(currentChunk).digest('hex'); if (this.isLoopDetectedForChunk(currentChunk, chunkHash)) { + this.detectedLoopType = 'chanting_identical_sentences'; logLoopDetected( this.config, new LoopDetectedEvent( @@ -575,6 +580,7 @@ export class LoopDetectionService { result: Record, modelName: string, ): void { + this.detectedLoopType = 'llm_detected_loop'; if ( typeof result['unproductive_state_analysis'] === 'string' && result['unproductive_state_analysis'] @@ -599,6 +605,13 @@ export class LoopDetectionService { ); } + /** + * Returns the type of the most recently detected loop, or null if none. + */ + getDetectedLoopType(): string | null { + return this.detectedLoopType; + } + /** * Resets all loop detection state. */ @@ -608,6 +621,7 @@ export class LoopDetectionService { this.resetContentTracking(); this.resetLlmCheckTracking(); this.loopDetected = false; + this.detectedLoopType = null; } private resetToolCallCount(): void { diff --git a/scripts/test_gemini.sh b/scripts/test_gemini.sh new file mode 100755 index 0000000000..eaff8d6399 --- /dev/null +++ b/scripts/test_gemini.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# Gemini CLI Headless Mode Monitoring Test Script +# ----------------------------------------------------------------------------- +# Purpose: +# Runs the Gemini CLI in headless mode across multiple models and output +# formats, then displays the monitoring data (auth method, API stats, retries, +# loop detection) in a readable summary. +# +# Prerequisites: +# Authentication must already be configured (API key, OAuth, or Vertex AI). +# Build the project first: npm run build +# +# Usage: +# ./scripts/test_gemini.sh [--prompt "custom prompt"] [--models "model1 model2"] +# +# Options: +# --prompt Override the default test prompt +# --models Space-separated list of models to test (quoted) +# +# Example: +# ./scripts/test_gemini.sh +# ./scripts/test_gemini.sh --prompt "list files" --models "gemini-2.5-flash" +# ----------------------------------------------------------------------------- + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +CLI="$REPO_ROOT/packages/cli/dist/index.js" + +# Defaults +PROMPT="count how many files are in the current folder" +MODELS=( + "gemini-2.5-pro" + "gemini-2.5-flash" + "gemini-3.1-pro-preview" + "gemini-3-flash-preview" +) + +# Parse args +while [[ "$#" -gt 0 ]]; do + case "$1" in + --prompt) PROMPT="$2"; shift ;; + --models) IFS=' ' read -ra MODELS <<< "$2"; shift ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac + shift +done + +# Colors +BOLD='\033[1m' +DIM='\033[2m' +GREEN='\033[32m' +YELLOW='\033[33m' +RED='\033[31m' +CYAN='\033[36m' +RESET='\033[0m' + +# Check prerequisites +if [[ ! -f "$CLI" ]]; then + echo -e "${RED}CLI not found at $CLI${RESET}" + echo "Run 'npm run build' from the repo root first." + exit 1 +fi + +if ! command -v jq &>/dev/null; then + echo -e "${RED}jq is required but not installed.${RESET}" + exit 1 +fi + +separator() { + echo -e "${DIM}$(printf '%.0s─' {1..72})${RESET}" +} + +# Header +echo "" +echo -e "${BOLD}Gemini CLI Headless Monitoring Test${RESET}" +separator +echo -e "${DIM}Prompt:${RESET} $PROMPT" +echo -e "${DIM}Models:${RESET} ${MODELS[*]}" +echo -e "${DIM}CLI:${RESET} $CLI" +separator +echo "" + +total_models=${#MODELS[@]} +pass_count=0 +fail_count=0 + +for model in "${MODELS[@]}"; do + echo -e "${BOLD}${CYAN}[$model]${RESET}" + echo "" + + # ── stream-json run ────────────────────────────────────────────────── + TMPFILE=$(mktemp) + STDERRFILE=$(mktemp) + exit_code=0 + + echo -e " ${DIM}Running with -o stream-json ...${RESET}" + node "$CLI" -p "$PROMPT" -y -m "$model" -o stream-json \ + >"$TMPFILE" 2>"$STDERRFILE" || exit_code=$? + + if [[ $exit_code -ne 0 ]]; then + echo -e " ${RED}FAILED${RESET} (exit code $exit_code)" + echo "" + if [[ -s "$STDERRFILE" ]]; then + echo -e " ${DIM}stderr:${RESET}" + sed 's/^/ /' "$STDERRFILE" + echo "" + fi + ((fail_count++)) + rm -f "$TMPFILE" "$STDERRFILE" + separator + echo "" + continue + fi + + ((pass_count++)) + + # Parse init event + init_line=$(jq -c 'select(.type=="init")' "$TMPFILE" 2>/dev/null | head -1) + auth_method=$(echo "$init_line" | jq -r '.auth_method // "not set"' 2>/dev/null) + user_tier=$(echo "$init_line" | jq -r '.user_tier // "not set"' 2>/dev/null) + session_id=$(echo "$init_line" | jq -r '.session_id // "?"' 2>/dev/null) + + # Parse result event + result_line=$(jq -c 'select(.type=="result")' "$TMPFILE" 2>/dev/null | tail -1) + status=$(echo "$result_line" | jq -r '.status // "?"' 2>/dev/null) + api_requests=$(echo "$result_line" | jq -r '.stats.api_requests // "?"' 2>/dev/null) + api_errors=$(echo "$result_line" | jq -r '.stats.api_errors // "?"' 2>/dev/null) + retry_count=$(echo "$result_line" | jq -r '.stats.retry_count // 0' 2>/dev/null) + total_tokens=$(echo "$result_line" | jq -r '.stats.total_tokens // "?"' 2>/dev/null) + input_tokens=$(echo "$result_line" | jq -r '.stats.input_tokens // "?"' 2>/dev/null) + output_tokens=$(echo "$result_line" | jq -r '.stats.output_tokens // "?"' 2>/dev/null) + cached=$(echo "$result_line" | jq -r '.stats.cached // "?"' 2>/dev/null) + tool_calls=$(echo "$result_line" | jq -r '.stats.tool_calls // 0' 2>/dev/null) + duration_ms=$(echo "$result_line" | jq -r '.stats.duration_ms // "?"' 2>/dev/null) + + # Count retries and loop events + retry_events=$(jq -c 'select(.type=="retry")' "$TMPFILE" 2>/dev/null | wc -l | tr -d ' ') + loop_events=$(jq -c 'select(.type=="loop_detected")' "$TMPFILE" 2>/dev/null) + if [[ -n "$loop_events" ]]; then + loop_count=$(echo "$loop_events" | wc -l | tr -d ' ') + loop_type=$(echo "$loop_events" | jq -r '.loop_type // empty' 2>/dev/null | head -1) + else + loop_count=0 + loop_type="" + fi + + # Extract assistant response (concatenate deltas) + response=$(jq -r 'select(.type=="message" and .role=="assistant") | .content' "$TMPFILE" 2>/dev/null | tr -d '\n') + # Truncate for display + if [[ ${#response} -gt 120 ]]; then + response="${response:0:120}..." + fi + + # Format duration + if [[ "$duration_ms" != "?" ]]; then + duration_s=$(echo "scale=1; $duration_ms / 1000" | bc 2>/dev/null || echo "$duration_ms ms") + duration_display="${duration_s}s" + else + duration_display="?" + fi + + # Display + echo -e " ${BOLD}Auth & Session${RESET}" + echo -e " auth_method: ${GREEN}$auth_method${RESET}" + echo -e " user_tier: $user_tier" + echo -e " session_id: ${DIM}$session_id${RESET}" + echo "" + + echo -e " ${BOLD}API Stats${RESET}" + echo -e " status: $([ "$status" = "success" ] && echo "${GREEN}$status${RESET}" || echo "${RED}$status${RESET}")" + echo -e " api_requests: $api_requests" + echo -e " api_errors: $([ "$api_errors" = "0" ] && echo "$api_errors" || echo "${RED}$api_errors${RESET}")" + echo -e " retry_count: $([ "$retry_count" = "0" ] && echo "$retry_count" || echo "${YELLOW}$retry_count${RESET}")" + echo -e " duration: $duration_display" + echo "" + + echo -e " ${BOLD}Tokens${RESET}" + echo -e " total: $total_tokens (in: $input_tokens, out: $output_tokens, cached: $cached)" + echo -e " tools: $tool_calls calls" + echo "" + + if [[ "$retry_events" -gt 0 ]]; then + echo -e " ${BOLD}${YELLOW}Retries ($retry_events)${RESET}" + jq -r 'select(.type=="retry") | " attempt \(.attempt)/\(.max_attempts) delay=\(.delay_ms)ms \(.error // "")"' "$TMPFILE" 2>/dev/null + echo "" + fi + + if [[ "$loop_count" -gt 0 ]]; then + echo -e " ${BOLD}${RED}Loop Detected${RESET}" + echo -e " type: ${loop_type:-unknown}" + echo "" + fi + + echo -e " ${BOLD}Response${RESET}" + echo -e " ${DIM}$response${RESET}" + echo "" + + # Show stderr if any + stderr_content=$(cat "$STDERRFILE") + if [[ -n "$stderr_content" ]]; then + echo -e " ${BOLD}Stderr${RESET}" + echo "$stderr_content" | sed 's/^/ /' + echo "" + fi + + rm -f "$TMPFILE" "$STDERRFILE" + separator + echo "" +done + +# Summary +echo -e "${BOLD}Summary${RESET}" +echo -e " Models tested: $total_models" +echo -e " Passed: ${GREEN}$pass_count${RESET}" +if [[ $fail_count -gt 0 ]]; then + echo -e " Failed: ${RED}$fail_count${RESET}" +else + echo -e " Failed: $fail_count" +fi +echo ""