feat(cli): polish cached token stats and simplify stats display when quota is present. (#14961)

This commit is contained in:
Jacob Richman
2025-12-11 15:17:14 -08:00
committed by GitHub
parent d818fb1d91
commit 54de67536d
7 changed files with 201 additions and 131 deletions
+144 -61
View File
@@ -17,6 +17,8 @@ import {
TOOL_SUCCESS_RATE_MEDIUM,
USER_AGREEMENT_RATE_HIGH,
USER_AGREEMENT_RATE_MEDIUM,
CACHE_EFFICIENCY_HIGH,
CACHE_EFFICIENCY_MEDIUM,
} from '../utils/displayUtils.js';
import { computeSessionStats } from '../utils/computeStats.js';
import {
@@ -36,8 +38,7 @@ const StatRow: React.FC<StatRowProps> = ({ title, children }) => (
<Box width={28}>
<Text color={theme.text.link}>{title}</Text>
</Box>
{/* FIX: Wrap children in a Box that can grow to fill remaining space */}
<Box flexGrow={1}>{children}</Box>
{children}
</Box>
);
@@ -53,8 +54,7 @@ const SubStatRow: React.FC<SubStatRowProps> = ({ title, children }) => (
<Box width={26}>
<Text color={theme.text.secondary}>» {title}</Text>
</Box>
{/* FIX: Apply the same flexGrow fix here */}
<Box flexGrow={1}>{children}</Box>
{children}
</Box>
);
@@ -84,11 +84,16 @@ const buildModelRows = (
// 1. Models with active usage
const activeRows = Object.entries(models).map(([name, metrics]) => {
const modelName = getBaseModelName(name);
const cachedTokens = metrics.tokens.cached;
const totalInputTokens = metrics.tokens.prompt;
const uncachedTokens = Math.max(0, totalInputTokens - cachedTokens);
return {
key: name,
modelName,
requests: metrics.api.totalRequests,
inputTokens: metrics.tokens.prompt.toLocaleString(),
cachedTokens: cachedTokens.toLocaleString(),
uncachedTokens: uncachedTokens.toLocaleString(),
totalInputTokens: totalInputTokens.toLocaleString(),
outputTokens: metrics.tokens.candidates.toLocaleString(),
bucket: quotas?.buckets?.find((b) => b.modelId === modelName),
isActive: true,
@@ -108,7 +113,9 @@ const buildModelRows = (
key: bucket.modelId!,
modelName: bucket.modelId!,
requests: '-',
inputTokens: '-',
cachedTokens: '-',
uncachedTokens: '-',
totalInputTokens: '-',
outputTokens: '-',
bucket,
isActive: false,
@@ -143,50 +150,91 @@ const formatResetTime = (resetTime: string): string => {
const ModelUsageTable: React.FC<{
models: Record<string, ModelMetrics>;
totalCachedTokens: number;
cacheEfficiency: number;
quotas?: RetrieveUserQuotaResponse;
}> = ({ models, totalCachedTokens, cacheEfficiency, quotas }) => {
cacheEfficiency: number;
totalCachedTokens: number;
}> = ({ models, quotas, cacheEfficiency, totalCachedTokens }) => {
const rows = buildModelRows(models, quotas);
if (rows.length === 0) {
return null;
}
const showQuotaColumn = !!quotas && rows.some((row) => !!row.bucket);
const nameWidth = 25;
const requestsWidth = 8;
const inputTokensWidth = 15;
const requestsWidth = 7;
const uncachedWidth = 15;
const cachedWidth = 14;
const outputTokensWidth = 15;
const usageLimitWidth = quotas ? 30 : 0;
const usageLimitWidth = showQuotaColumn ? 28 : 0;
const cacheEfficiencyColor = getStatusColor(cacheEfficiency, {
green: CACHE_EFFICIENCY_HIGH,
yellow: CACHE_EFFICIENCY_MEDIUM,
});
return (
<Box flexDirection="column" marginTop={1}>
{/* Header */}
<Box>
<Box width={nameWidth}>
<Text bold color={theme.text.primary}>
<Box alignItems="flex-end">
<Box width={nameWidth} flexGrow={1}>
<Text bold color={theme.text.primary} wrap="truncate-end">
Model Usage
</Text>
</Box>
<Box width={requestsWidth} justifyContent="flex-end">
<Box
width={requestsWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text bold color={theme.text.primary}>
Reqs
</Text>
</Box>
<Box width={inputTokensWidth} justifyContent="flex-end">
<Text bold color={theme.text.primary}>
Input Tokens
</Text>
</Box>
<Box width={outputTokensWidth} justifyContent="flex-end">
<Text bold color={theme.text.primary}>
Output Tokens
</Text>
</Box>
{quotas && (
<Box width={usageLimitWidth} justifyContent="flex-end">
{!showQuotaColumn && (
<>
<Box
width={uncachedWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text bold color={theme.text.primary}>
Input Tokens
</Text>
</Box>
<Box
width={cachedWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text bold color={theme.text.primary}>
Cache Reads
</Text>
</Box>
<Box
width={outputTokensWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text bold color={theme.text.primary}>
Output Tokens
</Text>
</Box>
</>
)}
{showQuotaColumn && (
<Box
width={usageLimitWidth}
flexDirection="column"
alignItems="flex-end"
>
<Text bold color={theme.text.primary}>
Usage limit remaining
Usage left
</Text>
</Box>
)}
@@ -200,46 +248,77 @@ const ModelUsageTable: React.FC<{
borderLeft={false}
borderRight={false}
borderColor={theme.border.default}
width={
nameWidth +
requestsWidth +
inputTokensWidth +
outputTokensWidth +
usageLimitWidth
}
width="100%"
></Box>
{rows.map((row) => (
<Box key={row.key}>
<Box width={nameWidth}>
<Text color={theme.text.primary}>{row.modelName}</Text>
<Box width={nameWidth} flexGrow={1}>
<Text color={theme.text.primary} wrap="truncate-end">
{row.modelName}
</Text>
</Box>
<Box width={requestsWidth} justifyContent="flex-end">
<Box
width={requestsWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text
color={row.isActive ? theme.text.primary : theme.text.secondary}
>
{row.requests}
</Text>
</Box>
<Box width={inputTokensWidth} justifyContent="flex-end">
<Text
color={row.isActive ? theme.status.warning : theme.text.secondary}
>
{row.inputTokens}
</Text>
</Box>
<Box width={outputTokensWidth} justifyContent="flex-end">
<Text
color={row.isActive ? theme.status.warning : theme.text.secondary}
>
{row.outputTokens}
</Text>
</Box>
<Box width={usageLimitWidth} justifyContent="flex-end">
{!showQuotaColumn && (
<>
<Box
width={uncachedWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text
color={
row.isActive ? theme.text.primary : theme.text.secondary
}
>
{row.uncachedTokens}
</Text>
</Box>
<Box
width={cachedWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text color={theme.text.secondary}>{row.cachedTokens}</Text>
</Box>
<Box
width={outputTokensWidth}
flexDirection="column"
alignItems="flex-end"
flexShrink={0}
>
<Text
color={
row.isActive ? theme.text.primary : theme.text.secondary
}
>
{row.outputTokens}
</Text>
</Box>
</>
)}
<Box
width={usageLimitWidth}
flexDirection="column"
alignItems="flex-end"
>
{row.bucket &&
row.bucket.remainingFraction != null &&
row.bucket.resetTime && (
<Text color={theme.text.secondary}>
<Text color={theme.text.secondary} wrap="truncate-end">
{(row.bucket.remainingFraction * 100).toFixed(1)}%{' '}
{formatResetTime(row.bucket.resetTime)}
</Text>
@@ -248,17 +327,20 @@ const ModelUsageTable: React.FC<{
</Box>
))}
{cacheEfficiency > 0 && (
{cacheEfficiency > 0 && !showQuotaColumn && (
<Box flexDirection="column" marginTop={1}>
<Text color={theme.text.primary}>
<Text color={theme.status.success}>Savings Highlight:</Text>{' '}
{totalCachedTokens.toLocaleString()} ({cacheEfficiency.toFixed(1)}
%) of input tokens were served from the cache, reducing costs.
{totalCachedTokens.toLocaleString()} (
<Text color={cacheEfficiencyColor}>
{cacheEfficiency.toFixed(1)}%
</Text>
) of input tokens were served from the cache, reducing costs.
</Text>
</Box>
)}
{models && (
{showQuotaColumn && (
<>
<Box marginTop={1} marginBottom={2}>
<Text color={theme.text.primary}>
@@ -322,6 +404,7 @@ export const StatsDisplay: React.FC<StatsDisplayProps> = ({
flexDirection="column"
paddingY={1}
paddingX={2}
overflow="hidden"
>
{renderTitle()}
<Box height={1} />
@@ -393,9 +476,9 @@ export const StatsDisplay: React.FC<StatsDisplayProps> = ({
</Section>
<ModelUsageTable
models={models}
totalCachedTokens={computed.totalCachedTokens}
cacheEfficiency={computed.cacheEfficiency}
quotas={quotas}
cacheEfficiency={computed.cacheEfficiency}
totalCachedTokens={computed.totalCachedTokens}
/>
</Box>
);