feat: Add startup profiler to measure and record application initialization phases. (#13638)

This commit is contained in:
Kevin Ramdass
2025-12-01 10:06:13 -08:00
committed by GitHub
parent 613fb2fed6
commit db027dd95b
10 changed files with 591 additions and 1 deletions
+5
View File
@@ -87,6 +87,7 @@ import { SubagentToolWrapper } from '../agents/subagent-tool-wrapper.js';
import { getExperiments } from '../code_assist/experiments/experiments.js';
import { ExperimentFlags } from '../code_assist/experiments/flagNames.js';
import { debugLogger } from '../utils/debugLogger.js';
import { startupProfiler } from '../telemetry/startupProfiler.js';
import { ApprovalMode } from '../policy/types.js';
@@ -620,6 +621,7 @@ export class Config {
this.initialized = true;
// Initialize centralized FileDiscoveryService
const discoverToolsHandle = startupProfiler.start('discover_tools');
this.getFileService();
if (this.getCheckpointingEnabled()) {
await this.getGitService();
@@ -630,15 +632,18 @@ export class Config {
await this.agentRegistry.initialize();
this.toolRegistry = await this.createToolRegistry();
discoverToolsHandle?.end();
this.mcpClientManager = new McpClientManager(
this.toolRegistry,
this,
this.eventEmitter,
);
const initMcpHandle = startupProfiler.start('initialize_mcp_clients');
await Promise.all([
await this.mcpClientManager.startConfiguredMcpServers(),
await this.getExtensionLoader().start(this),
]);
initMcpHandle?.end();
// Initialize hook system if enabled
if (this.enableHooks) {
+1
View File
@@ -137,3 +137,4 @@ export {
GenAiTokenType,
} from './metrics.js';
export { runInDevTraceSpan, type SpanMetadata } from './trace.js';
export { startupProfiler, StartupProfiler } from './startupProfiler.js';
@@ -0,0 +1,343 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { StartupProfiler } from './startupProfiler.js';
import type { Config } from '../config/config.js';
// Mock the metrics module so flush() can be observed without a real
// telemetry backend; tests assert on recordStartupPerformance calls.
vi.mock('./metrics.js', () => ({
  recordStartupPerformance: vi.fn(),
}));
// Mock os module so the common details (platform/arch/release) are
// deterministic regardless of the host running the tests.
vi.mock('node:os', () => ({
  platform: vi.fn(() => 'darwin'),
  arch: vi.fn(() => 'x64'),
  release: vi.fn(() => '22.6.0'),
}));
// Mock fs module so the /.dockerenv existence probe defaults to
// "not running in Docker"; individual tests override the return value.
vi.mock('node:fs', () => ({
  existsSync: vi.fn(() => false),
}));
describe('StartupProfiler', () => {
  let profiler: StartupProfiler;
  let mockConfig: Config;
  let recordStartupPerformance: ReturnType<typeof vi.fn>;
  beforeEach(async () => {
    vi.resetAllMocks();
    // Get the mocked function
    const metricsModule = await import('./metrics.js');
    recordStartupPerformance =
      metricsModule.recordStartupPerformance as ReturnType<typeof vi.fn>;
    // Create a fresh profiler instance.
    // NOTE: getInstance() returns a process-wide singleton, so internal
    // state and the global performance timeline must be reset here to keep
    // tests independent of each other.
    profiler = StartupProfiler.getInstance();
    // Clear any existing phases and performance entries
    profiler['phases'].clear();
    performance.clearMarks();
    performance.clearMeasures();
    // Minimal Config stub — only the accessors the profiler path touches.
    mockConfig = {
      getSessionId: () => 'test-session-id',
      getTelemetryEnabled: () => true,
    } as unknown as Config;
  });
  afterEach(() => {
    vi.restoreAllMocks();
  });
  describe('getInstance', () => {
    it('should return a singleton instance', () => {
      const instance1 = StartupProfiler.getInstance();
      const instance2 = StartupProfiler.getInstance();
      expect(instance1).toBe(instance2);
    });
  });
  describe('start', () => {
    it('should create a performance mark for a phase', () => {
      profiler.start('test_phase');
      const phase = profiler['phases'].get('test_phase');
      expect(phase).toBeDefined();
      expect(phase?.name).toBe('test_phase');
      // Verify performance mark was created with the documented
      // 'startup:<name>:start' naming scheme.
      const marks = performance.getEntriesByType('mark');
      const startMark = marks.find(
        (m) => m.name === 'startup:test_phase:start',
      );
      expect(startMark).toBeDefined();
    });
    it('should record start time with details', () => {
      const details = { key: 'value', count: 42 };
      profiler.start('test_phase', details);
      const phase = profiler['phases'].get('test_phase');
      expect(phase?.details).toEqual(details);
    });
    it('should return undefined when starting a phase that is already active', () => {
      profiler.start('test_phase');
      const handle = profiler.start('test_phase');
      expect(handle).toBeUndefined();
    });
  });
  describe('end', () => {
    it('should create a performance measure for a started phase', () => {
      const handle = profiler.start('test_phase');
      handle?.end();
      // Verify performance measure was created (measure name == phase name)
      const measures = performance.getEntriesByType('measure');
      const measure = measures.find((m) => m.name === 'test_phase');
      expect(measure).toBeDefined();
      expect(measure?.duration).toBeGreaterThan(0);
    });
    it('should merge details when ending a phase', () => {
      const handle = profiler.start('test_phase', { initial: 'value' });
      handle?.end({ additional: 'data' });
      const phase = profiler['phases'].get('test_phase');
      expect(phase?.details).toEqual({
        initial: 'value',
        additional: 'data',
      });
    });
    it('should overwrite details with same key', () => {
      // End-time details take precedence over start-time details.
      const handle = profiler.start('test_phase', { key: 'original' });
      handle?.end({ key: 'updated' });
      const phase = profiler['phases'].get('test_phase');
      expect(phase?.details).toEqual({ key: 'updated' });
    });
  });
  describe('flush', () => {
    it('should call recordStartupPerformance for each completed phase', () => {
      const handle1 = profiler.start('phase1');
      handle1?.end();
      const handle2 = profiler.start('phase2');
      handle2?.end();
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).toHaveBeenCalledTimes(2);
    });
    it('should not record phases without duration', () => {
      profiler.start('incomplete_phase');
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).not.toHaveBeenCalled();
    });
    it('should include common details in all metrics', () => {
      // The os_* / is_docker values come from the module mocks above.
      const handle = profiler.start('test_phase');
      handle?.end();
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).toHaveBeenCalledWith(
        mockConfig,
        expect.any(Number),
        expect.objectContaining({
          phase: 'test_phase',
          details: expect.objectContaining({
            os_platform: 'darwin',
            os_arch: 'x64',
            os_release: '22.6.0',
            is_docker: false,
            cpu_usage_user: expect.any(Number),
            cpu_usage_system: expect.any(Number),
          }),
        }),
      );
    });
    it('should merge phase-specific details with common details', () => {
      const handle = profiler.start('test_phase', { custom: 'value' });
      handle?.end();
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).toHaveBeenCalledWith(
        mockConfig,
        expect.any(Number),
        expect.objectContaining({
          phase: 'test_phase',
          details: expect.objectContaining({
            custom: 'value',
            os_platform: 'darwin',
          }),
        }),
      );
    });
    it('should clear phases after flushing', () => {
      const handle = profiler.start('test_phase');
      handle?.end();
      profiler.flush(mockConfig);
      expect(profiler['phases'].size).toBe(0);
    });
    it('should detect Docker environment', async () => {
      // Override the fs mock so the /.dockerenv probe reports true.
      const fs = await import('node:fs');
      (fs.existsSync as ReturnType<typeof vi.fn>).mockReturnValue(true);
      const handle = profiler.start('test_phase');
      handle?.end();
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).toHaveBeenCalledWith(
        mockConfig,
        expect.any(Number),
        expect.objectContaining({
          details: expect.objectContaining({
            is_docker: true,
          }),
        }),
      );
    });
    it('should calculate CPU usage correctly', () => {
      const cpuUsageSpy = vi.spyOn(process, 'cpuUsage');
      // Mock start usage
      cpuUsageSpy.mockReturnValueOnce({ user: 1000, system: 500 });
      // Mock diff usage (this is what process.cpuUsage(startUsage) returns)
      cpuUsageSpy.mockReturnValueOnce({ user: 100, system: 50 });
      const handle = profiler.start('cpu_test_phase');
      handle?.end();
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).toHaveBeenCalledWith(
        mockConfig,
        expect.any(Number),
        expect.objectContaining({
          phase: 'cpu_test_phase',
          details: expect.objectContaining({
            cpu_usage_user: 100,
            cpu_usage_system: 50,
          }),
        }),
      );
    });
  });
  describe('integration scenarios', () => {
    it('should handle a complete startup profiling workflow', () => {
      // Simulate startup sequence
      const totalHandle = profiler.start('total_startup');
      const settingsHandle = profiler.start('load_settings');
      settingsHandle?.end();
      const argsHandle = profiler.start('parse_arguments');
      argsHandle?.end();
      const appHandle = profiler.start('initialize_app');
      appHandle?.end();
      totalHandle?.end();
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).toHaveBeenCalledTimes(4);
      expect(recordStartupPerformance).toHaveBeenCalledWith(
        mockConfig,
        expect.any(Number),
        expect.objectContaining({ phase: 'total_startup' }),
      );
    });
    it('should handle nested timing correctly', () => {
      const outerHandle = profiler.start('outer');
      const innerHandle = profiler.start('inner');
      innerHandle?.end();
      outerHandle?.end();
      profiler.flush(mockConfig);
      // recordStartupPerformance(config, duration, attrs): call[1] is the
      // duration, call[2] carries the phase name.
      const calls = (recordStartupPerformance as ReturnType<typeof vi.fn>).mock
        .calls;
      const outerCall = calls.find((call) => call[2].phase === 'outer');
      const innerCall = calls.find((call) => call[2].phase === 'inner');
      expect(outerCall).toBeDefined();
      expect(innerCall).toBeDefined();
      // Outer duration should be >= inner duration
      expect(outerCall![1]).toBeGreaterThanOrEqual(innerCall![1]);
    });
  });
  describe('sanity checking', () => {
    it('should return undefined when starting a phase that is already active', () => {
      profiler.start('test_phase');
      const handle = profiler.start('test_phase');
      expect(handle).toBeUndefined();
    });
    it('should allow restarting a phase after it has ended', () => {
      const handle1 = profiler.start('test_phase');
      handle1?.end();
      // Should not throw
      expect(() => profiler.start('test_phase')).not.toThrow();
    });
    it('should not throw error when ending a phase that is already ended', () => {
      const handle = profiler.start('test_phase');
      handle?.end();
      // Calling end() again on the same handle should not throw
      expect(() => handle?.end()).not.toThrow();
    });
    it('should not record metrics for incomplete phases', () => {
      profiler.start('incomplete_phase');
      // Never call end()
      profiler.flush(mockConfig);
      expect(recordStartupPerformance).not.toHaveBeenCalled();
    });
    it('should handle mix of complete and incomplete phases', () => {
      const completeHandle = profiler.start('complete_phase');
      completeHandle?.end();
      profiler.start('incomplete_phase');
      // Never call end()
      profiler.flush(mockConfig);
      // Should only record the complete phase
      expect(recordStartupPerformance).toHaveBeenCalledTimes(1);
      expect(recordStartupPerformance).toHaveBeenCalledWith(
        mockConfig,
        expect.any(Number),
        expect.objectContaining({ phase: 'complete_phase' }),
      );
    });
  });
});
@@ -0,0 +1,215 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { performance } from 'node:perf_hooks';
import * as os from 'node:os';
import * as fs from 'node:fs';
import type { Config } from '../config/config.js';
import { recordStartupPerformance } from './metrics.js';
import { debugLogger } from '../utils/debugLogger.js';
/** Internal bookkeeping record for one named startup phase. */
interface StartupPhase {
  // Phase name; also used as the performance measure name.
  name: string;
  // CPU usage snapshot taken at start(); baseline for the end-time delta.
  startCpuUsage: NodeJS.CpuUsage;
  // CPU delta (process.cpuUsage(startCpuUsage)) captured when the phase
  // ends; absent until then, and required before the phase is flushed.
  cpuUsage?: NodeJS.CpuUsage;
  // Caller-supplied context; start() details merged with end() details
  // (end-time values win on key collisions).
  details?: Record<string, string | number | boolean>;
  // True once the phase has ended (or was abandoned because its start mark
  // was cleared); guards against double-ending and allows restarts.
  ended: boolean;
}
/**
 * Handle returned by start() that allows ending the phase without repeating the phase name.
 *
 * Calling end() more than once is safe: repeated calls log a warning and are
 * otherwise ignored.
 */
export interface StartupPhaseHandle {
  /**
   * Ends the phase, optionally merging additional details into those given
   * at start time (later values override earlier ones per key).
   */
  end(details?: Record<string, string | number | boolean>): void;
}
/**
 * Buffers startup performance metrics until the telemetry system is fully
 * initialized.
 *
 * Each phase is timed with the User Timing API (performance marks and
 * measures) and its CPU cost captured via process.cpuUsage(). Nothing is
 * reported until flush() is called with a Config, at which point every
 * completed phase is forwarded to recordStartupPerformance() and all
 * buffered state — phases, marks, and measures — is cleared.
 */
export class StartupProfiler {
  private static instance: StartupProfiler;

  // Phase bookkeeping keyed by phase name. A name may only be reused after
  // its previous run has ended.
  private phases: Map<string, StartupPhase> = new Map();

  private constructor() {}

  /** Returns the process-wide singleton profiler. */
  static getInstance(): StartupProfiler {
    StartupProfiler.instance ??= new StartupProfiler();
    return StartupProfiler.instance;
  }

  /** Name of the performance mark placed when a phase starts. */
  private startMarkName(phaseName: string): string {
    return `startup:${phaseName}:start`;
  }

  /** Name of the performance mark placed when a phase ends. */
  private endMarkName(phaseName: string): string {
    return `startup:${phaseName}:end`;
  }

  /**
   * Marks the start of a phase and returns a handle used to end it.
   *
   * If a phase with the same name is still active (started but not yet
   * ended), a warning is logged and `undefined` is returned. This keeps the
   * call idempotent in environments where initialization may run more than
   * once; callers should use optional chaining: `handle?.end()`.
   *
   * @param phaseName Unique phase name (doubles as the measure name).
   * @param details Optional key/value context stored with the phase.
   * @returns A handle whose end() completes the phase, or undefined if the
   *   phase was already active.
   */
  start(
    phaseName: string,
    details?: Record<string, string | number | boolean>,
  ): StartupPhaseHandle | undefined {
    const prior = this.phases.get(phaseName);
    if (prior !== undefined && !prior.ended) {
      debugLogger.warn(
        `[STARTUP] Cannot start phase '${phaseName}': phase is already active. Call end() before starting again.`,
      );
      return undefined;
    }

    performance.mark(this.startMarkName(phaseName), { detail: details });

    const phase: StartupPhase = {
      name: phaseName,
      startCpuUsage: process.cpuUsage(),
      details,
      ended: false,
    };
    this.phases.set(phaseName, phase);

    // The handle closes over the phase record so callers never repeat the
    // phase name.
    return {
      end: (endDetails?: Record<string, string | number | boolean>) => {
        this.finishPhase(phase, endDetails);
      },
    };
  }

  /**
   * Completes a phase: places the end mark, creates the measure, and
   * records the CPU delta since start. Reachable only through the handle
   * returned by start().
   */
  private finishPhase(
    phase: StartupPhase,
    details?: Record<string, string | number | boolean>,
  ): void {
    if (phase.ended) {
      debugLogger.warn(
        `[STARTUP] Cannot end phase '${phase.name}': phase was already ended.`,
      );
      return;
    }

    const startMark = this.startMarkName(phase.name);
    // The start mark can disappear if something cleared the performance
    // timeline; close the phase without a measure rather than throwing.
    if (performance.getEntriesByName(startMark).length === 0) {
      debugLogger.warn(
        `[STARTUP] Cannot measure phase '${phase.name}': start mark '${startMark}' not found (likely cleared by reset).`,
      );
      phase.ended = true;
      return;
    }

    const endMark = this.endMarkName(phase.name);
    performance.mark(endMark, { detail: details });
    performance.measure(phase.name, startMark, endMark);
    phase.cpuUsage = process.cpuUsage(phase.startCpuUsage);
    phase.ended = true;
    if (details) {
      // End-time details win over start-time details on key collisions.
      phase.details = { ...phase.details, ...details };
    }
  }

  /**
   * Flushes buffered metrics to the telemetry system and resets all state.
   *
   * Every completed phase that has a matching performance measure is
   * reported through recordStartupPerformance(); phases started but never
   * ended are skipped with a warning.
   */
  flush(config: Config): void {
    debugLogger.log(
      '[STARTUP] StartupProfiler.flush() called with',
      this.phases.size,
      'phases',
    );

    // Environment context attached to every phase metric.
    const commonDetails = {
      os_platform: os.platform(),
      os_arch: os.arch(),
      os_release: os.release(),
      is_docker: fs.existsSync('/.dockerenv'),
    };

    const measures = performance.getEntriesByType('measure');
    for (const phase of this.phases.values()) {
      if (!phase.ended) {
        debugLogger.warn(
          `[STARTUP] Phase '${phase.name}' was started but never ended. Skipping metrics.`,
        );
        continue;
      }
      const measure = measures.find((m) => m.name === phase.name);
      if (!measure || !phase.cpuUsage) {
        debugLogger.log(
          '[STARTUP] Skipping phase without measure:',
          phase.name,
        );
        continue;
      }
      debugLogger.log(
        '[STARTUP] Recording metric for phase:',
        phase.name,
        'duration:',
        measure.duration,
      );
      recordStartupPerformance(config, measure.duration, {
        phase: phase.name,
        // Phase-specific details override the common environment details.
        details: {
          ...commonDetails,
          cpu_usage_user: phase.cpuUsage.user,
          cpu_usage_system: phase.cpuUsage.system,
          ...phase.details,
        },
      });
    }

    // Remove only the marks/measures this profiler created, then reset.
    for (const phaseName of this.phases.keys()) {
      performance.clearMarks(this.startMarkName(phaseName));
      performance.clearMarks(this.endMarkName(phaseName));
      performance.clearMeasures(phaseName);
    }
    this.phases.clear();
  }
}
/** Process-wide singleton profiler instance shared by all importers. */
export const startupProfiler = StartupProfiler.getInstance();