diff --git a/.github/workflows/memory-nightly.yml b/.github/workflows/memory-nightly.yml index ee4e5e589c..6b0fb519c5 100644 --- a/.github/workflows/memory-nightly.yml +++ b/.github/workflows/memory-nightly.yml @@ -10,9 +10,21 @@ permissions: jobs: memory-test: - name: 'Run Memory Usage Tests' - runs-on: 'gemini-cli-ubuntu-16-core' + name: 'Run Memory Usage Tests (${{ matrix.machine_family }})' if: "github.repository == 'google-gemini/gemini-cli'" + strategy: + fail-fast: false + matrix: + include: + - runs_on: 'gemini-cli-ubuntu-16-core' + machine_family: 'gemini-cli-ubuntu-16-core' + - runs_on: 'macos-latest' + machine_family: 'macos-latest' + - runs_on: 'gemini-cli-windows-16-core' + machine_family: 'gemini-cli-windows-16-core' + runs-on: '${{ matrix.runs_on }}' + env: + MEMORY_MACHINE_FAMILY: '${{ matrix.machine_family }}' steps: - name: 'Checkout' uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 diff --git a/.github/workflows/perf-nightly.yml b/.github/workflows/perf-nightly.yml index 3749df231a..cbca80bacc 100644 --- a/.github/workflows/perf-nightly.yml +++ b/.github/workflows/perf-nightly.yml @@ -10,9 +10,21 @@ permissions: jobs: perf-test: - name: 'Run Performance Usage Tests' - runs-on: 'gemini-cli-ubuntu-16-core' + name: 'Run Performance Tests (${{ matrix.machine_family }})' if: "github.repository == 'google-gemini/gemini-cli'" + strategy: + fail-fast: false + matrix: + include: + - runs_on: 'gemini-cli-ubuntu-16-core' + machine_family: 'gemini-cli-ubuntu-16-core' + - runs_on: 'macos-latest' + machine_family: 'macos-latest' + - runs_on: 'gemini-cli-windows-16-core' + machine_family: 'gemini-cli-windows-16-core' + runs-on: '${{ matrix.runs_on }}' + env: + PERF_MACHINE_FAMILY: '${{ matrix.machine_family }}' steps: - name: 'Checkout' uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 diff --git a/.github/workflows/update-baselines.yml 
b/.github/workflows/update-baselines.yml new file mode 100644 index 0000000000..e49a80af1d --- /dev/null +++ b/.github/workflows/update-baselines.yml @@ -0,0 +1,243 @@ +# Copyright 2026 Google LLC +# SPDX-License-Identifier: Apache-2.0 +# +# Update Perf/Memory Baselines +# +# Triggered by: +# 1. A PR comment starting with one of: +# /run perf — updates only perf baselines +# /run mem — updates only memory baselines +# /run perf+mem — updates both (default) +# 2. Manual workflow_dispatch from the Actions tab. +# +# Both paths are gated behind the 'perf-approvers' GitHub environment, +# which requires approval from the designated approvers group before the +# matrix runners are provisioned. +# +# After all per-platform runs complete, the updated baseline JSON files +# are committed back to the triggering branch automatically. + +name: 'Update Perf/Memory Baselines' + +on: + issue_comment: + types: ['created'] + workflow_dispatch: + inputs: + test_type: + description: 'Which baselines to update' + required: true + default: 'perf+mem' + type: 'choice' + options: + - 'perf' + - 'mem' + - 'perf+mem' + ref: + description: 'Branch/SHA to checkout and update baselines on (default: main)' + required: false + default: 'main' + +permissions: + contents: 'write' # push the updated baseline commit + pull-requests: 'write' # post the result comment + issues: 'read' + +jobs: + # ── 1. 
Parse slash command / workflow_dispatch ────────────────────────── + parse-command: + name: 'Parse Command' + runs-on: 'gemini-cli-ubuntu-16-core' + if: | + github.repository == 'google-gemini/gemini-cli' && ( + github.event_name == 'workflow_dispatch' || ( + github.event_name == 'issue_comment' && + github.event.issue.pull_request != null && + ( + startsWith(github.event.comment.body, '/run perf+mem') || + startsWith(github.event.comment.body, '/run perf') || + startsWith(github.event.comment.body, '/run mem') + ) + ) + ) + outputs: + test_type: '${{ steps.parse.outputs.test_type }}' + ref: '${{ steps.parse.outputs.ref }}' + pr_number: '${{ steps.parse.outputs.pr_number }}' + steps: + - name: 'Parse inputs' + id: 'parse' + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + COMMENT_BODY: '${{ github.event.comment.body }}' + DISPATCH_TEST_TYPE: '${{ inputs.test_type }}' + DISPATCH_REF: '${{ inputs.ref }}' + PR_NUMBER: '${{ github.event.issue.number }}' + EVENT_NAME: '${{ github.event_name }}' + run: | + if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then + TEST_TYPE="${DISPATCH_TEST_TYPE:-perf+mem}" + REF="${DISPATCH_REF:-main}" + echo "pr_number=" >> "$GITHUB_OUTPUT" + else + # Slash command: determine test_type from comment + if echo "$COMMENT_BODY" | grep -q "^/run perf+mem"; then + TEST_TYPE="perf+mem" + elif echo "$COMMENT_BODY" | grep -q "^/run perf"; then + TEST_TYPE="perf" + else + TEST_TYPE="mem" + fi + # Get the HEAD sha of the PR + REF=$(gh pr view "$PR_NUMBER" --json headRefName --jq '.headRefName') + echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT" + fi + echo "test_type=${TEST_TYPE}" >> "$GITHUB_OUTPUT" + echo "ref=${REF}" >> "$GITHUB_OUTPUT" + + - name: 'Post acknowledgement comment on PR' + if: "steps.parse.outputs.pr_number != ''" + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + run: | + gh pr comment "${{ steps.parse.outputs.pr_number }}" --body \ + "⏳ **Baseline update requested** (\`${{ steps.parse.outputs.test_type }}\`). 
+ A member of the \`perf-approvers\` group must approve this workflow before the runners start. + " + + # ── 2. Approval gate (perf-approvers environment) ────────────────────── + await-approval: + name: 'Await perf-approvers Approval' + needs: 'parse-command' + # This environment requires manual approval from the perf-approvers group + # before GitHub provisions any of the downstream runners. + environment: 'perf-approvers' + runs-on: 'gemini-cli-ubuntu-16-core' + steps: + - name: 'Approved' + run: 'echo "Approved by perf-approvers — launching baseline update matrix."' + + # ── 3. Run tests to capture fresh baselines on each platform ─────────── + update-baselines: + name: 'Update Baselines (${{ matrix.machine_family }})' + needs: 'await-approval' + strategy: + fail-fast: false + matrix: + include: + - runs_on: 'gemini-cli-ubuntu-16-core' + machine_family: 'gemini-cli-ubuntu-16-core' + - runs_on: 'macos-latest' + machine_family: 'macos-latest' + - runs_on: 'gemini-cli-windows-16-core' + machine_family: 'gemini-cli-windows-16-core' + runs-on: '${{ matrix.runs_on }}' + steps: + - name: 'Checkout' + uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 + with: + ref: '${{ needs.parse-command.outputs.ref }}' + # Need full history so we can push back + fetch-depth: 0 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Update Perf Baselines' + if: "contains(needs.parse-command.outputs.test_type, 'perf')" + run: 'npm run test:perf:update-baselines' + env: + PERF_MACHINE_FAMILY: '${{ matrix.machine_family }}' + + - name: 'Update Memory Baselines' + if: "contains(needs.parse-command.outputs.test_type, 'mem')" + run: 'npm run test:memory:update-baselines' + env: + MEMORY_MACHINE_FAMILY: 
'${{ matrix.machine_family }}' + + - name: 'Upload updated baseline files' + uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 + with: + name: 'baselines-${{ matrix.machine_family }}' + # Upload the entire baselines/ subdirectories from both test roots + path: | + perf-tests/baselines/ + memory-tests/baselines/ + if-no-files-found: 'warn' + + # ── 4. Gather artifacts and commit everything back to the branch ──────── + commit-baselines: + name: 'Commit Updated Baselines' + needs: + - 'parse-command' + - 'update-baselines' + runs-on: 'gemini-cli-ubuntu-16-core' + steps: + - name: 'Checkout' + uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 + with: + ref: '${{ needs.parse-command.outputs.ref }}' + fetch-depth: 0 + + - name: 'Download all baseline artifacts' + uses: 'actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093' # ratchet:actions/download-artifact@v4 + with: + # Download each per-platform artifact into its own subdirectory so + # the paths mirror the test directory layout. + pattern: 'baselines-*' + merge-multiple: true + path: '.' + + - name: 'Commit and push' + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Stage only the per-platform baseline files (not the generic ones) + git add perf-tests/baselines/ memory-tests/baselines/ || true + + if git diff --cached --quiet; then + echo "No baseline files changed — nothing to commit." + else + git commit -m "chore: update ${{ needs.parse-command.outputs.test_type }} baselines [skip ci] + + Updated by 'Update Perf/Memory Baselines' workflow run #${{ github.run_id }}. 
+ Platforms: gemini-cli-ubuntu-16-core, macos-latest, gemini-cli-windows-16-core" + git push + fi + + - name: 'Post result comment on PR' + if: "needs.parse-command.outputs.pr_number != ''" + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + run: | + # Remove the acknowledgement comment before posting the result + COMMENT_ID=$(gh pr view "${{ needs.parse-command.outputs.pr_number }}" \ + --json comments \ + --jq '.comments[] | select(.body | contains("Baseline update requested")) | .url' \ + | grep -oE '[0-9]+$' | head -n 1) + if [ -n "$COMMENT_ID" ]; then + gh api -X DELETE "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}" + fi + + gh pr comment "${{ needs.parse-command.outputs.pr_number }}" --body \ + "✅ **Baselines updated** (\`${{ needs.parse-command.outputs.test_type }}\`). + + Fresh per-platform baseline files have been committed to this branch for: + - \`gemini-cli-ubuntu-16-core\` + - \`macos-latest\` + - \`gemini-cli-windows-16-core\` + + The nightly tests will now compare against these values. + " diff --git a/.gitignore b/.gitignore index 85902b4a7c..a076f9b813 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ .env~ # gemini-cli settings +.tmp-perf-baselines.json +.tmp-memory-baselines.json # We want to keep the .gemini in the root of the repo and ignore any .gemini # in subdirectories. In our root .gemini we want to allow for version control # for subcommands. diff --git a/docs/performance-and-memory-testing.md b/docs/performance-and-memory-testing.md new file mode 100644 index 0000000000..924f0069cc --- /dev/null +++ b/docs/performance-and-memory-testing.md @@ -0,0 +1,110 @@ +# Performance & Memory Testing Infrastructure + +## Overview + +Gemini CLI features a highly reliable performance and memory regression testing +pipeline. To curb anomalies and yield accurate results, the harness applies: + +- **IQR Outlier Filtering**: Discards anomalous metrics from evaluation safely.
+- **Median Sampling**: Takes `N` runs and reports the median of the + surviving samples. +- **Warmup Runs**: Discards the first run(s) to avoid JIT warmup artifacts. +- **Tolerance Boundary**: A default 15% tolerance prevents failures + caused by benign run-to-run noise. + +--- + +## Baseline Management + +There are two core strategies for calibrating tolerances on performance +benchmarks: + +- **Approach A: Normalize for Testing Servers**: Tests run directly on the + automated cloud servers, and those scores are recorded as official, static + baselines. +- **Approach B: Machine-Agnostic Daily Comparisons**: Static baseline files are + ignored. Every night, the test is run against today's and yesterday's code on + the exact same server. + +### Recommended Strategy: GitHub Action + Approach A + +#### Local Development & PR Checks + +- **Local Testing**: If you are a developer trying to quickly test your code + changes against performance or memory impacts, simply run the standard local + perf or memory tests directly without arguments. The harness automatically + stashes uncommitted changes, regenerates baselines from the latest + `main` branch into untracked temporary files, and then compares your + branch against them. +- **PR Merges**: Please note that if your alterations intentionally necessitate + adjustments across baseline metrics, you should trigger the GitHub Action to + recalibrate baselines in tandem with merging your PR. This ensures that + subsequent nightly audits compare their results + against the updated baselines. + +#### Nightly Build Health Audits + +- Strict Approach A procedures apply daily across platforms on dedicated + environments, avoiding the "boiling frog" issue where micro-regressions + quietly accumulate over time.
+ +--- + +## Running Tests + +### Performance CPU Tests + +```bash +# Run tests (compare against committed baselines) +npm run test:perf + +# Verbose output +VERBOSE=true npm run test:perf + +# Keep test artifacts for debugging +KEEP_OUTPUT=true npm run test:perf +``` + +### Memory Tests + +```bash +# Run memory tests (compare against local main baselines) +npm run test:memory +``` + +--- + +## Architecture & Configuration + +### Performance Tests Directory Tree + +- `perf-tests/baselines.json`: Committed baseline values +- `perf-tests/globalSetup.ts`: Test environment setup +- `perf-tests/perf-usage.test.ts`: Test scenarios +- `perf-tests/perf.*.responses`: Fake API responses per scenario + +### Memory Tests Directory Tree + +- `memory-tests/baselines.json`: Committed memory values +- `memory-tests/memory-usage.test.ts`: Memory test scenarios + +--- + +## CI Integration + +These tests are strictly excluded from `preflight` constraints and remain +designed strictly for nightly daily audits accurately: + +```yaml +- name: Performance regression tests + run: npm run test:perf +``` + +--- + +## Adding New Scenarios + +1. Add a fake response file: `perf..responses` or + `memory..responses`. +2. Add a test case in `perf-usage.test.ts` or `memory-usage.test.ts` applying + `harness.runScenario()`. 
diff --git a/memory-tests/baselines/.gitkeep b/memory-tests/baselines/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts index eb363a0135..41a594b646 100644 --- a/memory-tests/memory-usage.test.ts +++ b/memory-tests/memory-usage.test.ts @@ -5,7 +5,11 @@ */ import { describe, it, beforeAll, afterAll, afterEach } from 'vitest'; -import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils'; +import { + TestRig, + MemoryTestHarness, + resolveMemoryBaselinesPath, +} from '@google/gemini-cli-test-utils'; import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { @@ -19,7 +23,8 @@ import { import { randomUUID } from 'node:crypto'; const __dirname = dirname(fileURLToPath(import.meta.url)); -const BASELINES_PATH = join(__dirname, 'baselines.json'); +const MACHINE_FAMILY = process.env['MEMORY_MACHINE_FAMILY']; +const BASELINES_PATH = resolveMemoryBaselinesPath(__dirname, MACHINE_FAMILY); const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true'; const TOLERANCE_PERCENT = 10; @@ -37,6 +42,7 @@ describe('Memory Usage Tests', () => { gcCycles: 3, gcDelayMs: 100, sampleCount: 3, + machineFamily: MACHINE_FAMILY, }); }); diff --git a/package.json b/package.json index 150abcf3c3..6f48126f19 100644 --- a/package.json +++ b/package.json @@ -51,9 +51,9 @@ "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman", "test:integration:flaky": "cross-env RUN_FLAKY_INTEGRATION=1 npm run test:integration:sandbox:none", "test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests", - "test:memory": "vitest run --root ./memory-tests", + "test:memory": "node scripts/run-perf-tests.js memory", "test:memory:update-baselines": "cross-env UPDATE_MEMORY_BASELINES=true vitest run --root ./memory-tests", - 
"test:perf": "vitest run --root ./perf-tests", + "test:perf": "node scripts/run-perf-tests.js perf", "test:perf:update-baselines": "cross-env UPDATE_PERF_BASELINES=true vitest run --root ./perf-tests", "test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests", diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts index e851e7ab8d..4665993df6 100644 --- a/packages/test-utils/src/index.ts +++ b/packages/test-utils/src/index.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ diff --git a/packages/test-utils/src/memory-baselines.ts b/packages/test-utils/src/memory-baselines.ts index 3a4578cc50..bcefe7ba69 100644 --- a/packages/test-utils/src/memory-baselines.ts +++ b/packages/test-utils/src/memory-baselines.ts @@ -5,6 +5,7 @@ */ import { readFileSync, writeFileSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; /** * Baseline entry for a single memory test scenario. @@ -77,3 +78,25 @@ export function updateBaseline( }; saveBaselines(path, baselines); } + +/** + * Resolve the path to the correct memory baselines JSON file. + * + * - If `machineFamily` is provided → returns `/baselines/.json`. + * This file may not exist yet; the harness will hard-fail at assertion time if it doesn't. + * - If `machineFamily` is absent → returns `/baselines.json` + * (the legacy generic file used for local development). + * + * @param testRootDir - Absolute path to the directory containing the test root + * (e.g. `__dirname` inside `memory-tests/`). + * @param machineFamily - Optional CI runner label (e.g. `'gemini-cli-ubuntu-16-core'`). 
+ */ +export function resolveMemoryBaselinesPath( + testRootDir: string, + machineFamily?: string, +): string { + if (machineFamily) { + return join(testRootDir, 'baselines', `${machineFamily}.json`); + } + return join(testRootDir, 'baselines.json'); +} diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts index c12c220458..2141d66947 100644 --- a/packages/test-utils/src/memory-test-harness.ts +++ b/packages/test-utils/src/memory-test-harness.ts @@ -6,6 +6,8 @@ import v8 from 'node:v8'; import { setTimeout as sleep } from 'node:timers/promises'; +import { mkdirSync } from 'node:fs'; +import { join, dirname } from 'node:path'; import { loadBaselines, updateBaseline } from './memory-baselines.js'; import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js'; @@ -66,6 +68,14 @@ export interface MemoryTestHarnessOptions { sampleCount?: number; /** Pause in ms between samples. Default: 50 */ samplePauseMs?: number; + /** + * The CI machine family (e.g. 'gemini-cli-ubuntu-16-core'). + * When set, baselines are loaded from and saved to + * `/baselines/.json`. If the file does not exist and + * UPDATE_MEMORY_BASELINES is not set, tests hard-fail with an actionable + * message instead of silently falling back. + */ + machineFamily?: string; } /** @@ -85,6 +95,7 @@ export class MemoryTestHarness { private readonly gcDelayMs: number; private readonly sampleCount: number; private readonly samplePauseMs: number; + private readonly machineFamily?: string; private allResults: MemoryTestResult[] = []; constructor(options: MemoryTestHarnessOptions) { @@ -94,6 +105,7 @@ export class MemoryTestHarness { this.gcDelayMs = options.gcDelayMs ?? 100; this.sampleCount = options.sampleCount ?? 3; this.samplePauseMs = options.samplePauseMs ?? 
50; + this.machineFamily = options.machineFamily; + this.baselines = loadBaselines(this.baselinesPath); } @@ -240,6 +252,16 @@ export class MemoryTestHarness { const tolerance = tolerancePercent ?? this.defaultTolerancePercent; if (!result.baseline) { + if (this.machineFamily) { + throw new Error( + `No baseline found for scenario "${result.scenarioName}" on machine family "${this.machineFamily}".\n` + + ` Expected file: ${this.baselinesPath}\n` + + ` To create it, trigger the 'Update Baselines' workflow:\n` + + ` .github/workflows/update-baselines.yml\n` + + ` Or locally:\n` + + ` UPDATE_MEMORY_BASELINES=true MEMORY_MACHINE_FAMILY=${this.machineFamily} npm run test:memory`, + ); + } console.warn( `⚠ No baseline found for "${result.scenarioName}". ` + `Run with UPDATE_MEMORY_BASELINES=true to create one. ` + @@ -268,9 +290,21 @@ /** * Update the baseline for a scenario with the current measured values. + * The target is `baselinesPath` itself, which is already machine-family + * specific when one is set; its parent directory is created if needed. */ updateScenarioBaseline(result: MemoryTestResult): void { - updateBaseline(this.baselinesPath, result.scenarioName, { + // NOTE: this.baselinesPath is already machine-family specific when + // machineFamily is set (resolveMemoryBaselinesPath returns + // baselines/<family>.json), so write to it directly. Nesting another + // baselines/ level here would make writes land in + // baselines/baselines/<family>.json while reads continue to use + // baselines/<family>.json. + const targetPath = this.baselinesPath; + if (this.machineFamily) { + mkdirSync(dirname(targetPath), { recursive: true }); + } + updateBaseline(targetPath, result.scenarioName, { heapUsedBytes: result.finalHeapUsed, heapTotalBytes: result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 
0, @@ -391,6 +425,9 @@ export class MemoryTestHarness { lines.push(''); lines.push('═══════════════════════════════════════════════════'); lines.push(' MEMORY USAGE TEST REPORT'); + if (this.machineFamily) { + lines.push(` Machine family: ${this.machineFamily}`); + } lines.push('═══════════════════════════════════════════════════'); lines.push(''); diff --git a/packages/test-utils/src/perf-test-harness.ts b/packages/test-utils/src/perf-test-harness.ts index 2f376f58b6..0a567948d7 100644 --- a/packages/test-utils/src/perf-test-harness.ts +++ b/packages/test-utils/src/perf-test-harness.ts @@ -6,7 +6,8 @@ import { performance } from 'node:perf_hooks'; import { setTimeout as sleep } from 'node:timers/promises'; -import { readFileSync, writeFileSync, existsSync } from 'node:fs'; +import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs'; +import { join, dirname } from 'node:path'; /** Configuration for asciichart plot function. */ interface PlotConfig { @@ -83,6 +84,14 @@ export interface PerfTestHarnessOptions { warmupCount?: number; /** Pause in ms between samples. Default: 100 */ samplePauseMs?: number; + /** + * The CI machine family (e.g. 'gemini-cli-ubuntu-16-core'). + * When set, baselines are loaded from and saved to + * `/baselines/.json`. If the file does not exist and + * UPDATE_PERF_BASELINES is not set, tests hard-fail with an actionable + * message instead of silently falling back. + */ + machineFamily?: string; } /** @@ -114,6 +123,7 @@ export class PerfTestHarness { private readonly sampleCount: number; private readonly warmupCount: number; private readonly samplePauseMs: number; + private readonly machineFamily?: string; private allResults: PerfTestResult[] = []; private activeTimers: Map = new Map(); @@ -124,6 +134,7 @@ export class PerfTestHarness { this.sampleCount = options.sampleCount ?? 5; this.warmupCount = options.warmupCount ?? 1; this.samplePauseMs = options.samplePauseMs ?? 
100; + this.machineFamily = options.machineFamily; this.baselines = loadPerfBaselines(this.baselinesPath); } @@ -284,6 +295,18 @@ export class PerfTestHarness { const cpuTolerance = cpuTolerancePercent ?? this.defaultCpuTolerancePercent; if (!result.baseline) { + if (this.machineFamily) { + // In CI with a declared machine family: hard-fail so the problem is + // immediately visible, rather than silently skipping the assertion. + throw new Error( + `No baseline found for scenario "${result.scenarioName}" on machine family "${this.machineFamily}".\n` + + ` Expected file: ${this.baselinesPath}\n` + + ` To create it, trigger the 'Update Baselines' workflow:\n` + + ` .github/workflows/update-baselines.yml\n` + + ` Or locally:\n` + + ` UPDATE_PERF_BASELINES=true PERF_MACHINE_FAMILY=${this.machineFamily} npm run test:perf`, + ); + } console.warn( `⚠ No baseline found for "${result.scenarioName}". ` + `Run with UPDATE_PERF_BASELINES=true to create one. ` + @@ -321,16 +344,30 @@ export class PerfTestHarness { /** * Update the baseline for a scenario with the current measured values. + * When `machineFamily` is set, writes to `baselines/.json` + * (creating the directory if needed). Otherwise writes to `baselinesPath`. */ updateScenarioBaseline(result: PerfTestResult): void { - updatePerfBaseline(this.baselinesPath, result.scenarioName, { + const targetPath = this.machineFamily + ? 
this.baselinesPath + // NOTE: this.baselinesPath is already resolved per machine family by + // resolvePerfBaselinesPath (baselines/<family>.json); nesting another + // baselines/ level here would split the write path from the path + // that loadPerfBaselines reads below. + : this.baselinesPath; + // Ensure the baselines/ subdirectory exists + if (this.machineFamily) { + mkdirSync(dirname(targetPath), { recursive: true }); + } + updatePerfBaseline(targetPath, result.scenarioName, { wallClockMs: result.median.wallClockMs, cpuTotalUs: result.median.cpuTotalUs, }); // Reload baselines after update this.baselines = loadPerfBaselines(this.baselinesPath); console.log( - `Updated baseline for ${result.scenarioName}: ${result.median.wallClockMs.toFixed(1)} ms`, + `Updated baseline for ${result.scenarioName}: ${result.median.wallClockMs.toFixed(1)} ms` + + (this.machineFamily ? ` [${this.machineFamily}]` : ''), ); } @@ -344,6 +381,9 @@ lines.push(''); lines.push('═══════════════════════════════════════════════════'); lines.push(' PERFORMANCE TEST REPORT'); + if (this.machineFamily) { + lines.push(` Machine family: ${this.machineFamily}`); + } lines.push('═══════════════════════════════════════════════════'); lines.push(''); @@ -484,6 +524,30 @@ } } +// ─── Baseline path resolution ──────────────────────────────────────── + +/** + * Resolve the path to the correct perf baselines JSON file. + * + * - If `machineFamily` is provided → returns `/baselines/.json`. + * This file may not exist yet; the harness will hard-fail at assertion time if it doesn't. + * - If `machineFamily` is absent → returns `/baselines.json` + * (the legacy generic file used for local development). + * + * @param testRootDir - Absolute path to the directory containing the test root + * (e.g. `__dirname` inside `perf-tests/`). + * @param machineFamily - Optional CI runner label (e.g. `'gemini-cli-ubuntu-16-core'`). 
+ */ +export function resolvePerfBaselinesPath( + testRootDir: string, + machineFamily?: string, +): string { + if (machineFamily) { + return join(testRootDir, 'baselines', `${machineFamily}.json`); + } + return join(testRootDir, 'baselines.json'); +} + // ─── Baseline management ───────────────────────────────────────────── /** diff --git a/perf-tests/README.md b/perf-tests/README.md deleted file mode 100644 index c8e9e448c1..0000000000 --- a/perf-tests/README.md +++ /dev/null @@ -1,121 +0,0 @@ -# CPU Performance Integration Test Harness - -## Overview - -This directory contains performance/CPU integration tests for the Gemini CLI. -These tests measure wall-clock time, CPU usage, and event loop responsiveness to -detect regressions across key scenarios. - -CPU performance is inherently noisy, especially in CI. The harness addresses -this with: - -- **IQR outlier filtering** — discards anomalous samples -- **Median sampling** — takes N runs, reports the median after filtering -- **Warmup runs** — discards the first run to mitigate JIT compilation noise -- **15% default tolerance** — won't panic at slight regressions - -## Running - -```bash -# Run tests (compare against committed baselines) -npm run test:perf - -# Update baselines (after intentional changes) -npm run test:perf:update-baselines - -# Verbose output -VERBOSE=true npm run test:perf - -# Keep test artifacts for debugging -KEEP_OUTPUT=true npm run test:perf -``` - -## How It Works - -### Measurement Primitives - -The `PerfTestHarness` class (in `packages/test-utils`) provides: - -- **`performance.now()`** — high-resolution wall-clock timing -- **`process.cpuUsage()`** — user + system CPU microseconds (delta between - start/stop) -- **`perf_hooks.monitorEventLoopDelay()`** — event loop delay histogram - (p50/p95/p99/max) - -### Noise Reduction - -1. **Warmup**: First run is discarded to mitigate JIT compilation artifacts -2. **Multiple samples**: Each scenario runs N times (default 5) -3. 
**IQR filtering**: Samples outside Q1−1.5×IQR and Q3+1.5×IQR are discarded -4. **Median**: The median of remaining samples is used for comparison - -### Baseline Management - -Baselines are stored in `baselines.json` in this directory. Each scenario has: - -```json -{ - "cold-startup-time": { - "wallClockMs": 1234.5, - "cpuTotalUs": 567890, - "eventLoopDelayP99Ms": 12.3, - "timestamp": "2026-04-08T..." - } -} -``` - -Tests fail if the measured value exceeds `baseline × 1.15` (15% tolerance). - -To recalibrate after intentional changes: - -```bash -npm run test:perf:update-baselines -# then commit baselines.json -``` - -### Report Output - -After all tests, the harness prints an ASCII summary: - -``` -═══════════════════════════════════════════════════ - PERFORMANCE TEST REPORT -═══════════════════════════════════════════════════ - -cold-startup-time: 1234.5 ms (Baseline: 1200.0 ms, Delta: +2.9%) ✅ -idle-cpu-usage: 2.1 % (Baseline: 2.0 %, Delta: +5.0%) ✅ -skill-loading-time: 1567.8 ms (Baseline: 1500.0 ms, Delta: +4.5%) ✅ -``` - -## Architecture - -``` -perf-tests/ -├── README.md ← you are here -├── baselines.json ← committed baseline values -├── globalSetup.ts ← test environment setup -├── perf-usage.test.ts ← test scenarios -├── perf.*.responses ← fake API responses per scenario -├── tsconfig.json ← TypeScript config -└── vitest.config.ts ← vitest config (serial, isolated) - -packages/test-utils/src/ -├── perf-test-harness.ts ← PerfTestHarness class -└── index.ts ← re-exports -``` - -## CI Integration - -These tests are **excluded from `preflight`** and designed for nightly CI: - -```yaml -- name: Performance regression tests - run: npm run test:perf -``` - -## Adding a New Scenario - -1. Add a fake response file: `perf..responses` -2. Add a test case in `perf-usage.test.ts` using `harness.runScenario()` -3. Run `npm run test:perf:update-baselines` to establish initial baseline -4. 
Commit the updated `baselines.json` diff --git a/perf-tests/baselines/.gitkeep b/perf-tests/baselines/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts index 1a361eda5d..a85eb0b9ca 100644 --- a/perf-tests/perf-usage.test.ts +++ b/perf-tests/perf-usage.test.ts @@ -5,13 +5,18 @@ */ import { describe, it, beforeAll, afterAll } from 'vitest'; -import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils'; +import { + TestRig, + PerfTestHarness, + resolvePerfBaselinesPath, +} from '@google/gemini-cli-test-utils'; import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { existsSync, readFileSync } from 'node:fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); -const BASELINES_PATH = join(__dirname, 'baselines.json'); +const MACHINE_FAMILY = process.env['PERF_MACHINE_FAMILY']; +const BASELINES_PATH = resolvePerfBaselinesPath(__dirname, MACHINE_FAMILY); const UPDATE_BASELINES = process.env['UPDATE_PERF_BASELINES'] === 'true'; const TOLERANCE_PERCENT = 15; @@ -28,6 +33,7 @@ describe('CPU Performance Tests', () => { defaultTolerancePercent: TOLERANCE_PERCENT, sampleCount: SAMPLE_COUNT, warmupCount: WARMUP_COUNT, + machineFamily: MACHINE_FAMILY, }); }); diff --git a/scripts/clean.js b/scripts/clean.js index dbb3849b15..ef591a4697 100644 --- a/scripts/clean.js +++ b/scripts/clean.js @@ -27,6 +27,8 @@ const root = join(__dirname, '..'); // remove npm install/build artifacts rmSync(join(root, 'node_modules'), { recursive: true, force: true }); rmSync(join(root, 'bundle'), { recursive: true, force: true }); +rmSync(join(root, '.tmp-perf-baselines.json'), { force: true }); +rmSync(join(root, '.tmp-memory-baselines.json'), { force: true }); rmSync(join(root, 'packages/cli/src/generated/'), { recursive: true, force: true, diff --git a/scripts/run-perf-tests.js b/scripts/run-perf-tests.js new file mode 100644 index 0000000000..7e2b38bb85 --- 
/dev/null +++ b/scripts/run-perf-tests.js @@ -0,0 +1,118 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { execSync } from 'node:child_process'; +import path from 'node:path'; + +const type = process.argv[2]; // 'perf' or 'memory' +const args = process.argv.slice(3); + +if (type !== 'perf' && type !== 'memory') { + console.error('Invalid test type. Must be "perf" or "memory".'); + process.exit(1); +} + +const isLocal = !process.env.CI && !process.env.GITHUB_ACTIONS; +const noOptions = args.length === 0; +const testDir = type === 'perf' ? './perf-tests' : './memory-tests'; +const updateEnv = + type === 'perf' + ? 'UPDATE_PERF_BASELINES=true' + : 'UPDATE_MEMORY_BASELINES=true'; +const tempBaselinesPath = path.resolve( + process.cwd(), + `.tmp-${type}-baselines.json`, +); + +if (isLocal && noOptions) { + console.log( + `[Auto-Baseline] Detected local run without options for ${type} tests.`, + ); + console.log('[Auto-Baseline] Updating baselines from main branch first...'); + + let originalBranch = ''; + let isDirty = false; + + try { + originalBranch = execSync('git rev-parse --abbrev-ref HEAD', { + encoding: 'utf-8', + }).trim(); + const status = execSync('git status --porcelain', { + encoding: 'utf-8', + }).trim(); + isDirty = status !== ''; + + if (isDirty) { + console.log('[Auto-Baseline] Stashing current changes...'); + execSync('git stash push --include-untracked -m "temp-perf-test-run"'); + } + + console.log('[Auto-Baseline] Switching to main branch...'); + execSync('git checkout main', { stdio: 'inherit' }); + + try { + console.log( + '[Auto-Baseline] Pulling latest changes for main from origin...', + ); + execSync('git pull origin main', { stdio: 'inherit' }); + } catch { + console.warn( + '[Auto-Baseline] Warning: git pull failed. 
Proceeding with local main branch.', + ); + } + + console.log( + `[Auto-Baseline] Running update baselines for ${type} tests on main...`, + ); + execSync( + `npx cross-env ${updateEnv} TEMP_BASELINES_PATH=${tempBaselinesPath} npx vitest run --root ${testDir}`, + { stdio: 'inherit' }, + ); + } catch (err) { + console.error( + '[Auto-Baseline] Error during main-branch baseline update:', + err, + ); + } finally { + if (originalBranch) { + console.log( + `[Auto-Baseline] Returning to original branch: ${originalBranch}...`, + ); + try { + execSync(`git checkout ${originalBranch}`, { stdio: 'inherit' }); + if (isDirty) { + console.log('[Auto-Baseline] Restoring stashed changes...'); + execSync('git stash pop', { stdio: 'inherit' }); + } + } catch { + console.error( + '[Auto-Baseline] Critical error while trying to restore original branch state.', + ); + } + } + } + + console.log( + `[Auto-Baseline] Running tests on branch ${originalBranch} against updated baselines...`, + ); + try { + execSync( + `npx cross-env TEMP_BASELINES_PATH=${tempBaselinesPath} npx vitest run --root ${testDir}`, + { stdio: 'inherit' }, + ); + } catch { + process.exit(1); + } +} else { + // Just run standard tests directly + const command = `npx vitest run --root ${testDir} ${args.join(' ')}`; + console.log(`[Standard] Running tests: ${command}`); + try { + execSync(command, { stdio: 'inherit' }); + } catch { + process.exit(1); + } +}