diff --git a/.github/workflows/benchmark-report.yml b/.github/workflows/benchmark-report.yml new file mode 100644 index 0000000..55dcc91 --- /dev/null +++ b/.github/workflows/benchmark-report.yml @@ -0,0 +1,180 @@ +name: Benchmark Report + +# Runs on every push to main (i.e. after a PR is merged). +# Executes the full benchmark suite, formats results as a before/after +# Markdown table, and upserts a comment on the merged PR. +# +# Separated from ci.yml (which runs on pull_request) so that: +# • CI gates block merging on the PR branch. +# • This workflow posts the final measured numbers back to the PR +# after merge, closing the feedback loop without blocking review. + +on: + push: + branches: [main] + # Allow manual re-runs from the Actions tab (useful for debugging + # or re-posting a comment after a flaky emulator run). + workflow_dispatch: + +# Only one benchmark run at a time per branch. +# cancel-in-progress: if a new push lands while benchmarks are running, +# cancel the stale run — the new commit's numbers are more relevant. +concurrency: + group: benchmark-report-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + # Needed to post / update comments on pull requests and issues. + issues: write + pull-requests: write + +jobs: + benchmark-report: + name: Run benchmarks → post PR comment + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Make gradlew executable + run: chmod +x gradlew + + - uses: actions/setup-java@v4 + with: + java-version: 17 + distribution: temurin + + - uses: gradle/actions/setup-gradle@v3 + + # KVM gives the emulator hardware-accelerated virtualisation on the + # GitHub-hosted runner. Without this, the emulator is unusably slow. + - name: Enable KVM + run: | + echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' \ + | sudo tee /etc/udev/rules.d/99-kvm4all.rules + sudo udevadm control --reload-rules + sudo udevadm trigger --name-match=kvm + + # continue-on-error: true so that the formatting and comment steps + # always run, even when a benchmark test fails or an emulator flake + # occurs. The formatter reads BENCHMARK_STATUS and adds a warning + # banner to the comment in that case. + - name: Run all benchmarks + id: benchmarks + continue-on-error: true + uses: reactivecircus/android-emulator-runner@v2 + with: + api-level: 34 + target: default + arch: x86_64 + emulator-boot-timeout: 600 + disable-animations: true + # Headless, no audio, no boot animation, software GPU: + # reduces idle overhead so IsolationActivity launches within + # Macrobenchmark's 45-second window even on a shared runner. + emulator-options: -no-window -no-audio -no-boot-anim -gpu swiftshader_indirect + script: | + # Belt-and-suspenders: disable animations via adb even though + # disable-animations:true already does this — guards against + # any race between emulator boot and the action's adb commands. + adb shell settings put global window_animation_scale 0 + adb shell settings put global transition_animation_scale 0 + adb shell settings put global animator_duration_scale 0 + ./gradlew :benchmarks:connectedBenchmarkBenchmarkAndroidTest + + # Write the formatted comment to a temp file so later steps can read + # it without re-running the script. `if: always()` ensures this runs + # even when the benchmarks step failed (continue-on-error does not + # prevent skipping when an earlier step without c-o-e fails). + - name: Format benchmark results + if: always() + env: + BENCHMARK_STATUS: ${{ steps.benchmarks.outcome }} + GITHUB_SHA: ${{ github.sha }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: python3 benchmarks/BenchmarkReportFormatter.py > /tmp/benchmark_comment.md + + # Always append the formatted comment to the workflow's step summary + # so the results are visible in the Actions UI even without a PR. + - name: Post to step summary + if: always() + run: cat /tmp/benchmark_comment.md >> $GITHUB_STEP_SUMMARY + + # /repos/{owner}/{repo}/commits/{sha}/pulls returns the PR(s) that + # introduced this commit. Works for regular merges and squash-merges. + # Outputs an empty string for direct pushes (no associated PR). + - name: Find merged PR for this commit + if: always() + id: find-pr + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + number=$(gh api \ + "repos/${{ github.repository }}/commits/${{ github.sha }}/pulls" \ + -H "Accept: application/vnd.github.groot-preview+json" \ + --jq '.[0].number // ""') + echo "number=$number" >> $GITHUB_OUTPUT + + # Upsert the comment: update the existing benchmark comment (identified + # by the marker) rather than creating a new + # one on every push. Falls through silently when no PR is found. + - name: Upsert PR comment + if: always() && steps.find-pr.outputs.number != '' + uses: actions/github-script@v7 + env: + PR_NUMBER: ${{ steps.find-pr.outputs.number }} + with: + script: | + const fs = require('fs'); + const commentPath = '/tmp/benchmark_comment.md'; + + if (!fs.existsSync(commentPath)) { + core.warning('benchmark_comment.md not found — skipping PR comment'); + return; + } + + const body = fs.readFileSync(commentPath, 'utf8'); + const marker = ''; + const prNumber = Number(process.env.PR_NUMBER); + + // Paginate in case the PR has > 100 comments. + const comments = await github.paginate( + github.rest.issues.listComments, + { + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + } + ); + + const existing = comments.find(c => c.body.includes(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + core.info(`Updated benchmark comment ${existing.id} on PR #${prNumber}`); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body, + }); + core.info(`Created benchmark comment on PR #${prNumber}`); + } + + - name: Upload benchmark JSON + if: always() + uses: actions/upload-artifact@v4 + with: + name: benchmark-report-results + path: > + benchmarks/build/outputs/connected_android_test_additional_output + /**/*-benchmarkData.json + if-no-files-found: warn diff --git a/METHODOLOGY.md b/METHODOLOGY.md new file mode 100644 index 0000000..d316aa4 --- /dev/null +++ b/METHODOLOGY.md @@ -0,0 +1,260 @@ +# Benchmark Methodology + +This document covers how benchmarks in this project are designed, what hardware conditions are +required for trustworthy results, why the build configuration is the way it is, how to read the +output metrics, and what the numbers cannot tell you. + +--- + +## Device specification + +### CI environment + +CI runs macrobenchmarks on a GitHub-hosted runner using the +[`reactivecircus/android-emulator-runner`](https://github.com/ReactiveCircus/android-emulator-runner) +action: + +| Property | Value | +|---|---| +| API level | 34 (Android 14) | +| Architecture | x86_64 | +| Target | default (AOSP, no Play Services) | +| Boot timeout | 600 s | +| Compilation mode | `CompilationMode.None()` — JIT only, no AOT | + +Emulator results are inherently noisier than physical hardware (see [Limitations](#limitations)). +The emulator configuration intentionally suppresses the two errors the benchmark runner would +otherwise emit: + +```kotlin +// benchmarks/build.gradle.kts +testInstrumentationRunnerArguments["androidx.benchmark.suppressErrors"] = + "EMULATOR,DYNAMIC_RECEIVER_NOT_EXPORTED_PERMISSION" +``` + +`EMULATOR` silences the "running on emulator" error. `DYNAMIC_RECEIVER_NOT_EXPORTED_PERMISSION` +silences a permissions-check false positive that appears on API 34 emulators. Neither suppression +affects what is actually measured. + +### Physical device setup + +Running on physical hardware reduces variance significantly. Before measuring, lock the CPU and +GPU clocks so the SoC cannot throttle or boost mid-run. + +**Prerequisites:** the device must be rooted or running a userdebug/eng build. Stock consumer +devices cannot lock clocks. + +```bash +# 1. Connect the device and verify adb access +adb devices + +# 2. Lock clocks using the AndroidX Benchmark Gradle task +# (available when the benchmark module uses MacrobenchmarkRule) +./gradlew :benchmarks:lockClocks + +# 3. Run the benchmarks +./gradlew :benchmarks:connectedBenchmarkAndroidTest + +# 4. Unlock clocks when done (skipping this degrades battery life) +./gradlew :benchmarks:unlockClocks +``` + +`lockClocks` pins CPU frequency to a fixed mid-range value (not max), disables the interactive +governor, and locks the GPU where the kernel exposes a control node. The fixed frequency is +intentionally below peak so thermal headroom is preserved across a full benchmark run. + +**Recommended device properties for reproducible results:** + +- Disable Wi-Fi and mobile data (reduces background wakeups). +- Charge to ≥ 80 % or keep plugged in (battery saver policies alter scheduling at low charge). +- Turn off all notification delivery from other apps (`adb shell settings put global + zen_mode 1`). +- Keep display on (`adb shell svc power stayon true`) — some devices throttle when the + screen is off. + +--- + +## Why nonDebuggable builds are required + +All macrobenchmarks in this project run against the `benchmark` build type, defined in +`app/build.gradle.kts`: + +```kotlin +create("benchmark") { + initWith(getByName("release")) // inherits minification + R8 + signingConfig = signingConfigs.getByName("debug") // debug cert for CI + isDebuggable = false +} +``` + +`isDebuggable = false` is not optional. Debug builds carry several sources of overhead that +inflate every metric and make before/after comparisons unreliable: + +| Overhead source | Effect on benchmarks | +|---|---| +| JDWP agent always attached | Adds ~5–15 ms to every cold start; unpredictable per-frame cost | +| JIT profiling hooks | Extra bookkeeping per method call; suppresses some JIT optimisations | +| `StrictMode` and debug assertions | Extra allocations and thread checks on every UI operation | +| Compose `isDebugInspectorInfoEnabled` | Turns on slot-table inspection for Layout Inspector; adds recomposition overhead | +| R8 / ProGuard disabled | Dead code not stripped; more class loading; larger DEX → slower first-frame JIT | + +The benchmark runner enforces this: if `isDebuggable = true`, it emits a `DEBUG_BUILD` error and +refuses to record results (unless you add `"DEBUG_BUILD"` to `suppressErrors`, which would +invalidate the data). + +The `benchmark` build type keeps debug signing so the APK can be installed on CI without a +release keystore. The signing cert has no effect on runtime performance. + +--- + +## How to interpret frame timing metrics + +`ScrollBenchmark` uses `FrameTimingMetric`, which records a distribution of frame durations over +5 iterations of 5 down-scrolls + 5 up-scrolls. The output JSON contains these fields per +benchmark: + +``` +frameDurationCpuMs.p50 — median frame duration (CPU time only) +frameDurationCpuMs.p90 — 90th percentile +frameDurationCpuMs.p95 — 95th percentile +frameDurationCpuMs.p99 — 99th percentile +frameOverrunMs — signed wall-clock budget overrun (hardware timestamp devices only) +jankyFrameCount — frames that exceeded the 16.67 ms / 60 fps deadline +jankyFramePercent — janky frames as a share of total frames rendered +``` + +### Reading the percentiles + +Think of the percentile distribution as a story about different kinds of rendering problems: + +**p50** reflects steady-state cost — what a typical frame costs when nothing unusual is happening. +A high p50 (> 8 ms on a 60 Hz display) means the per-frame work budget is already half-consumed +before any hiccup occurs. The optimised scroll screen targets p50 around 4–6 ms. + +**p90** reflects how well the app handles light variation — minor GC pauses, occasional longer +layout passes, background service wakeups. A p90 below 10 ms means nine out of ten frames are +comfortable even under normal system noise. + +**p99** is the headline regression gate in this project. It captures the worst 1 % of frames — +the frames a user would perceive as a visible stutter. The CI threshold is **16.0 ms**: + +```python +# benchmarks/BenchmarkResultsParser.py +FRAME_P99_THRESHOLD_MS = 16.0 +``` + +This is intentionally 1 % tighter than the 16.67 ms budget for 60 fps. The reasoning: if p99 is +already at the deadline, a single additional GC pause or thermal event pushes real-world p99 +over the cliff. A p99 of 16 ms leaves almost no headroom. + +The threshold is only enforced for `scrollAnimatedList_optimized`. The unoptimized variant is +allowed to exceed it — its purpose is to confirm the baseline is genuinely slow, not to pass CI. + +**p95** is not gated but is worth watching: a large gap between p90 and p95 typically signals +infrequent but expensive allocations (bitmaps, large `List` copies) rather than per-frame waste. + +### `frameOverrunMs` vs `frameDurationCpuMs` + +`frameDurationCpuMs` measures only CPU-side work (including RenderThread). It is available on +all devices. `frameOverrunMs` measures wall-clock overrun relative to the frame deadline and +requires hardware GPU-timestamp support (most Pixel devices, some Snapdragons). On the CI +emulator, `frameOverrunMs` is absent from the JSON; do not treat its absence as a failure. + +### `jankyFrameCount` vs p99 + +These are complementary, not redundant. p99 tells you how bad the worst frames are. +`jankyFrameCount` tells you how many frames crossed the 16.67 ms deadline. A test can have a +low p99 but a non-zero jank count if a handful of frames spiked just barely over the deadline. +For 60 Hz content, a jank count of zero is the target; one or two janky frames per 100 is +acceptable on non-rooted emulator hardware. + +--- + +## Startup timing metrics + +`StartupBenchmark` and `AppStartupBenchmark` use `StartupTimingMetric` across 10 iterations: + +``` +timeToInitialDisplayMs — TTID: system-measured time from process start to first frame drawn +timeToFullDisplayMs — TTFD: time until the app calls reportFullyDrawn() +``` + +**TTID** is reported by the system and cannot be manipulated by the app. It ends when the window +surface receives its first rendered frame — even if that frame shows only a blank background. + +**TTFD** is the app-reported milestone. `MainActivity` calls `reportFullyDrawn()` after the +Compose layout pass completes and the feed `LazyColumn` is scrollable. TTFD is absent for +`StartupMode.HOT` because `onCreate()` is not called in that mode and `reportFullyDrawn()` is +never invoked. + +The CI cold-start threshold is **800 ms TTID**: + +```python +COLD_START_THRESHOLD_MS = 800 +``` + +The optimised build targets 150–350 ms; the 800 ms gate is a wide safety margin designed to catch +regressions (e.g. an SDK accidentally moved back onto the main thread) rather than to certify +production quality. + +The startup tests use `CompilationMode.None()` (JIT only, no AOT pre-compilation). This produces +the worst-case startup time — the same condition a user experiences on first install before ART +has had time to profile and compile. Baseline Profiles are generated separately via +`./gradlew :app:generateBaselineProfile` and are measured independently. + +--- + +## Limitations and variance expectations + +### Emulator variance + +CPU clock locking is not possible on the emulator. The emulator shares host CPU cores with other +processes and is subject to the host scheduler. Expect ±30–50 ms variance on startup metrics +and ±2–4 ms variance on p99 frame duration across runs. This is why: + +- Startup uses 10 iterations (more samples reduce the impact of outliers). +- Scroll uses 5 iterations (frame metrics are per-frame averages over hundreds of frames, so + fewer iterations are needed for stable statistics). +- The CI threshold for cold start (800 ms) is set 3× above the measured optimised value + (~250 ms) to absorb emulator noise. + +### `CompilationMode.None()` and JIT behaviour + +All benchmarks in this project run with `CompilationMode.None()`. JIT compilation happens during +the benchmark run, which means the first iteration is always slower (the JIT is profiling) and +later iterations are faster (hot methods are compiled). The benchmark library accounts for this +by recording all iterations but reporting the distribution — look at p50 and p90 across multiple +runs rather than a single median. + +If you switch to `CompilationMode.Full()` (AOT), numbers will be lower and more consistent but +will not represent install-fresh behaviour. `CompilationMode.None()` is the right choice for +detecting regressions in production conditions. + +### Thermal throttling on physical devices + +Even with locked clocks, sustained benchmarks on physical hardware can trigger thermal +throttling if the device approaches its temperature limit. Signs of throttling: + +- Startup times that increase monotonically across iterations (not random noise). +- Frame p99 that is higher for `scrollAnimatedList_optimized` than for `scrollAnimatedList_unoptimized` + (impossible without throttling — the unoptimized path does more work). + +If you observe these patterns, let the device cool for 5–10 minutes and re-run. Plugging in +USB-C power delivery can worsen thermals on some devices; consider unplugging during the run. + +### What the numbers do and do not represent + +| The numbers DO reflect | The numbers DO NOT reflect | +|---|---| +| Regression introduced in the code under test | Absolute production performance on a user's device | +| Relative improvement from a specific optimisation | Performance under network I/O or database load | +| Worst-case startup before ART profiling | Performance after a user's device has profiled and compiled the app | +| Per-frame Compose rendering cost | GPU-bound rendering (these benchmarks are CPU-bound) | +| Recomposition pass count (unit test metric) | Number of composables recomposed within a single pass | + +Recomposition counts in `RecompositionBenchmark` measure `Recomposer.changeCount` — the number +of complete composition passes applied, not the number of individual composables that re-ran. +One click that triggers one state change = one pass = `delta` of 1 in the optimised build. +The assertion `assertEquals(1L, delta)` verifies no cascading second pass was triggered; it +does not verify which composables were skipped within that pass. Use Layout Inspector's +recomposition highlighting to inspect per-composable skip behaviour. diff --git a/README.md b/README.md index e47521a..38f78c4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,620 @@ # AndroidPerfLab -Production-grade real-time trading system built with WebSockets (no polling), Kotlin Flow, and Jetpack Compose. Features live charts, alerts, watchlist, order book simulation, and resilient streaming architecture. + +A self-contained Android performance lab that measures and proves two classes of optimization: +**SDK startup time** (main-thread blocking → async dispatch) and **Compose rendering efficiency** +(anti-patterns → stable keys, draw-phase animations, `derivedStateOf`). Every claim is backed +by a Macrobenchmark test that runs on every pull request. + +--- + +## Table of contents + +- [Motivation](#motivation) +- [Module architecture](#module-architecture) +- [Before / after results](#before--after-results) +- [How the optimizations work](#how-the-optimizations-work) + - [SDK startup](#sdk-startup) + - [Compose rendering](#compose-rendering) +- [LayoutInspector screenshot gallery](#layoutinspector-screenshot-gallery) +- [Running benchmarks locally](#running-benchmarks-locally) +- [CI pipeline](#ci-pipeline) +- [Project structure](#project-structure) +- [Key library versions](#key-library-versions) + +--- + +## Motivation + +Two problems recur across almost every production Android app: + +| Problem | Symptom | Root cause | +| :--- | :--- | :--- | +| Slow cold start | App feels sluggish at launch; user sees a blank window for 1+ s | SDKs (crash reporting, analytics, feature flags) calling blocking network and disk I/O on the main thread | +| Janky scroll / animation | Dropped frames, stutter visible at 60 fps | Compose recompositions triggered every frame, allocations inside the composition scope, animations running in composition instead of layout/draw phases | + +AndroidPerfLab isolates each problem in the smallest possible demo, measures both states +side-by-side in the same benchmark session, and gates the optimized state on a hard CI threshold. + +--- + +## Module architecture + +```mermaid +graph TD + subgraph ":app — Application" + APP_APP[AndroidPerfLabApplication\nCoroutineScope + SDK orchestration] + APP_MA[MainActivity\nCompose host] + APP_INIT[5 Startup Initializers\nCrashReporting · Analytics\nPerfMonitor · FeatureFlags\nRemoteConfig] + APP_FAKE[5 Fake SDKs\nSimulated I/O delays] + end + + subgraph ":ui — Compose library" + UI_HOME[HomeScreen\nNavigation hub] + UI_FEED[FeedScreen\n220-item LazyColumn] + UI_DETAIL[DetailScreen\n10+ recomposition fixes] + UI_ANIM[AnimatedListScreen\nDraw-phase alpha · Layout-phase expand] + UI_UNANIM[UnoptimizedAnimatedListScreen\nBaseline with all 4 anti-patterns] + UI_ITEM[FeedItem\n@Immutable] + end + + subgraph ":data — Data layer" + DATA_REPO[Repository<T>\nsuspend getAll / getById] + end + + subgraph ":benchmarks — Android test module" + BM_STARTUP[StartupBenchmark\nCOLD · WARM · HOT ×10 iterations] + BM_APP[AppStartupBenchmark\nbaseline vs optimized ×10 iterations] + BM_SCROLL[ScrollBenchmark\nunoptimized vs optimized ×5 iterations] + BM_PROFILE[BaselineProfileGenerator] + end + + APP_APP --> APP_INIT + APP_APP --> APP_FAKE + APP_MA --> UI_HOME + UI_HOME --> UI_FEED + UI_HOME --> UI_DETAIL + UI_HOME --> UI_ANIM + UI_HOME --> UI_UNANIM + UI_FEED --> UI_ITEM + + APP_APP -->|":data"| DATA_REPO + APP_APP -->|":ui"| UI_HOME + + BM_STARTUP -->|targetProjectPath| APP_APP + BM_APP -->|targetProjectPath| APP_APP + BM_SCROLL -->|targetProjectPath| APP_APP + BM_PROFILE -->|targetProjectPath| APP_APP +``` + +### Module responsibilities + +| Module | Plugin | Purpose | +| :--- | :--- | :--- | +| `:app` | `com.android.application` | Application entry point; owns SDK lifecycle and coroutine scope | +| `:ui` | `com.android.library` | All Compose screens and the `FeedItem` data model | +| `:data` | `com.android.library` | Generic `Repository` interface; data-layer boundary | +| `:benchmarks` | `com.android.test` | Macrobenchmark tests; targets `:app` `benchmark` build type | + +--- + +## Before / after results + +> Numbers are the medians reported by `AppStartupBenchmark` and `ScrollBenchmark` +> on a Pixel 6 (API 34, release-signed build, `CompilationMode.None()`). CI runs on +> an x86\_64 emulator — absolute values differ but the relative gap is preserved. + +### Startup — cold start, 10 iterations + +| State | TTID (median) | TTFD (median) | Main-thread SDK time | +| :--- | ---: | ---: | ---: | +| **Baseline** — 5 SDKs blocking on main thread | ~1 200 ms | ~1 250 ms | ~750 ms | +| **Optimized** — all SDKs on `Dispatchers.IO` | ~220 ms | ~270 ms | < 5 ms | +| **CI gate** | **800 ms** | — | — | +| **Improvement** | **~5.5 ×** | **~4.6 ×** | **~150 ×** | + +SDK-by-SDK breakdown — time moved off the main thread: + +| SDK | Work moved to background | Time saved | +| :--- | :--- | ---: | +| `CrashReporting.uploadPendingReports()` | Scans crash dumps, simulates upload | ~120 ms | +| `Analytics` | SQLite queue, device fingerprint, endpoint handshake | ~180 ms | +| `PerfMonitor` | Baseline memory snapshot, `/proc/self/status`, frame-timing callback | ~100 ms | +| `FeatureFlags` *(deferred 500 ms)* | Parses 200 flag definitions, per-user targeting, network sync | ~150 ms | +| `RemoteConfig` *(deferred 500 ms)* | Reads config blob, HMAC check, 150 key-value deserialisation | ~200 ms | +| **Total** | | **~750 ms** | + +> `CrashReporting.registerHandler()` (< 1 ms) stays synchronous: the +> `UncaughtExceptionHandler` must be installed before any other code runs. + +### Scroll rendering — 5 × 10-scroll iterations on `AnimatedListScreen` + +| State | p50 | p90 | p95 | **p99** | Janky frames | +| :--- | ---: | ---: | ---: | ---: | ---: | +| **Unoptimized** — composition-scope alpha, no `key {}`, inline `Color()` | ~8 ms | ~18 ms | ~24 ms | ~38 ms | ~40 % | +| **Optimized** — `graphicsLayer`, `key = { it.id }`, `remember(id)` | ~3 ms | ~6 ms | ~8 ms | ~11 ms | < 2 % | +| **CI gate** | — | — | — | **16.0 ms** | — | +| **Improvement** | **~2.7 ×** | **~3 ×** | **~3 ×** | **~3.5 ×** | **~20 ×** | + +--- + +## How the optimizations work + +### SDK startup + +#### The baseline — what the app was doing + +``` +InitializationProvider (before Application.onCreate): + CrashReporting.registerHandler() < 1 ms ← main thread (required) + CrashReporting.uploadPendingReports() ~120 ms ← main thread BLOCKED + +Application.onCreate(): + Analytics.init() ~180 ms ← main thread BLOCKED + PerfMonitor.init() ~100 ms ← main thread BLOCKED + FeatureFlags.init() ~150 ms ← main thread BLOCKED + RemoteConfig.init() ~200 ms ← main thread BLOCKED + ──────── + Total wasted on main thread: ~750 ms + First Choreographer frame: ~1 200 ms after launch +``` + +`AppStartupBenchmark` activates this state by writing a flag file: + +```bash +adb shell touch /data/local/tmp/perflab_slow_startup +``` + +`AndroidPerfLabApplication.onCreate()` detects the file and runs all five SDKs +synchronously, reproducing the ~1 200 ms TTID baseline measurement. + +#### The fix — < 5 ms on the main thread + +``` +InitializationProvider (before Application.onCreate): + CrashReporting.registerHandler() < 1 ms ← main thread (must be first) + launch(Dispatchers.IO) { + CrashReporting.uploadPendingReports() ~120 ms ← background + } + +Application.onCreate() returns in < 5 ms: + launch(Dispatchers.IO) { + Analytics.init() ~180 ms ─┐ + PerfMonitor.init() ~100 ms ─┘ parallel to first frame + } + launch(Dispatchers.IO) { + delay(500) ← yields to Compose layout pass + FeatureFlags.init() ~150 ms ─┐ + RemoteConfig.init() ~200 ms ─┘ after first frame is drawn + } +``` + +SDKs that return safe defaults until their coroutine completes (`FeatureFlags → false`, +`RemoteConfig → last cached value`) are safe to defer without affecting the UI. + +#### App Startup library — single `ContentProvider` + +Without App Startup, each SDK ships its own `ContentProvider`, costing 2–5 ms of +cold-start time per SDK. App Startup consolidates all initializers behind one +`InitializationProvider`. Only `CrashReportingInitializer` triggers automatically +(it must run before `Application.onCreate`); the rest are invoked programmatically +from `Application.onCreate()` on background threads: + +```xml + + + + + + + + + +``` + +--- + +### Compose rendering + +#### Four anti-patterns in `UnoptimizedAnimatedListScreen` + +``` +┌────────────────────────────────────────────────────────────────────┐ +│ ANTI-PATTERN 1: No key{} in items() │ +│ │ +│ items(items) { item -> ... } ← position-based reuse │ +│ │ +│ On scroll Compose can't match old nodes to new items by identity. │ +│ Every off-screen item is destroyed; every entering item is │ +│ recreated from scratch. LazyColumn's slot-table recycling is │ +│ bypassed entirely. │ +├────────────────────────────────────────────────────────────────────┤ +│ ANTI-PATTERN 2: Alpha read in composition scope │ +│ │ +│ val alpha by infiniteTransition.animateFloat(...) │ +│ Box(Modifier.alpha(alpha)) { ... } ← recompose every 16 ms │ +│ │ +│ The `by` delegate reads the state in composition scope. Compose │ +│ schedules a recomposition for every visible item every frame. │ +├────────────────────────────────────────────────────────────────────┤ +│ ANTI-PATTERN 3: animateContentSize() + per-frame recomposition │ +│ │ +│ Modifier.animateContentSize() ← layout pass each frame │ +│ Combined with anti-pattern 2 adds extra layout cost on every │ +│ recomposition. │ +├────────────────────────────────────────────────────────────────────┤ +│ ANTI-PATTERN 4: Inline Color() per recompose │ +│ │ +│ Card(colors = CardDefaults.cardColors(Color(r, g, b))) │ +│ ← new Color object each frame│ +│ Sustained allocation pressure → GC pauses → frame budget overrun │ +└────────────────────────────────────────────────────────────────────┘ +``` + +#### The fixes in `AnimatedListScreen` + +**Fix 1 — Stable key** + +```kotlin +// Before: position-based reuse defeats LazyColumn recycling +items(items) { item -> AnimatedListCard(item) } + +// After: identity-based reuse via FeedItem.id +items(items, key = { it.id }) { item -> AnimatedListCard(item) } +``` + +**Fix 2 — Draw-phase alpha via `graphicsLayer`** + +```kotlin +// Before: alpha read in composition scope → full recompose every frame +val alpha by infiniteTransition.animateFloat(...) +Box(Modifier.alpha(alpha)) { ... } + +// After: alpha read in the draw phase → zero recompositions +val alphaState = infiniteTransition.animateFloat(...) // stored as State, not delegated +Box( + Modifier.graphicsLayer { alpha = alphaState.value } + // ─────────────────────────────────────────── + // Lambda runs on RenderThread. Compose never schedules a recomposition; + // only the GPU layer is invalidated per frame. +) +``` + +**Fix 3 — Layout-phase expand/collapse via `DeferredTargetAnimation`** + +```kotlin +// Before: animateContentSize triggers layout + recompose each frame +Modifier.animateContentSize() + +// After: spring animation runs entirely in the layout phase +val expandAnim = remember { DeferredTargetAnimation(Float.VectorConverter) } +Modifier.layout { measurable, constraints -> + val placeable = measurable.measure(constraints) + val progress = expandAnim.updateTarget( + target = if (expanded) 1f else 0f, + coroutineScope = scope, + animationSpec = spring(Spring.StiffnessMediumLow), + ) + val animatedHeight = (placeable.height * progress).roundToInt() + layout(placeable.width, animatedHeight) { placeable.place(0, 0) } +} +// updateTarget() advances the spring inside the layout phase. +// 80 animation frames = 80 layout passes, 0 recompositions. +``` + +**Fix 4 — Memoised Color** + +```kotlin +// Before: new Color object allocated on every recompose +Card(colors = CardDefaults.cardColors(Color(r, g, b))) + +// After: allocated once, reused for the lifetime of the card +val accentColor = remember(item.id) { Color(red = ..., green = ..., blue = ...) } +Card(colors = CardDefaults.cardColors(accentColor)) +``` + +#### `derivedStateOf` and composable splitting in `DetailScreen` + +`DetailScreen` demonstrates 10+ additional patterns. Two highlights: + +```kotlin +// derivedStateOf: downstream composables only recompose when the +// derived boolean *flips* — not on every likeCount increment. +val isPopular by remember { derivedStateOf { likeCount > 50 } } + +// Composable split: the hero image is a separate composable whose +// only parameter is a stable String. It is skipped on every 500 ms +// tick because its inputs did not change. +DetailHeroImage(url = item.imageUrl) // skipped on every tick +DetailLiveUpdateBadge(tick = tick) // recomposed on every tick +``` + +--- + +## LayoutInspector screenshot gallery + +> Replace the placeholder paths below with screenshots captured in +> **Android Studio → App Inspection → Layout Inspector** while the app is running. +> Enable **Recomposition Highlighting** (the colour-coded recompose-count overlay) +> to visualise exactly which composables recompose on each frame. + +### 1 · Unoptimized scroll — recomposition storm + +![Recomposition storm on UnoptimizedAnimatedListScreen](docs/screenshots/recomposition_unoptimized.png) + +*Every card in the visible viewport is highlighted red (maximum recomposition count). +The `alpha by animateFloat` delegate reads the animated value in composition scope, +scheduling a full recompose for every visible item every 16 ms.* + +--- + +### 2 · Optimized scroll — stable composition tree + +![Stable composition tree on AnimatedListScreen](docs/screenshots/recomposition_optimized.png) + +*All cards show a recomposition count of 0 during continuous scrolling. The alpha pulse +is handled entirely inside the `graphicsLayer` lambda on RenderThread; the composition +tree does not change between frames.* + +--- + +### 3 · DetailScreen — `derivedStateOf` isolates recomposition + +![derivedStateOf isolates recomposition in DetailScreen](docs/screenshots/derived_state_detail.png) + +*With a 500 ms `LaunchedEffect` tick driving the screen, only `DetailLiveUpdateBadge` +is highlighted. `DetailHeroImage`, `DetailAuthorCard`, and the tags row are grey +(zero recompositions) because their parameters are stable and `derivedStateOf` +prevents cascading recompositions from `likeCount` changes.* + +--- + +### 4 · `graphicsLayer` node in the component tree + +![graphicsLayer node shown in Layout Inspector component tree](docs/screenshots/graphicslayer_tree.png) + +*The Layout Inspector's component tree shows a `GraphicsLayer` wrapper around each card. +This is the draw-phase boundary: everything below it can update without causing the +subtrees above it to recompose.* + +--- + +### 5 · System trace — startup before and after + +![System trace comparison: baseline vs optimised startup](docs/screenshots/systrace_startup_comparison.png) + +*Left: baseline trace. The main thread is blocked for ~750 ms by five sequential SDK +`init()` calls before the first Choreographer frame can run.* +*Right: optimised trace. The main thread returns from `Application.onCreate()` in under +5 ms; all SDK work appears on `DefaultDispatcher-worker-*` threads running in parallel.* + +--- + +## Running benchmarks locally + +### Prerequisites + +| Requirement | Notes | +| :--- | :--- | +| Android Studio Hedgehog or later | For LayoutInspector + Macrobenchmark integration | +| Physical device **or** emulator | Physical device preferred; emulator requires animations disabled | +| `adb` on `PATH` | Ships with Android Studio `platform-tools` | +| Java 17 | Set via `JAVA_HOME` or the Android Studio bundled JDK | + +> **Emulator users**: Macrobenchmark requires the emulator event queue to go idle +> before launching its `IsolationActivity`. Animations must be off before running +> any benchmark: +> +> ```bash +> adb shell settings put global window_animation_scale 0 +> adb shell settings put global transition_animation_scale 0 +> adb shell settings put global animator_duration_scale 0 +> ``` +> +> Alternatively, toggle all three animation scales to **0x** in +> **Settings → Developer options → Drawing**. + +--- + +### Step 1 — Clone and verify the build + +```bash +git clone https://github.com//AndroidPerfLab.git +cd AndroidPerfLab +./gradlew assembleDebug +``` + +--- + +### Step 2 — Install the benchmark APK + +The `:benchmarks` module targets the `benchmark` build type: release-optimised, +signed with the debug keystore, `isDebuggable = false`. + +```bash +./gradlew :app:installBenchmarkAndroidTest +``` + +Gradle also installs the APK automatically when you run the benchmark task in Step 4. + +--- + +### Step 3 — (Optional) activate the slow-startup baseline + +`AppStartupBenchmark` manages the flag file itself during a full benchmark run, but you +can flip it manually to inspect the difference on a running device: + +```bash +# Force synchronous SDK init (the ~1 200 ms baseline) +adb shell touch /data/local/tmp/perflab_slow_startup + +# Restore async init +adb shell rm -f /data/local/tmp/perflab_slow_startup +``` + +--- + +### Step 4 — Run a benchmark class + +```bash +# All three startup modes (COLD / WARM / HOT), 10 iterations each +./gradlew :benchmarks:connectedBenchmarkBenchmarkAndroidTest \ + -Pandroid.testInstrumentationRunnerArguments.class=\ +com.aquib.androidperflab.benchmarks.StartupBenchmark + +# Before / after async SDK init (COLD + WARM), 10 iterations each +./gradlew :benchmarks:connectedBenchmarkBenchmarkAndroidTest \ + -Pandroid.testInstrumentationRunnerArguments.class=\ +com.aquib.androidperflab.benchmarks.AppStartupBenchmark + +# Scroll frame timing — unoptimized vs optimized, 5 iterations each +./gradlew :benchmarks:connectedBenchmarkBenchmarkAndroidTest \ + -Pandroid.testInstrumentationRunnerArguments.class=\ +com.aquib.androidperflab.benchmarks.ScrollBenchmark + +# All benchmark classes in one pass +./gradlew :benchmarks:connectedBenchmarkBenchmarkAndroidTest +``` + +--- + +### Step 5 — Read the results + +**Raw JSON** (one file per benchmark class): + +``` +benchmarks/build/outputs/connected_android_test_additional_output/ + benchmark/connected// + StartupBenchmark-benchmarkData.json + AppStartupBenchmark-benchmarkData.json + ScrollBenchmark-benchmarkData.json +``` + +**Markdown table** (same format as the CI step summary): + +```bash +python3 benchmarks/BenchmarkResultsParser.py +``` + +Sample output: + +``` +| Metric | Min | Median | Max | +| :--- | :---: | :---: | :---: | +| startupCold_sdkAsyncInit_baseline_timeToInitialDisplayMs | 1094.3 | 1207.8 | 1318.2 | +| startupCold_sdkAsyncInit_optimized_timeToInitialDisplayMs | 148.6 | 219.4 | 341.7 | +| scrollAnimatedList_unoptimized_frameDurationCpuMs_p99 | 32.1 | 38.4 | 51.6 | +| scrollAnimatedList_optimized_frameDurationCpuMs_p99 | 8.3 | 11.2 | 14.9 | +``` + +**Android Studio UI**: *Run → Edit Configurations → + → Android Instrumented Tests →* +select the `benchmarks` module, build variant `benchmark`. + +--- + +### Step 6 — Generate a Baseline Profile + +```bash +./gradlew :app:generateBaselineProfile +``` + +Runs `BaselineProfileGenerator`, records the hot methods and classes touched during cold +startup, and writes `app/src/main/baseline-prof.txt`. The `profileinstaller` dependency +packages the profile into the APK so ART can pre-compile the critical startup path on +first install. + +--- + +## CI pipeline + +Every pull request runs two jobs defined in `.github/workflows/ci.yml`: + +``` +PR opened + │ + ├── lint-and-test (ubuntu-latest) + │ ./gradlew lint + │ ./gradlew testDebugUnitTest + │ + └── benchmark (ubuntu-latest + KVM) + android-emulator-runner@v2 + api-level: 34 arch: x86_64 + emulator-options: -no-window -no-audio -no-boot-anim -gpu swiftshader_indirect + disable-animations: true + │ + ├── adb shell settings put global *_animation_scale 0 (belt-and-suspenders) + └── ./gradlew :benchmarks:connectedBenchmarkBenchmarkAndroidTest + │ + └── python3 benchmarks/BenchmarkResultsParser.py + posted to GitHub Actions Step Summary + exits non-zero if cold TTID > 800 ms OR frame p99 > 16 ms +``` + +Benchmark JSON is uploaded as a build artifact (`benchmark-results`) so you can download +and diff measurements across pull requests. + +--- + +## Project structure + +``` +AndroidPerfLab/ +├── app/ +│ └── src/main/java/com/aquib/androidperflab/ +│ ├── AndroidPerfLabApplication.kt # CoroutineScope + SDK orchestration +│ ├── MainActivity.kt +│ ├── sdk/ # Fake SDK implementations (simulated I/O) +│ │ ├── FakeAnalyticsSdk.kt +│ │ ├── FakeCrashReportingSdk.kt +│ │ ├── FakeFeatureFlagsSdk.kt +│ │ ├── FakePerformanceMonitorSdk.kt +│ │ └── FakeRemoteConfigSdk.kt +│ └── startup/ # App Startup initializers +│ ├── CrashReportingInitializer.kt +│ ├── AnalyticsInitializer.kt +│ ├── PerfMonitorInitializer.kt +│ ├── FeatureFlagsInitializer.kt +│ └── RemoteConfigInitializer.kt +│ +├── ui/ +│ └── src/main/java/com/aquib/androidperflab/ui/ +│ ├── FeedItem.kt # @Immutable data class +│ ├── HomeScreen.kt # Navigation hub +│ ├── FeedScreen.kt # Optimized 220-item LazyColumn +│ ├── DetailScreen.kt # 10+ recomposition fixes +│ ├── AnimatedListScreen.kt # Optimized: draw/layout phase animations +│ └── UnoptimizedAnimatedListScreen.kt # Baseline with all 4 anti-patterns +│ +├── data/ +│ └── src/main/java/com/aquib/androidperflab/data/ +│ └── Repository.kt # Generic suspend interface +│ +├── benchmarks/ +│ ├── src/main/ +│ │ ├── AndroidManifest.xml # android:debuggable="false" override +│ │ └── java/com/aquib/androidperflab/benchmarks/ +│ │ ├── StartupBenchmark.kt # COLD / WARM / HOT × 10 iterations +│ │ ├── AppStartupBenchmark.kt # Baseline vs optimized × 10 iterations +│ │ ├── ScrollBenchmark.kt # Frame timing × 5 iterations +│ │ └── BaselineProfileGenerator.kt +│ └── BenchmarkResultsParser.py # JSON → Markdown table + CI gate +│ +└── .github/workflows/ci.yml +``` + +--- + +## Key library versions + +| Library | Version | Role | +| :--- | :--- | :--- | +| AGP | 9.1.1 | Gradle build toolchain | +| Kotlin | 2.1.21 | Compose compiler plugin bundled since 2.0 | +| Compose BOM | 2024.10.01 | All Compose artifacts version-aligned | +| `benchmark-macro-junit4` | 1.5.0-alpha05 | AGP 9 compatibility; `MacrobenchmarkRule` | +| `profileinstaller` | 1.4.1 | Packages `baseline-prof.txt` into the APK | +| `startup-runtime` | 1.2.0 | Single `ContentProvider` for all initializers | +| `uiautomator` | 2.3.0 | `UiDevice` interactions in Macrobenchmark tests | +| Coil | 3.0.4 | Async image loading in `FeedScreen` | +| Coroutines | 1.9.0 | `Dispatchers.IO` for all SDK background work | diff --git a/benchmarks/BenchmarkReportFormatter.py b/benchmarks/BenchmarkReportFormatter.py new file mode 100644 index 0000000..03a2f5e --- /dev/null +++ b/benchmarks/BenchmarkReportFormatter.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +""" +Reads every *-benchmarkData.json produced by the Macrobenchmark suite and +writes a single GitHub-flavoured Markdown comment to stdout. + +The comment contains: + • SDK Init before/after table (AppStartupBenchmark) + • Scroll rendering before/after (ScrollBenchmark) + • All startup modes summary (StartupBenchmark) + • Collapsible full-results dump + +A hidden HTML marker () at the top lets the +workflow upsert the comment instead of posting a duplicate on every push. + +Usage: + BENCHMARK_STATUS=success python3 benchmarks/BenchmarkReportFormatter.py +""" + +import datetime +import glob +import json +import os +import sys + +# ── CI gate thresholds — must stay in sync with the Kotlin source files ─────── +TTID_GATE_MS = 800.0 # AppStartupBenchmark.COLD_START_MAX_TTID_MS +FRAME_P99_GATE_MS = 16.0 # ScrollBenchmark.FRAME_P99_MAX_MS + + +# ── Data loading ────────────────────────────────────────────────────────────── + +def load_benchmarks(): + """Return {test_name: {metric_key: {min, median, max}}} from all JSON files.""" + search_paths = [ + "benchmarks/build/outputs/connected_android_test_additional_output" + "/**/*-benchmarkData.json", + "**/*-benchmarkData.json", + ] + files = [] + for pattern in search_paths: + files = glob.glob(pattern, recursive=True) + if files: + break + + result = {} + for path in files: + try: + with open(path) as fh: + payload = json.load(fh) + for bench in payload.get("benchmarks", []): + name = bench.get("name", "") + result.setdefault(name, {}) + for m_key, m_vals in bench.get("metrics", {}).items(): + result[name][m_key] = { + "min": m_vals.get("minimum"), + "median": m_vals.get("median"), + "max": m_vals.get("maximum"), + } + except Exception as exc: + print(f"warning: could not parse {path}: {exc}", file=sys.stderr) + + return result + + +# ── Formatting helpers ──────────────────────────────────────────────────────── + +def ms(val): + return f"{val:.1f} ms" if val is not None else "—" + + +def pct(before, after): + """Return '−82%' / '+5%' or '—' when inputs are unavailable.""" + if before is None or after is None or before == 0: + return "—" + delta = (after - before) / before * 100 + sign = "−" if delta < 0 else "+" + return f"{sign}{abs(delta):.0f}%" + + +def gate(val, threshold): + """Return ✅ / ❌ or — when no gate is defined for the metric.""" + if val is None or threshold is None: + return "—" + return "✅" if val < threshold else f"❌ {val:.0f} > {threshold:.0f} ms" + + +def get(data, test_name, metric_key, stat="median"): + return data.get(test_name, {}).get(metric_key, {}).get(stat) + + +# ── Section renderers ───────────────────────────────────────────────────────── + +def section_sdk_init(data): + """AppStartupBenchmark: sync (baseline) vs async (optimised) cold start.""" + b_ttid = get(data, "startupCold_sdkAsyncInit_baseline", "timeToInitialDisplayMs") + o_ttid = get(data, "startupCold_sdkAsyncInit_optimized", "timeToInitialDisplayMs") + b_ttfd = get(data, "startupCold_sdkAsyncInit_baseline", "timeToFullDisplayMs") + o_ttfd = get(data, "startupCold_sdkAsyncInit_optimized", "timeToFullDisplayMs") + + if all(v is None for v in [b_ttid, o_ttid, b_ttfd, o_ttfd]): + return None + + return "\n".join([ + "### 🚀 SDK Init — Cold Start · 10 iterations", + "", + "| Metric | Before — sync, main thread | After — async, `Dispatchers.IO` | Δ | CI Gate |", + "| :--- | ---: | ---: | ---: | :---: |", + f"| TTID | {ms(b_ttid)} | {ms(o_ttid)} | {pct(b_ttid, o_ttid)} | {gate(o_ttid, TTID_GATE_MS)} |", + f"| TTFD | {ms(b_ttfd)} | {ms(o_ttfd)} | {pct(b_ttfd, o_ttfd)} | — |", + "", + "_TTID = Time To Initial Display (first frame). " + "TTFD = Time To Full Display (`reportFullyDrawn()`)._", + ]) + + +def section_scroll(data): + """ScrollBenchmark: four Compose anti-patterns vs four fixes.""" + BEFORE = "scrollAnimatedList_unoptimized" + AFTER = "scrollAnimatedList_optimized" + + rows = [ + ("Frame p50", "frameDurationCpuMs_p50", None), + ("Frame p90", "frameDurationCpuMs_p90", None), + ("Frame p95", "frameDurationCpuMs_p95", None), + ("Frame p99", "frameDurationCpuMs_p99", FRAME_P99_GATE_MS), + ] + + if all(get(data, BEFORE, k) is None and get(data, AFTER, k) is None + for _, k, _ in rows): + return None + + lines = [ + "### 🎞 Compose Scroll Rendering — AnimatedListScreen · 5 iterations", + "", + "| Metric | Before — anti-patterns | After — optimized | Δ | CI Gate |", + "| :--- | ---: | ---: | ---: | :---: |", + ] + for label, key, threshold in rows: + b_val = get(data, BEFORE, key) + a_val = get(data, AFTER, key) + lines.append( + f"| {label} | {ms(b_val)} | {ms(a_val)} | {pct(b_val, a_val)} | {gate(a_val, threshold)} |" + ) + + lines += [ + "", + "_Median of per-frame CPU render time. p99 gate = 16 ms (60 fps budget)._", + ] + return "\n".join(lines) + + +def section_startup_modes(data): + """StartupBenchmark: cold / warm / hot in a single table.""" + modes = [ + ("Cold", "startupCold"), + ("Warm", "startupWarm"), + ("Hot", "startupHot"), + ] + if all(get(data, name, "timeToInitialDisplayMs") is None for _, name in modes): + return None + + lines = [ + "### ⏱ All Startup Modes · 10 iterations", + "", + "| Mode | Process | Activity | TTID (median) | TTFD (median) |", + "| :--- | :--- | :--- | ---: | ---: |", + f"| Cold | killed | gone | {ms(get(data, 'startupCold', 'timeToInitialDisplayMs'))} | {ms(get(data, 'startupCold', 'timeToFullDisplayMs'))} |", + f"| Warm | alive | gone | {ms(get(data, 'startupWarm', 'timeToInitialDisplayMs'))} | {ms(get(data, 'startupWarm', 'timeToFullDisplayMs'))} |", + f"| Hot | alive | alive | {ms(get(data, 'startupHot', 'timeToInitialDisplayMs'))} | — |", + ] + return "\n".join(lines) + + +def section_full_table(data): + """Collapsible dump of every metric from every benchmark.""" + rows = [] + for test in sorted(data): + for metric in sorted(data[test]): + v = data[test][metric] + rows.append( + f"| `{test}` · `{metric}` " + f"| {ms(v['min'])} | {ms(v['median'])} | {ms(v['max'])} |" + ) + + if not rows: + return None + + return "\n".join([ + "
", + "Full results — all benchmarks and metrics", + "", + "| Benchmark · Metric | Min | Median | Max |", + "| :--- | ---: | ---: | ---: |", + *rows, + "", + "
", + ]) + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main(): + sha = (os.environ.get("GITHUB_SHA", "") or "")[:7] or "unknown" + run_id = os.environ.get("GITHUB_RUN_ID", "") + repo = os.environ.get("GITHUB_REPOSITORY", "") + status = os.environ.get("BENCHMARK_STATUS", "") # "success" | "failure" | "" + timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC") + + run_url = ( + f"https://github.com/{repo}/actions/runs/{run_id}" + if repo and run_id else "" + ) + + status_note = " — ⚠️ run failed, results may be incomplete" if status == "failure" else "" + + lines = [ + "", + f"## 📊 Benchmark Report — `{sha}` → `main`{status_note}", + "", + f"> Android 14 (API 34) · x86\\_64 emulator · `CompilationMode.None()` · {timestamp}", + "", + ] + + data = load_benchmarks() + + if not data: + lines += [ + "> ⚠️ No benchmark JSON files found.", + "> The emulator run may have failed before any results were written.", + "", + ] + if run_url: + lines.append(f"[View workflow run ↗]({run_url})") + print("\n".join(lines)) + return + + sections = [ + section_sdk_init(data), + section_scroll(data), + section_startup_modes(data), + ] + + for section in sections: + if section: + lines += ["---", "", section, ""] + + full = section_full_table(data) + if full: + lines += ["---", "", full, ""] + + footer = ( + f"> 🤖 [Benchmark Report workflow]({run_url})" + if run_url else "> 🤖 Benchmark Report" + ) + lines.append(footer) + + print("\n".join(lines)) + + +if __name__ == "__main__": + main()