diff --git a/src/core/bootstrap.c b/src/core/bootstrap.c index b608146..e7912fb 100644 --- a/src/core/bootstrap.c +++ b/src/core/bootstrap.c @@ -1,4 +1,4 @@ -/* Guest bootstrap helpers for elfuse +/* Guest bootstrap helpers * * Copyright 2026 elfuse contributors * SPDX-License-Identifier: Apache-2.0 @@ -30,7 +30,10 @@ #include "debug/log.h" -#define MAX_BOOT_REGIONS 32 +/* Worst case: 7 fixed regions (shim, shim-data, vDSO, brk, stack, mmap RX, mmap + * RW) plus up to ELF_MAX_SEGMENTS for both the executable and the interpreter. + */ +#define MAX_BOOT_REGIONS (8 + 2 * ELF_MAX_SEGMENTS) static bool append_boot_region(mem_region_t *regions, int *nregions, @@ -83,12 +86,12 @@ static void log_initial_page_tables(const guest_t *g, uint64_t ttbr0) } } -static int load_interpreter(guest_t *g, - const char *sysroot, - guest_bootstrap_t *boot) +static bool load_interpreter(guest_t *g, + const char *sysroot, + guest_bootstrap_t *boot) { if (boot->elf_info.interp_path[0] == '\0') - return 0; + return true; elf_resolve_interp(sysroot, boot->elf_info.interp_path, boot->interp_resolved, sizeof(boot->interp_resolved)); @@ -96,20 +99,20 @@ static int load_interpreter(guest_t *g, if (elf_load(boot->interp_resolved, &boot->interp_info) < 0) { log_error("failed to load interpreter: %s", boot->interp_resolved); - return -1; + return false; } if (boot->interp_info.e_machine != EM_AARCH64) { log_error("interpreter has unsupported machine type %u: %s", boot->interp_info.e_machine, boot->interp_resolved); - return -1; + return false; } boot->interp_base = g->interp_base; if (elf_map_segments(&boot->interp_info, boot->interp_resolved, g->host_base, g->guest_size, boot->interp_base) < 0) { log_error("failed to map interpreter segments"); - return -1; + return false; } log_debug( @@ -117,20 +120,27 @@ static int load_interpreter(guest_t *g, (unsigned long long) boot->interp_base, (unsigned long long) (boot->interp_info.entry + boot->interp_base), boot->interp_info.num_segments); - return 0; + return true; } -static int build_boot_regions(mem_region_t *regions, - int *nregions, - guest_t *g, - const guest_bootstrap_t *boot, - size_t shim_bin_len) +static bool build_boot_regions(mem_region_t *regions, + int *nregions, + guest_t *g, + const guest_bootstrap_t *boot, + size_t shim_bin_len) { + /* The vDSO trampolines live in the same 2MiB block as the shim. They must + * appear in the region set so finalize_block_perms validates and grants RX + * to the vDSO page when splitting the block; otherwise vdso_build cannot + * write into it through guest_ptr. 
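+     *
+     * Illustrative sketch under the default constants from guest.h/vdso.h
+     * (an example, not an invariant checked here): VDSO_BASE 0xF000 and
+     * SHIM_BASE 0x100000 share the 2MiB block [0x0, 0x200000), so
+     * finalize_block_perms L3-splits that block; the vDSO page and the shim
+     * pages come back RX while uncovered pages (e.g. the page table pool)
+     * stay invalid.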
+ */ if (!append_boot_region(regions, nregions, SHIM_BASE, SHIM_BASE + shim_bin_len, MEM_PERM_RX) || !append_boot_region(regions, nregions, SHIM_DATA_BASE, - SHIM_DATA_BASE + BLOCK_2MB, MEM_PERM_RW)) { - return -1; + SHIM_DATA_BASE + BLOCK_2MIB, MEM_PERM_RW) || + !append_boot_region(regions, nregions, VDSO_BASE, VDSO_BASE + VDSO_SIZE, + MEM_PERM_RX)) { + return false; } for (int i = 0; i < boot->elf_info.num_segments; i++) { @@ -140,7 +150,7 @@ static int build_boot_regions(mem_region_t *regions, boot->elf_info.segments[i].gpa + boot->elf_info.segments[i].memsz + boot->elf_load_base, elf_pf_to_prot(boot->elf_info.segments[i].flags))) { - return -1; + return false; } } @@ -151,7 +161,7 @@ static int build_boot_regions(mem_region_t *regions, boot->interp_info.segments[i].gpa + boot->interp_info.segments[i].memsz + boot->interp_base, elf_pf_to_prot(boot->interp_info.segments[i].flags))) { - return -1; + return false; } } @@ -163,12 +173,12 @@ static int build_boot_regions(mem_region_t *regions, MMAP_RX_INITIAL_END, MEM_PERM_RX) || !append_boot_region(regions, nregions, MMAP_BASE, MMAP_INITIAL_END, MEM_PERM_RW)) { - return -1; + return false; } g->mmap_rx_end = MMAP_RX_INITIAL_END; g->mmap_end = MMAP_INITIAL_END; - return 0; + return true; } int guest_bootstrap_prepare(guest_t *g, @@ -214,7 +224,7 @@ int guest_bootstrap_prepare(guest_t *g, } *guest_initialized = true; - log_debug("IPA size: %u bits (%lluGB primary)", g->ipa_bits, + log_debug("IPA size: %u bits (%llu GiB primary)", g->ipa_bits, (unsigned long long) (g->guest_size / (1024ULL * 1024 * 1024))); boot->elf_load_base = (boot->elf_info.e_type == ET_DYN) ? PIE_LOAD_BASE : 0; @@ -229,15 +239,15 @@ int guest_bootstrap_prepare(guest_t *g, g->brk_base = BRK_BASE_DEFAULT; g->brk_current = g->brk_base; - g->stack_top = ALIGN_UP(g->brk_base, BLOCK_2MB) + STACK_SIZE; + g->stack_top = ALIGN_UP(g->brk_base, BLOCK_2MIB) + STACK_SIZE; if (g->stack_top < STACK_TOP_DEFAULT) g->stack_top = STACK_TOP_DEFAULT; g->stack_base = g->stack_top - STACK_SIZE; - if (load_interpreter(g, sysroot, boot) < 0) + if (!load_interpreter(g, sysroot, boot)) return -1; - if (shim_bin_len > BLOCK_2MB) { + if (shim_bin_len > BLOCK_2MIB) { log_error("shim binary too large (%zu bytes)", shim_bin_len); return -1; } @@ -252,7 +262,7 @@ int guest_bootstrap_prepare(guest_t *g, boot->interp_base); sys_icache_invalidate((uint8_t *) g->host_base + SHIM_BASE, shim_bin_len); - if (build_boot_regions(regions, &nregions, g, boot, shim_bin_len) < 0) { + if (!build_boot_regions(regions, &nregions, g, boot, shim_bin_len)) { log_error("too many memory regions (%d >= %d)", nregions, MAX_BOOT_REGIONS); return -1; @@ -263,25 +273,12 @@ int guest_bootstrap_prepare(guest_t *g, log_error("failed to build page tables"); return -1; } - - for (int i = 1; i < nregions; i++) { - uint64_t prev_block = (regions[i - 1].gpa_end - 1) & ~(BLOCK_2MB - 1); - uint64_t curr_block = regions[i].gpa_start & ~(BLOCK_2MB - 1); - if (prev_block == curr_block && - regions[i - 1].perms != regions[i].perms && - guest_split_block(g, curr_block) == 0) { - guest_update_perms(g, regions[i - 1].gpa_start, - regions[i - 1].gpa_end, regions[i - 1].perms); - guest_update_perms(g, regions[i].gpa_start, regions[i].gpa_end, - regions[i].perms); - } - } g->need_tlbi = true; guest_region_add(g, SHIM_BASE, SHIM_BASE + shim_bin_len, LINUX_PROT_READ | LINUX_PROT_EXEC, LINUX_MAP_PRIVATE, 0, "[shim]"); - guest_region_add(g, SHIM_DATA_BASE, SHIM_DATA_BASE + BLOCK_2MB, + guest_region_add(g, SHIM_DATA_BASE, SHIM_DATA_BASE + BLOCK_2MIB, 
LINUX_PROT_READ | LINUX_PROT_WRITE, LINUX_MAP_PRIVATE, 0, "[shim-data]"); @@ -386,7 +383,7 @@ int guest_bootstrap_create_vcpu(guest_t *g, uint64_t shim_ipa = guest_ipa(g, SHIM_BASE); uint64_t entry_ipa = guest_ipa(g, boot->entry_point); uint64_t sp_ipa = guest_ipa(g, boot->stack_pointer); - uint64_t el1_sp = guest_ipa(g, SHIM_DATA_BASE + BLOCK_2MB); + uint64_t el1_sp = guest_ipa(g, SHIM_DATA_BASE + BLOCK_2MIB); hv_vcpu_t vcpu; hv_vcpu_exit_t *vexit; diff --git a/src/core/bootstrap.h b/src/core/bootstrap.h index 0939f95..e2ce4c4 100644 --- a/src/core/bootstrap.h +++ b/src/core/bootstrap.h @@ -1,11 +1,11 @@ -#pragma once - -/* Guest bootstrap helpers for elfuse +/* Guest bootstrap helpers * * Copyright 2026 elfuse contributors * SPDX-License-Identifier: Apache-2.0 */ +#pragma once + #include #include #include diff --git a/src/core/elf.c b/src/core/elf.c index c8837a5..316ad7c 100644 --- a/src/core/elf.c +++ b/src/core/elf.c @@ -97,7 +97,7 @@ int elf_load(const char *path, elf_info_t *info) fclose(f); return -1; } - /* Linux kernel caps program headers at 64KB. Reject pathological inputs + /* Linux kernel caps program headers at 64KiB. Reject pathological inputs * before allocating to avoid attacker-controlled large allocations. */ if ((size_t) ehdr.e_phnum * ehdr.e_phentsize > 65536) { diff --git a/src/core/guest.c b/src/core/guest.c index 83c5b16..ea4258e 100644 --- a/src/core/guest.c +++ b/src/core/guest.c @@ -6,11 +6,11 @@ * * Identity-mapped guest memory: GVA == GPA == offset into host_base. * The guest address space size is determined by the VM's configured IPA width - * (capped at 40-bit = 1TB): 64GB for native aarch64 on M2 (36-bit), 1TB for M3+ - * (40-bit). Reserved via mmap(MAP_ANON); macOS demand-pages physical memory on - * first touch, so only used pages consume RAM. The slab is mapped RWX to + * (capped at 40-bit = 1TiB): 64GiB for native aarch64 on M2 (36-bit), 1TiB for + * M3+ (40-bit). Reserved via mmap(MAP_ANON); macOS demand-pages physical memory + * on first touch, so only used pages consume RAM. The slab is mapped RWX to * Hypervisor.framework. The guest's own page tables (built here) enforce - * per-region permissions using 2MB block descriptors, which are mandatory for + * per-region permissions using 2MiB block descriptors, which are mandatory for * transparent misaligned access. Page tables can be extended at runtime via * guest_extend_page_tables(). * @@ -21,12 +21,12 @@ * created on demand when mprotect changes PROT_NONE to an accessible * permission. 
 *
- * Page table format: AArch64 4KB granule, up to 4-level:
- * L0 entry covers 512GB: multiple entries for >512GB address spaces
- * L1 entry covers 1GB: either block or table pointing to L2
- * L2 entry covers 2MB: block descriptors with final permissions
- * L3 entry covers 4KB: optional, created by guest_split_block() for
- *     mixed permissions within a 2MB block (W^X)
+ * Page table format: AArch64 4KiB granule, up to 4-level:
+ * L0 entry covers 512GiB: multiple entries for >512GiB address spaces
+ * L1 entry covers 1GiB: either block or table pointing to L2
+ * L2 entry covers 2MiB: block descriptors with final permissions
+ * L3 entry covers 4KiB: optional, created by guest_split_block() for mixed
+ *     permissions within a 2MiB block (W^X)
 */

#include

@@ -57,11 +57,11 @@ static void guest_region_clear(guest_t *g);
 #define PT_AP_RW_EL0 (1ULL << 6) /* AP[2:1]=01: RW at EL1, RW at EL0 */
 #define PT_AP_RO (3ULL << 6)     /* AP[2:1]=11: RO at EL1, RO at EL0 */
 
-/* PAGE_SIZE / ALIGN_2MB_* live in utils.h; BLOCK_2MB lives in core/guest.h. */
+/* PAGE_SIZE / ALIGN_2MIB_* live in utils.h; BLOCK_2MIB lives in core/guest.h. */
 #define PAGE_SIZE GUEST_PAGE_SIZE
-#define BLOCK_1GB (1ULL * 1024 * 1024 * 1024)
+#define BLOCK_1GIB (1ULL * 1024 * 1024 * 1024)
 
-/* Mask to extract the physical address from a 2MB L2 block descriptor */
+/* Mask to extract the physical address from a 2MiB L2 block descriptor */
 #define L2_BLOCK_ADDR_MASK 0xFFFFFFE00000ULL
 
 /* Forward declaration (defined in the page table section below) */
@@ -77,7 +77,7 @@ static pthread_mutex_t pt_lock = PTHREAD_MUTEX_INITIALIZER; /* Lock order: 2 */
 
 /* Track whether the 80% warning has been emitted (avoid log spam) */
 static bool pt_pool_warned = false;
 
-/* Allocate a zeroed 4KB page from the page table pool.
+/* Allocate a zeroed 4KiB page from the page table pool.
  * Returns GPA of the page, or 0 on pool exhaustion.
  * Acquires pt_lock internally. Caller typically holds mmap_lock.
  */
@@ -136,8 +136,8 @@ int guest_init(guest_t *g, uint64_t size, uint32_t ipa_bits)
     g->mmap_rx_next = MMAP_RX_BASE;
 
     /* Query the maximum IPA size supported by the hardware/kernel. macOS 15+
-     * on Apple Silicon reports 40 bits (1TB). Older versions or fallback
-     * yields 36 bits (64GB).
+     * on Apple Silicon reports 40 bits (1TiB). Older versions, or the
+     * fallback path, yield 36 bits (64GiB).
      */
     uint32_t max_ipa = 0;
     hv_vm_config_get_max_ipa_size(&max_ipa);
@@ -157,7 +157,7 @@ int guest_init(guest_t *g, uint64_t size, uint32_t ipa_bits)
         vm_ipa = 36;
 
     /* Primary buffer size: use the VM's configured IPA width (capped at
-     * 40-bit = 1TB). macOS demand-pages the host reservation, so only touched
+     * 40-bit = 1TiB). macOS demand-pages the host reservation, so only touched
      * pages cost physical memory.
      */
     uint32_t buf_bits = (vm_ipa > 40) ? 40 : vm_ipa;
@@ -168,17 +168,17 @@
     g->ipa_bits = vm_ipa;
 
     /* Compute dynamic layout limits from primary buffer size.
-     * interp_base: last 4GB (dynamic linker load address)
-     * mmap_limit: last 8GB reserved (max mmap RW address)
-     * For 64GB: interp=60GB, mmap_limit=56GB
-     * For 1TB: interp=1020GB, mmap_limit=1016GB
+     * interp_base: last 4GiB (dynamic linker load address)
+     * mmap_limit: last 8GiB reserved (max mmap RW address)
+     * For 64GiB: interp=60GiB, mmap_limit=56GiB
+     * For 1TiB: interp=1020GiB, mmap_limit=1016GiB
      */
     g->interp_base = g->guest_size - 0x100000000ULL;
     g->mmap_limit = g->guest_size - 0x200000000ULL;
 
     /* Reserve primary address space via mmap(MAP_ANON).
macOS demand-pages * this: physical pages are allocated only on first touch, so reserving up - * to 1TB costs nothing until pages are actually used. Do NOT memset + * to 1TiB costs nothing until pages are actually used. Do NOT memset * because that would touch all pages and defeat demand paging. */ g->host_base = @@ -261,14 +261,14 @@ int guest_init(guest_t *g, uint64_t size, uint32_t ipa_bits) ret = hv_vm_map(g->host_base, GUEST_IPA_BASE, size, HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); if (ret != HV_SUCCESS && buf_bits > max_ipa) { - /* 1TB primary map failed; fall back to hardware-default buffer. + /* 1TiB primary map failed; fall back to hardware-default buffer. * This handles undocumented HVF limits on primary buffer size. * Close shm_fd since the fallback uses anonymous memory (the file is no * longer mapped to host_base, so CoW fork cannot work). */ log_info( - "guest: hv_vm_map %lluGB failed (%d), " - "retrying with %u-bit (%lluGB)", + "guest: hv_vm_map %llu GiB failed (%d), " + "retrying with %u-bit (%llu GiB)", (unsigned long long) (size >> 30), (int) ret, max_ipa, 1ULL << (max_ipa - 30)); munmap(g->host_base, size); @@ -372,7 +372,7 @@ int guest_init_from_shm(guest_t *g, } log_debug( - "guest: CoW fork: mapped %lluGB from shm " + "guest: CoW fork: mapped %llu GiB from shm " "(ipa=%u bits)", (unsigned long long) (size / (1024ULL * 1024 * 1024)), ipa_bits); @@ -416,7 +416,7 @@ typedef struct { /* Per-thread GVA TLB cache. * * Single-entry translation cache: avoids 3-4 pointer chases through the page - * table on repeated accesses to the same 2MB block (or 4KB page if L3-split). + * table on repeated accesses to the same 2MiB block (or 4KiB page if L3-split). * Validated by an atomic generation counter in guest_t that is bumped on every * page table modification. */ @@ -424,7 +424,7 @@ static _Thread_local struct { const guest_t *owner; /* Which guest_t this entry belongs to */ uint64_t base_gva; /* Block/page-aligned GVA */ uint64_t base_gpa; /* Corresponding GPA offset */ - uint64_t size; /* 2MB or 4KB (0 = invalid) */ + uint64_t size; /* 2MiB or 4KiB (0 = invalid) */ int perms; /* Cached permissions */ uint64_t gen; /* guest_t.pt_gen at population time */ } gva_tlb; @@ -452,7 +452,7 @@ static int gva_translate_perm(const guest_t *g, uint64_t base = g->ipa_base; const uint64_t *l0 = pt_at(g, g->ttbr0 - base); - unsigned l0_idx = (unsigned) (gva / (512ULL * BLOCK_1GB)); + unsigned l0_idx = (unsigned) (gva / (512ULL * BLOCK_1GIB)); if (l0_idx >= 512 || !(l0[l0_idx] & PT_VALID)) return -1; @@ -460,7 +460,7 @@ static int gva_translate_perm(const guest_t *g, if (l1_ipa < base || l1_ipa - base >= g->guest_size) return -1; const uint64_t *l1 = pt_at(g, l1_ipa - base); - unsigned l1_idx = (unsigned) ((gva / BLOCK_1GB) % 512); + unsigned l1_idx = (unsigned) ((gva / BLOCK_1GIB) % 512); if (!(l1[l1_idx] & PT_VALID)) return -1; @@ -468,12 +468,12 @@ static int gva_translate_perm(const guest_t *g, if (l2_ipa < base || l2_ipa - base >= g->guest_size) return -1; const uint64_t *l2 = pt_at(g, l2_ipa - base); - unsigned l2_idx = (unsigned) ((gva / BLOCK_2MB) % 512); + unsigned l2_idx = (unsigned) ((gva / BLOCK_2MIB) % 512); if (!(l2[l2_idx] & PT_VALID)) return -1; if (l2[l2_idx] & PT_TABLE) { - /* L3 page descriptor: 4KB granularity. */ + /* L3 page descriptor: 4KiB granularity. 
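+         * E.g. (illustrative): gva 0x10003123 selects L3 entry
+         * (0x10003123 / 0x1000) % 512 = 3 and page offset 0x123, leaving
+         * chunk = 0x1000 - 0x123 bytes in the page.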
*/ uint64_t l3_ipa = l2[l2_idx] & 0xFFFFFFFFF000ULL; if (l3_ipa < base || l3_ipa - base >= g->guest_size) return -1; @@ -496,7 +496,7 @@ static int gva_translate_perm(const guest_t *g, out->gpa = gpa; out->chunk = PAGE_SIZE - (gva & (PAGE_SIZE - 1)); - /* Populate TLB cache for this 4KB page */ + /* Populate TLB cache for this 4KiB page */ gva_tlb.owner = g; gva_tlb.base_gva = gva & ~(PAGE_SIZE - 1); gva_tlb.base_gpa = page_ipa - base; @@ -506,7 +506,7 @@ static int gva_translate_perm(const guest_t *g, return 0; } - /* L2 block descriptor: 2MB granularity. */ + /* L2 block descriptor: 2MiB granularity. */ int perms = desc_to_perms(l2[l2_idx]); if ((perms & required_perms) != required_perms) return -1; @@ -514,18 +514,18 @@ static int gva_translate_perm(const guest_t *g, uint64_t block_ipa = l2[l2_idx] & L2_BLOCK_ADDR_MASK; if (block_ipa < base) return -1; - uint64_t gpa = (block_ipa - base) + (gva & (BLOCK_2MB - 1)); + uint64_t gpa = (block_ipa - base) + (gva & (BLOCK_2MIB - 1)); if (gpa >= g->guest_size) return -1; out->gpa = gpa; - out->chunk = BLOCK_2MB - (gva & (BLOCK_2MB - 1)); + out->chunk = BLOCK_2MIB - (gva & (BLOCK_2MIB - 1)); - /* Populate TLB cache for this 2MB block */ + /* Populate TLB cache for this 2MiB block */ gva_tlb.owner = g; - gva_tlb.base_gva = gva & ~(BLOCK_2MB - 1); + gva_tlb.base_gva = gva & ~(BLOCK_2MIB - 1); gva_tlb.base_gpa = block_ipa - base; - gva_tlb.size = BLOCK_2MB; + gva_tlb.size = BLOCK_2MIB; gva_tlb.perms = perms; gva_tlb.gen = gen; return 0; @@ -588,7 +588,7 @@ static void *gva_resolve_perm(const guest_t *g, { /* Always walk page tables to enforce permissions. The guest slab is * identity-mapped (GVA == GPA == offset), but L2 block descriptors carry - * permission bits and L3 page tables have per-4KB permissions after + * permission bits and L3 page tables have per-4KiB permissions after * guest_split_block. Skipping the walk would bypass W^X enforcement for * all normal guest addresses. */ @@ -755,7 +755,7 @@ int guest_read_str_small(const guest_t *g, uint64_t gva, char *dst, size_t max) void guest_reset(guest_t *g) { - /* Zero only actually-used memory regions. With a potentially 1TB address + /* Zero only actually-used memory regions. With a potentially 1TiB address * space, memset of the entire range would fault in all demand-paged memory * for no benefit. PROT_NONE regions (e.g., a managed runtime's heap * reservation) were never written to, so they're already in the MAP_ANON @@ -783,7 +783,7 @@ void guest_reset(guest_t *g) * callers; shim regions are added AFTER reset by the exec path) */ memset((uint8_t *) g->host_base + SHIM_BASE, 0, - SHIM_DATA_BASE + BLOCK_2MB - SHIM_BASE); + SHIM_DATA_BASE + BLOCK_2MIB - SHIM_BASE); /* Reset allocation state */ guest_pt_gen_bump(g); @@ -826,10 +826,10 @@ int guest_get_used_regions(const guest_t *g, n++; } - /* Shim data/stack (full 2MB block) */ + /* Shim data/stack (full 2MiB block) */ if (n < max) { out[n].offset = SHIM_DATA_BASE; - out[n].size = BLOCK_2MB; + out[n].size = BLOCK_2MIB; n++; } @@ -1263,7 +1263,7 @@ static void guest_region_clear(guest_t *g) /* Page table builder. */ -/* Build block descriptor for a 2MB block at the given GPA with perms. */ +/* Build block descriptor for a 2MiB block at the given GPA with perms. 
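+ * E.g. (illustrative, using the PT_AP_* encodings above): blocks without
+ * MEM_PERM_W get AP[2:1] = 11 (PT_AP_RO); writable ones get AP[2:1] = 01
+ * (PT_AP_RW_EL0).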
*/
 static uint64_t make_block_desc(uint64_t gpa, int perms)
 {
     uint64_t desc = (gpa & L2_BLOCK_ADDR_MASK) /* PA bits */
@@ -1289,6 +1289,144 @@ static uint64_t make_block_desc(uint64_t gpa, int perms)
     return desc;
 }
 
+/* Convert mixed-permission and partially-covered 2MiB blocks into L3 4KiB
+ * pages.
+ *
+ * The block-emit loop in guest_build_page_tables uses 2MiB block descriptors
+ * and OR-merges permissions when multiple regions touch the same block. The
+ * merge is correct only when every region in the block agrees on perms AND the
+ * union of those regions covers the entire block; otherwise it leaves
+ * over-permissive PTEs (e.g. .text RX + .data RW + heap RW in one 2MiB block
+ * collapses to RWX) and grants access to gap pages that should fault.
+ *
+ * For each unique 2MiB block touched by the input regions, this pass either
+ * keeps the block descriptor in place (single-perm full coverage) or splits it
+ * into 512 L3 pages, invalidates the lot, and re-validates each region's pages
+ * with the correct perms. Pages with no region coverage stay invalid, matching
+ * Linux semantics for inter-segment gaps in small static binaries.
+ */
+static bool finalize_block_perms(guest_t *g, const mem_region_t *regions, int n)
+{
+    /* Walk every 2MiB block touched by any region. Blocks shared by multiple
+     * regions are processed multiple times; the underlying split / invalidate /
+     * re-validate sequence is idempotent (guest_split_block is a no-op once
+     * the L2 entry is a table descriptor; guest_invalidate_ptes + per-region
+     * guest_update_perms produce the same final L3 state on every pass).
+     * Deduplication would need a fixed-size visited set, an optimization the
+     * modest scale here (~127 blocks for the default brk window) does not
+     * justify.
+     */
+    for (int r = 0; r < n; r++) {
+        uint64_t r_block_lo = ALIGN_2MIB_DOWN(regions[r].gpa_start);
+        uint64_t r_block_hi = ALIGN_2MIB_UP(regions[r].gpa_end);
+
+        for (uint64_t b = r_block_lo; b < r_block_hi; b += BLOCK_2MIB) {
+            /* Walk all regions touching this block. Track perm uniformity and
+             * collect them into idx[] sorted by start so coverage can be
+             * checked with a single sweep.
+             */
+            int idx[GUEST_MAX_REGIONS];
+            int nidx = 0;
+            int first_perm = -1;
+            bool same_perm = true;
+
+            for (int s = 0; s < n; s++) {
+                if (regions[s].gpa_end <= b ||
+                    regions[s].gpa_start >= b + BLOCK_2MIB)
+                    continue;
+                if (first_perm < 0)
+                    first_perm = regions[s].perms;
+                else if (regions[s].perms != first_perm)
+                    same_perm = false;
+
+                int pos = nidx;
+                while (pos > 0 &&
+                       regions[idx[pos - 1]].gpa_start > regions[s].gpa_start) {
+                    idx[pos] = idx[pos - 1];
+                    pos--;
+                }
+                idx[pos] = s;
+                nidx++;
+            }
+
+            /* Coverage sweep: regions are sorted by start, so the union covers
+             * the block iff each region begins at or before the running
+             * high-water mark.
+             */
+            uint64_t covered_until = b;
+            bool full_coverage = true;
+            for (int i = 0; i < nidx; i++) {
+                uint64_t cs = regions[idx[i]].gpa_start;
+                uint64_t ce = regions[idx[i]].gpa_end;
+                if (cs > covered_until) {
+                    full_coverage = false;
+                    break;
+                }
+                if (ce > covered_until)
+                    covered_until = ce;
+            }
+            if (covered_until < b + BLOCK_2MIB)
+                full_coverage = false;
+
+            /* Single perm covering the whole block: the existing 2MiB
+             * descriptor is already correct.
+             */
+            if (same_perm && full_coverage)
+                continue;
+
+            /* Split into L3 pages, invalidate the lot, then rebuild the block
+             * from per-page unions.
This preserves the required permission + * union when adjacent ELF segments share a 4KiB page after + * page-granularity rounding. + */ + if (guest_split_block(g, b) < 0) + return false; + if (guest_invalidate_ptes(g, b, b + BLOCK_2MIB) < 0) + return false; + + int page_perms[BLOCK_2MIB / PAGE_SIZE] = {0}; + for (int i = 0; i < nidx; i++) { + uint64_t s_start = regions[idx[i]].gpa_start; + uint64_t s_end = regions[idx[i]].gpa_end; + uint64_t apply_start = (s_start > b) ? s_start : b; + uint64_t apply_end = + (s_end < b + BLOCK_2MIB) ? s_end : b + BLOCK_2MIB; + /* Page-align to 4KiB so partially covered pages are recreated + * with the union of all overlapping segment permissions. + */ + apply_start = ALIGN_DOWN(apply_start, PAGE_SIZE); + apply_end = PAGE_ALIGN_UP(apply_end); + if (apply_end > b + BLOCK_2MIB) + apply_end = b + BLOCK_2MIB; + + for (uint64_t pa = apply_start; pa < apply_end; + pa += PAGE_SIZE) { + unsigned page_idx = (unsigned) ((pa - b) / PAGE_SIZE); + page_perms[page_idx] |= regions[idx[i]].perms; + } + } + + for (int i = 0; i < (int) ARRAY_SIZE(page_perms);) { + int perms = page_perms[i]; + int run_start = i; + + while (i < (int) ARRAY_SIZE(page_perms) && + page_perms[i] == perms) + i++; + if (!perms) + continue; + + uint64_t run_gpa_start = b + (uint64_t) run_start * PAGE_SIZE; + uint64_t run_gpa_end = b + (uint64_t) i * PAGE_SIZE; + if (guest_update_perms(g, run_gpa_start, run_gpa_end, perms) < + 0) + return false; + } + } + } + + return true; +} + uint64_t guest_build_page_tables(guest_t *g, const mem_region_t *regions, int n) { uint64_t base = g->ipa_base; @@ -1300,20 +1438,20 @@ uint64_t guest_build_page_tables(guest_t *g, const mem_region_t *regions, int n) uint64_t *l0 = pt_at(g, l0_gpa); - /* For each region, determine which 2MB blocks need mapping. + /* For each region, determine which 2MiB blocks need mapping. * Identity-mapped: VA == GPA, so L0/L1/L2 indices and the block * descriptor output address are both derived from gpa_start + ipa_base. 
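     * For example (illustrative, with ipa_base 0): gpa 0x240200000, i.e.
     * 9GiB + 2MiB, yields l0_idx 0, l1_idx 9, and l2_idx 1 in the divisions
     * below.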
*/ for (int r = 0; r < n; r++) { - uint64_t gpa_start = ALIGN_2MB_DOWN(regions[r].gpa_start); - uint64_t gpa_end = ALIGN_2MB_UP(regions[r].gpa_end); + uint64_t gpa_start = ALIGN_2MIB_DOWN(regions[r].gpa_start); + uint64_t gpa_end = ALIGN_2MIB_UP(regions[r].gpa_end); int perms = regions[r].perms; - for (uint64_t gpa = gpa_start; gpa < gpa_end; gpa += BLOCK_2MB) { + for (uint64_t gpa = gpa_start; gpa < gpa_end; gpa += BLOCK_2MIB) { uint64_t lookup_addr = base + gpa; - /* L0 index: which 512GB slot this VA falls in */ - unsigned l0_idx = (unsigned) (lookup_addr / (512ULL * BLOCK_1GB)); + /* L0 index: which 512GiB slot this VA falls in */ + unsigned l0_idx = (unsigned) (lookup_addr / (512ULL * BLOCK_1GIB)); if (l0_idx >= 512) { log_error("guest: VA 0x%llx out of L0 range", (unsigned long long) lookup_addr); @@ -1330,9 +1468,9 @@ uint64_t guest_build_page_tables(guest_t *g, const mem_region_t *regions, int n) uint64_t l1_ipa = l0[l0_idx] & 0xFFFFFFFFF000ULL; uint64_t *l1 = pt_at(g, l1_ipa - base); - /* L1 index within the 512GB L0 entry (from VA) */ + /* L1 index within the 512GiB L0 entry (from VA) */ unsigned l1_idx = - (unsigned) ((lookup_addr % (512ULL * BLOCK_1GB)) / BLOCK_1GB); + (unsigned) ((lookup_addr % (512ULL * BLOCK_1GIB)) / BLOCK_1GIB); if (l1_idx >= 512) { log_error("guest: VA 0x%llx out of L1 range", (unsigned long long) lookup_addr); @@ -1347,19 +1485,19 @@ uint64_t guest_build_page_tables(guest_t *g, const mem_region_t *regions, int n) l1[l1_idx] = (base + l2_gpa) | PT_VALID | PT_TABLE; } - /* L2 table for this 1GB region (stored in host at gpa offset) */ + /* L2 table for this 1GiB region (stored in host at gpa offset) */ uint64_t l2_ipa = l1[l1_idx] & 0xFFFFFFFFF000ULL; uint64_t l2_gpa_off = l2_ipa - base; uint64_t *l2 = pt_at(g, l2_gpa_off); - /* L2 index: which 2MB block within the 1GB region (from VA) */ + /* L2 index: which 2MiB block within the 1GiB region (from VA) */ unsigned l2_idx = - (unsigned) ((lookup_addr % BLOCK_1GB) / BLOCK_2MB); + (unsigned) ((lookup_addr % BLOCK_1GIB) / BLOCK_2MIB); /* If block already mapped, merge permissions (most permissive). * Use a local variable for the merged perms. Do NOT modify the * outer perms variable, which would leak accumulated permissions - * to subsequent 2MB blocks in the same region. + * to subsequent 2MiB blocks in the same region. */ int block_perms = perms; if (l2[l2_idx] & PT_BLOCK) { @@ -1380,11 +1518,18 @@ uint64_t guest_build_page_tables(guest_t *g, const mem_region_t *regions, int n) /* Store TTBR0 for later use by guest_extend_page_tables */ uint64_t ttbr0 = base + l0_gpa; g->ttbr0 = ttbr0; + + /* Convert blocks shared by regions with mixed perms or partial coverage + * into L3 4KiB pages so each segment's permissions are honored exactly. + */ + if (!finalize_block_perms(g, regions, n)) + return 0; + guest_pt_gen_bump(g); return ttbr0; } -/* Extend page tables to cover [start, end) with 2MB block descriptors. +/* Extend page tables to cover [start, end) with 2MiB block descriptors. * Walks the existing L0->L1 structure (from g->ttbr0) and allocates new * L2 tables as needed. This is safe to call while the vCPU is paused * (during HVC #5 handling). 
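 * (For example, an anonymous 5MiB mmap placed at MMAP_BASE leads sys_mmap to
 * extend [0x200000000, 0x200600000) with RW blocks; illustrative numbers.)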
Sets g->need_tlbi so the shim flushes the @@ -1401,14 +1546,14 @@ int guest_extend_page_tables(guest_t *g, uint64_t l0_gpa_off = g->ttbr0 - base; uint64_t *l0 = pt_at(g, l0_gpa_off); - /* Walk 2MB blocks in [start, end) */ - uint64_t addr_start = ALIGN_2MB_DOWN(start), addr_end = ALIGN_2MB_UP(end); + /* Walk 2MiB blocks in [start, end) */ + uint64_t addr_start = ALIGN_2MIB_DOWN(start), addr_end = ALIGN_2MIB_UP(end); - for (uint64_t addr = addr_start; addr < addr_end; addr += BLOCK_2MB) { + for (uint64_t addr = addr_start; addr < addr_end; addr += BLOCK_2MIB) { uint64_t ipa = base + addr; - /* L0 index: which 512GB slot (>512GB addresses need L0[1]+) */ - unsigned l0_idx = (unsigned) (ipa / (512ULL * BLOCK_1GB)); + /* L0 index: which 512GiB slot (>512GiB addresses need L0[1]+) */ + unsigned l0_idx = (unsigned) (ipa / (512ULL * BLOCK_1GIB)); if (l0_idx >= 512) { log_error("guest: IPA 0x%llx out of L0 range in extend", (unsigned long long) ipa); @@ -1426,7 +1571,8 @@ int guest_extend_page_tables(guest_t *g, uint64_t l1_ipa = l0[l0_idx] & 0xFFFFFFFFF000ULL; uint64_t *l1 = pt_at(g, l1_ipa - base); - unsigned l1_idx = (unsigned) ((ipa % (512ULL * BLOCK_1GB)) / BLOCK_1GB); + unsigned l1_idx = + (unsigned) ((ipa % (512ULL * BLOCK_1GIB)) / BLOCK_1GIB); if (l1_idx >= 512) { log_error("guest: IPA 0x%llx out of L1 range in extend", (unsigned long long) ipa); @@ -1445,7 +1591,7 @@ int guest_extend_page_tables(guest_t *g, uint64_t l2_ipa = l1[l1_idx] & 0xFFFFFFFFF000ULL; uint64_t *l2 = pt_at(g, l2_ipa - base); - unsigned l2_idx = (unsigned) ((ipa % BLOCK_1GB) / BLOCK_2MB); + unsigned l2_idx = (unsigned) ((ipa % BLOCK_1GIB) / BLOCK_2MIB); /* Only map if not already mapped */ if (!(l2[l2_idx] & PT_BLOCK)) { @@ -1465,7 +1611,7 @@ int guest_extend_page_tables(guest_t *g, */ #define PT_L3_PAGE (3ULL) -/* Build a 4KB L3 page descriptor with the given permissions. +/* Build a 4KiB L3 page descriptor with the given permissions. * Layout matches block descriptors (AF, SH, NS, MAIR, AP, XN) except * bits[1:0]=11 instead of 01. */ @@ -1506,26 +1652,26 @@ static uint64_t *find_l2_entry(guest_t *g, uint64_t gpa_offset) uint64_t l0_gpa_off = g->ttbr0 - base; uint64_t *l0 = pt_at(g, l0_gpa_off); - /* L0 index from actual IPA (not base), correct for >512GB */ - unsigned l0_idx = (unsigned) (ipa / (512ULL * BLOCK_1GB)); + /* L0 index from actual IPA (not base), correct for >512GiB */ + unsigned l0_idx = (unsigned) (ipa / (512ULL * BLOCK_1GIB)); if (l0_idx >= 512 || !(l0[l0_idx] & PT_VALID)) return NULL; uint64_t l1_ipa = l0[l0_idx] & 0xFFFFFFFFF000ULL; uint64_t *l1 = pt_at(g, l1_ipa - base); - unsigned l1_idx = (unsigned) ((ipa % (512ULL * BLOCK_1GB)) / BLOCK_1GB); + unsigned l1_idx = (unsigned) ((ipa % (512ULL * BLOCK_1GIB)) / BLOCK_1GIB); if (l1_idx >= 512 || !(l1[l1_idx] & PT_VALID)) return NULL; uint64_t l2_ipa = l1[l1_idx] & 0xFFFFFFFFF000ULL; uint64_t *l2 = pt_at(g, l2_ipa - base); - unsigned l2_idx = (unsigned) ((ipa % BLOCK_1GB) / BLOCK_2MB); + unsigned l2_idx = (unsigned) ((ipa % BLOCK_1GIB) / BLOCK_2MIB); return &l2[l2_idx]; } -/* Split a 2MB L2 block descriptor into 512 × 4KB L3 page descriptors. +/* Split a 2MiB L2 block descriptor into 512 × 4KiB L3 page descriptors. * The caller provides the L2 entry via find_l2_entry. * Extracts the output IPA from the existing descriptor. 
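 * E.g. (illustrative): a descriptor whose output IPA is 0x40000000 becomes
 * 512 page descriptors covering 0x40000000 + n * 0x1000, n = 0..511, each
 * inheriting the block's permission bits.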
*/ @@ -1549,7 +1695,7 @@ static int split_l2_block(guest_t *g, uint64_t *l2_entry) return -1; uint64_t *l3 = pt_at(g, l3_gpa); - /* Fill 512 L3 entries with 4KB page descriptors inheriting the + /* Fill 512 L3 entries with 4KiB page descriptors inheriting the * block's permissions. Extract the output IPA from bits [47:21] * of the existing descriptor (not from the caller's address). */ @@ -1564,7 +1710,7 @@ static int split_l2_block(guest_t *g, uint64_t *l2_entry) int guest_split_block(guest_t *g, uint64_t block_gpa) { - uint64_t block_start = ALIGN_2MB_DOWN(block_gpa); + uint64_t block_start = ALIGN_2MIB_DOWN(block_gpa); uint64_t *l2_entry = find_l2_entry(g, block_start); return split_l2_block(g, l2_entry); } @@ -1580,13 +1726,13 @@ int guest_invalidate_ptes(guest_t *g, uint64_t start, uint64_t end) for (uint64_t addr = start; addr < end;) { uint64_t *l2_entry = find_l2_entry(g, addr); if (!l2_entry) { - /* No L2 entry (already unmapped); skip this 2MB block */ - addr = ALIGN_2MB_UP(addr + 1); + /* No L2 entry (already unmapped); skip this 2MiB block */ + addr = ALIGN_2MIB_UP(addr + 1); continue; } - uint64_t block_start = ALIGN_2MB_DOWN(addr); - uint64_t block_end = block_start + BLOCK_2MB; + uint64_t block_start = ALIGN_2MIB_DOWN(addr); + uint64_t block_end = block_start + BLOCK_2MIB; /* Not mapped at all: skip */ if (!(*l2_entry & 1)) { @@ -1594,25 +1740,25 @@ int guest_invalidate_ptes(guest_t *g, uint64_t start, uint64_t end) continue; } - /* Check if this is a 2MB block or already an L3 table */ + /* Check if this is a 2MiB block or already an L3 table */ if ((*l2_entry & 3) == 1) { - /* 2MB block descriptor */ + /* 2MiB block descriptor */ if (start <= block_start && end >= block_end) { - /* Invalidating the entire 2MB block: clear the L2 entry */ + /* Invalidating the entire 2MiB block: clear the L2 entry */ *l2_entry = 0; g->need_tlbi = true; addr = block_end; continue; } - /* Partial invalidation within a 2MB block: split first, + /* Partial invalidation within a 2MiB block: split first, * then invalidate individual L3 pages below. 
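             * E.g. (illustrative) invalidating [block+0x1000, block+0x3000)
             * clears only L3 entries 1 and 2; the other 510 pages keep their
             * existing descriptors.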
*/ if (guest_split_block(g, block_start) < 0) return -1; } - /* L3 table: invalidate individual 4KB page descriptors */ + /* L3 table: invalidate individual 4KiB page descriptors */ uint64_t l3_ipa = *l2_entry & 0xFFFFFFFFF000ULL; uint64_t *l3 = pt_at(g, l3_ipa - base); @@ -1621,7 +1767,7 @@ int guest_invalidate_ptes(guest_t *g, uint64_t start, uint64_t end) for (uint64_t pa = page_start; pa < page_end; pa += PAGE_SIZE) { unsigned l3_idx = - (unsigned) (((base + pa) % BLOCK_2MB) / PAGE_SIZE); + (unsigned) (((base + pa) % BLOCK_2MIB) / PAGE_SIZE); l3[l3_idx] = 0; /* Invalid descriptor */ } @@ -1644,13 +1790,13 @@ int guest_update_perms(guest_t *g, uint64_t start, uint64_t end, int perms) for (uint64_t addr = start; addr < end;) { uint64_t *l2_entry = find_l2_entry(g, addr); if (!l2_entry) { - /* Skip unmapped 2MB blocks */ - addr = ALIGN_2MB_UP(addr + 1); + /* Skip unmapped 2MiB blocks */ + addr = ALIGN_2MIB_UP(addr + 1); continue; } - uint64_t block_start = ALIGN_2MB_DOWN(addr); - uint64_t block_end = block_start + BLOCK_2MB; + uint64_t block_start = ALIGN_2MIB_DOWN(addr); + uint64_t block_end = block_start + BLOCK_2MIB; /* Not mapped at all: skip */ if (!(*l2_entry & 1)) { @@ -1658,12 +1804,12 @@ int guest_update_perms(guest_t *g, uint64_t start, uint64_t end, int perms) continue; } - /* Check if this is a 2MB block or already an L3 table */ + /* Check if this is a 2MiB block or already an L3 table */ if ((*l2_entry & 3) == 1) { - /* 2MB block descriptor */ + /* 2MiB block descriptor */ int old_perms = desc_to_perms(*l2_entry); - /* If the whole 2MB block changes permissions, rewrite the block + /* If the whole 2MiB block changes permissions, rewrite the block * descriptor without splitting. Extract the output IPA from the * existing descriptor, correct for both identity and non-identity * mapped regions. @@ -1678,7 +1824,7 @@ int guest_update_perms(guest_t *g, uint64_t start, uint64_t end, int perms) continue; } - /* Partial update: split the 2MB block into L3 pages first, then + /* Partial update: split the 2MiB block into L3 pages first, then * fall through to update individual pages below. */ if (old_perms != perms) { @@ -1691,17 +1837,17 @@ int guest_update_perms(guest_t *g, uint64_t start, uint64_t end, int perms) } } - /* L3 table: update individual 4KB page descriptors */ + /* L3 table: update individual 4KiB page descriptors */ uint64_t l3_ipa = *l2_entry & 0xFFFFFFFFF000ULL; uint64_t *l3 = pt_at(g, l3_ipa - base); - /* Update pages within this 2MB block that fall in [start, end) */ + /* Update pages within this 2MiB block that fall in [start, end) */ uint64_t page_start = (addr > block_start) ? addr : block_start; uint64_t page_end = (end < block_end) ? end : block_end; for (uint64_t pa = page_start; pa < page_end; pa += PAGE_SIZE) { unsigned l3_idx = - (unsigned) (((base + pa) % BLOCK_2MB) / PAGE_SIZE); + (unsigned) (((base + pa) % BLOCK_2MIB) / PAGE_SIZE); /* Extract the existing output IPA from the L3 entry. For * non-identity mapped regions, pa is a VA not a GPA, so the builder * must use the IPA already stored in the descriptor (set by @@ -1745,14 +1891,14 @@ int guest_materialize_lazy(guest_t *g, uint64_t fault_offset) if (!region) return -1; /* Not a noreserve region */ - /* Materialize one 2MB block containing the fault address. This is + /* Materialize one 2MiB block containing the fault address. This is * the smallest granule that guest_extend_page_tables works with. 
* For the common case (sparse heap touch), materializing one block * at a time is the right trade-off: it avoids over-committing the * large reservation while keeping the fault rate manageable. */ - uint64_t block_start = fault_offset & ~(BLOCK_2MB - 1); - uint64_t block_end = block_start + BLOCK_2MB; + uint64_t block_start = fault_offset & ~(BLOCK_2MIB - 1); + uint64_t block_end = block_start + BLOCK_2MIB; /* Clamp to guest size */ if (block_end > g->guest_size) @@ -1791,9 +1937,9 @@ int guest_materialize_lazy(guest_t *g, uint64_t fault_offset) return -1; /* If this block had no page-table entry before the lazy fault, - * guest_extend_page_tables() necessarily created a full 2MB block. + * guest_extend_page_tables() necessarily created a full 2MiB block. * Split it and remove pages outside this noreserve region so holes and - * guards in the same 2MB block remain faults. Existing split blocks + * guards in the same 2MiB block remain faults. Existing split blocks * already encode neighboring mappings, so leave them intact. */ if (!had_mapping) { diff --git a/src/core/guest.h b/src/core/guest.h index ee99cb1..6e57623 100644 --- a/src/core/guest.h +++ b/src/core/guest.h @@ -6,8 +6,8 @@ * * Provides identity-mapped guest physical memory (GVA == GPA == offset into * host buffer). Buffer size is determined by the VM's configured IPA width: - * - Native aarch64 on M2 (36-bit IPA): 64GB - * - Native aarch64 on M3+ (40-bit IPA): 1TB + * - Native aarch64 on M2 (36-bit IPA): 64GiB + * - Native aarch64 on M3+ (40-bit IPA): 1TiB * * Reserved via mmap(MAP_ANON); macOS demand-pages physical memory on first * touch, so unused pages cost nothing. The slab is mapped RWX to @@ -27,49 +27,49 @@ /* Memory layout constants. * * Guest memory size is determined dynamically from the VM's IPA width - * (36-bit = 64GB on M2, 40-bit = 1TB on M3+). See guest.c for the + * (36-bit = 64GiB on M2, 40-bit = 1TiB on M3+). See guest.c for the * runtime probe that selects the correct size. */ #define PT_POOL_BASE 0x00010000ULL /* Page table pool start */ -#define PT_POOL_END 0x00100000ULL /* Page table pool end (960KB) */ -#define SHIM_BASE 0x00100000ULL /* Shim code (2MB block, RX) */ -#define SHIM_DATA_BASE 0x00200000ULL /* Shim stack/data (2MB block, RW) */ +#define PT_POOL_END 0x00100000ULL /* Page table pool end (960KiB) */ +#define SHIM_BASE 0x00100000ULL /* Shim code (2MiB block, RX) */ +#define SHIM_DATA_BASE 0x00200000ULL /* Shim stack/data (2MiB block, RW) */ #define ELF_DEFAULT_BASE 0x00400000ULL /* Typical ELF load base */ -#define PIE_LOAD_BASE 0x00400000ULL /* PIE (ET_DYN) executable base (4MB) */ -#define BRK_BASE_DEFAULT 0x01000000ULL /* Default brk start (16MB) */ +#define PIE_LOAD_BASE 0x00400000ULL /* PIE (ET_DYN) executable base (4MiB) */ +#define BRK_BASE_DEFAULT 0x01000000ULL /* Default brk start (16MiB) */ -/* 8MB stack (four 2MB blocks); unused HVF backing pages consume no RAM. */ +/* 8MiB stack (four 2MiB blocks); unused HVF backing pages consume no RAM. */ #define STACK_SIZE 0x00800000ULL -/* Used when brk_start is below 128MB; otherwise placed above brk. */ +/* Used when brk_start is below 128MiB; otherwise placed above brk. 
*/ #define STACK_TOP_DEFAULT 0x08000000ULL -#define STACK_GUARD_SIZE 0x00001000ULL /* 4KB guard page at bottom of stack */ +#define STACK_GUARD_SIZE 0x00001000ULL /* 4KiB guard at stack bottom */ -/* mmap RX region for PROT_EXEC; placed below 8GB to leave the high mmap +/* mmap RX region for PROT_EXEC; placed below 8GiB to leave the high mmap * region clear for runtimes that demand a specific minimum heap address. */ #define MMAP_RX_BASE 0x10000000ULL -/* Initial pre-mapped mmap RX end. Only covers the first 2MB block; +/* Initial pre-mapped mmap RX end. Only covers the first 2MiB block; * additional pages are mapped lazily by guest_extend_page_tables() * when sys_mmap needs more PROT_EXEC space. Reduces startup time * and memory pressure for small binaries that never call mmap. */ -#define MMAP_RX_INITIAL_END (MMAP_RX_BASE + 0x200000ULL) /* +2MB */ +#define MMAP_RX_INITIAL_END (MMAP_RX_BASE + 0x200000ULL) /* +2MiB */ -/* mmap RW region starts at 8GB to match real Linux address layouts. */ +/* mmap RW region starts at 8GiB to match real Linux address layouts. */ #define MMAP_BASE 0x200000000ULL -/* Initial pre-mapped mmap RW end. Only covers the first 2MB block; +/* Initial pre-mapped mmap RW end. Only covers the first 2MiB block; * additional pages are mapped lazily by guest_extend_page_tables(). */ -#define MMAP_INITIAL_END (MMAP_BASE + 0x200000ULL) /* +2MB */ +#define MMAP_INITIAL_END (MMAP_BASE + 0x200000ULL) /* +2MiB */ /* mmap_limit and interp_base are computed dynamically from guest_size * in main.c and stored in guest_t. */ -#define BLOCK_2MB (2ULL * 1024 * 1024) +#define BLOCK_2MIB (2ULL * 1024 * 1024) /* IPA base: guest memory is mapped at this IPA in the hypervisor. * All guest physical addresses = GUEST_IPA_BASE + offset. @@ -91,8 +91,8 @@ * Identity-mapped: VA == GPA. */ typedef struct { - uint64_t gpa_start; /* Output IPA/GPA (2MB aligned) */ - uint64_t gpa_end; /* Output IPA/GPA end (exclusive, 2MB aligned) */ + uint64_t gpa_start; /* Output IPA/GPA (2MiB aligned) */ + uint64_t gpa_end; /* Output IPA/GPA end (exclusive, 2MiB aligned) */ int perms; /* MEM_PERM_* flags */ } mem_region_t; @@ -261,14 +261,14 @@ int guest_read_str(const guest_t *g, uint64_t gva, char *dst, size_t max); int guest_read_str_small(const guest_t *g, uint64_t gva, char *dst, size_t max); /* Build L0->L1->L2 page tables from an array of memory regions. - * Uses 2MB block descriptors. Returns the TTBR0 value (GPA of L0 table), + * Uses 2MiB block descriptors. Returns the TTBR0 value (GPA of L0 table), * or 0 on failure. */ uint64_t guest_build_page_tables(guest_t *g, const mem_region_t *regions, int n); -/* Extend page tables to cover a new address range [start, end) with 2MB +/* Extend page tables to cover a new address range [start, end) with 2MiB * block descriptors. Reuses the existing L0->L1 table structure and * allocates new L2 tables as needed. Sets g->need_tlbi = true. * Returns 0 on success, -1 on failure. @@ -278,8 +278,8 @@ int guest_extend_page_tables(guest_t *g, uint64_t end, int perms); -/* Split a 2MB block descriptor into 512 x 4KB L3 page descriptors. - * block_gpa must be within a currently-mapped 2MB block. The block's +/* Split a 2MiB block descriptor into 512 x 4KiB L3 page descriptors. + * block_gpa must be within a currently-mapped 2MiB block. The block's * permissions are inherited by all 512 page entries. If the block is * already split (L2 entry is a table descriptor), this is a no-op. * Sets g->need_tlbi = true. Returns 0 on success, -1 on failure. 
@@ -290,16 +290,16 @@ int guest_split_block(guest_t *g, uint64_t block_gpa); * Sets L2 block descriptors and L3 page descriptors to 0 (invalid), * causing translation faults on access. Used when mprotect sets * PROT_NONE; the correct behavior is for the guest to fault. - * If a 2MB block is only partially invalidated, the block is split + * If a 2MiB block is only partially invalidated, the block is split * into L3 pages first (preserving the non-invalidated pages). * Sets g->need_tlbi = true. Returns 0 on success, -1 on failure. */ int guest_invalidate_ptes(guest_t *g, uint64_t start, uint64_t end); /* Update page table permissions for the range [start, end). - * If a 2MB block needs mixed permissions (only part of it is being - * updated), the block is automatically split into 4KB L3 pages first. - * If the entire 2MB block is being updated, the block descriptor is + * If a 2MiB block needs mixed permissions (only part of it is being + * updated), the block is automatically split into 4KiB L3 pages first. + * If the entire 2MiB block is being updated, the block descriptor is * modified in place without splitting. * perms is a MEM_PERM_R/W/X combination. Sets g->need_tlbi = true. * Returns 0 on success, -1 on failure. @@ -377,7 +377,7 @@ void guest_region_set_prot(guest_t *g, uint64_t start, uint64_t end, int prot); /* Try to materialize a lazy (MAP_NORESERVE) page at the given offset. * Called from the data/instruction abort handler when the faulting address - * falls within a noreserve region. Creates page table entries for one 2MB + * falls within a noreserve region. Creates page table entries for one 2MiB * block containing the fault address, zeros the memory, and clears the * noreserve flag for the materialized sub-range. * Returns 0 on success (caller should TLBI and retry), -1 if the offset is not diff --git a/src/core/shim.S b/src/core/shim.S index 3b82e74..fe82f9a 100644 --- a/src/core/shim.S +++ b/src/core/shim.S @@ -169,7 +169,7 @@ _start: eret /* Exception Vector Table - * Must be 2KB (0x800) aligned. Each entry is 128 bytes (0x80). + * Must be 2KiB (0x800) aligned. Each entry is 128 bytes (0x80). * * bad_exception vectors: mov x5, #offset + b bad_exception * X5 carries the vector offset for host-side debugging. diff --git a/src/core/stack.c b/src/core/stack.c index 1ff369d..fb75916 100644 --- a/src/core/stack.c +++ b/src/core/stack.c @@ -161,7 +161,7 @@ uint64_t build_linux_stack(guest_t *g, } /* Bounds-check: Linux returns E2BIG for oversized argument/environment. - * ARG_MAX on Linux is typically 2MB; stack setup caps at reasonable stack + * ARG_MAX on Linux is typically 2MiB; stack setup caps at reasonable stack * limits. */ #define MAX_ARGS 131072 diff --git a/src/core/vdso.c b/src/core/vdso.c index 21078ba..444be88 100644 --- a/src/core/vdso.c +++ b/src/core/vdso.c @@ -23,8 +23,6 @@ #include "core/elf.h" #include "debug/log.h" -#define VDSO_SIZE 0x00001000ULL /* 4KB */ - /* ELF section header (not in core/elf.h). 
*/ typedef struct { @@ -72,7 +70,7 @@ typedef struct { * [3] __kernel_gettimeofday */ -/* Offsets within the 4KB page */ +/* Offsets within the 4KiB page */ #define VDSO_OFF_EHDR 0x000 #define VDSO_OFF_PHDR 0x040 #define VDSO_OFF_PHDR1 0x078 @@ -100,7 +98,7 @@ typedef struct { /* 6 * 16 = 96, 0x1D8 + 96 = 0x238 */ #define VDSO_OFF_SHDR 0x238 -/* 6 * 64 = 384, 0x238 + 384 = 0x3B8 (fits in 4KB) */ +/* 6 * 64 = 384, 0x238 + 384 = 0x3B8 (fits in 4KiB) */ #define VDSO_NUM_SYMS 4 #define HASH_NCHAIN (VDSO_NUM_SYMS + 1) #define HASH_NBUCKET 1 diff --git a/src/core/vdso.h b/src/core/vdso.h index cb63aa4..e3a41d5 100644 --- a/src/core/vdso.h +++ b/src/core/vdso.h @@ -14,14 +14,15 @@ #include "core/guest.h" -/* Guest address where the vDSO is placed (one 4KB page, below PT pool) */ +/* Guest address where the vDSO is placed (one 4KiB page, below PT pool) */ #define VDSO_BASE 0x0000F000ULL -#define VDSO_OFF_TEXT 0x0B0 /* Offset of .text (trampoline code) */ +#define VDSO_SIZE 0x00001000ULL /* 4KiB */ +#define VDSO_OFF_TEXT 0x0B0 /* Offset of .text (trampoline code) */ /* Build a minimal vDSO ELF image at VDSO_BASE in guest memory. - * The image contains a valid ELF header, one LOAD program header, - * SHT_DYNSYM and SHT_STRTAB sections, and a __kernel_rt_sigreturn - * symbol pointing to a small trampoline (mov x8, #139; svc #0). + * The image contains a valid ELF header, one LOAD program header, SHT_DYNSYM + * and SHT_STRTAB sections, and a __kernel_rt_sigreturn symbol pointing to + * a small trampoline (mov x8, #139; svc #0). * Returns the GVA of the ELF header (== VDSO_BASE), or 0 on failure. */ uint64_t vdso_build(guest_t *g); diff --git a/src/debug/gdbstub.c b/src/debug/gdbstub.c index 242478b..a8f3e5b 100644 --- a/src/debug/gdbstub.c +++ b/src/debug/gdbstub.c @@ -50,7 +50,7 @@ /* Constants. */ -#define GDB_PKT_BUF_SIZE ((size_t) 128 * 1024) /* Max packet size (128KB) */ +#define GDB_PKT_BUF_SIZE ((size_t) 128 * 1024) /* Max packet size (128KiB) */ #define MAX_HW_BREAKPOINTS 16 #define MAX_HW_WATCHPOINTS 16 diff --git a/src/hvutil.h b/src/hvutil.h index 5a687f8..6a211d1 100644 --- a/src/hvutil.h +++ b/src/hvutil.h @@ -63,7 +63,7 @@ (1ULL << 7) /* ITD */) /* TCR_EL1. - * 4KB granule, 48-bit VA, EPD1=1 (TTBR1 walks disabled). + * 4KiB granule, 48-bit VA, EPD1=1 (TTBR1 walks disabled). * Used by main.c (initial setup) and syscall/exec.c (exec re-init). */ #define TCR_EL1_VALUE 0x25B5903510ULL diff --git a/src/main.c b/src/main.c index 60397af..fa7ce52 100644 --- a/src/main.c +++ b/src/main.c @@ -8,7 +8,7 @@ * - A minimal EL1 shim (embedded as shim_blob.h) that provides exception * vectors and forwards SVC #0 (Linux syscalls) to the host via HVC #5. * - All system registers configured from the host before vCPU start. - * - Guest memory identity-mapped at GVA=GPA with 2MB block page tables. + * - Guest memory identity-mapped at GVA=GPA with 2MiB block page tables. * - Syscall handlers that translate Linux syscalls to macOS equivalents. * * Usage: elfuse [--verbose] [--timeout N] [--sysroot PATH] [args...] diff --git a/src/runtime/forkipc.c b/src/runtime/forkipc.c index 60cc9de..e01e6a0 100644 --- a/src/runtime/forkipc.c +++ b/src/runtime/forkipc.c @@ -1020,7 +1020,7 @@ int64_t sys_clone(hv_vcpu_t vcpu, * Siblings may mmap/munmap/mprotect after resume, so the code needs a * stable copy for the IPC send. Heap-allocated because * GUEST_MAX_REGIONS * sizeof(guest_region_t) exceeds safe - * stack limits on worker threads (512KB default). + * stack limits on worker threads (512KiB default). 
*/ int nregions_snapshot = g->nregions; size_t snap_sz = (size_t) nregions_snapshot * sizeof(guest_region_t); diff --git a/src/runtime/proctitle.c b/src/runtime/proctitle.c index 10cbacf..4e296dc 100644 --- a/src/runtime/proctitle.c +++ b/src/runtime/proctitle.c @@ -1,4 +1,4 @@ -/* Process-title helpers for elfuse +/* Process-title helpers * * Copyright 2026 elfuse contributors * SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/proctitle.h b/src/runtime/proctitle.h index d2c55c8..15c39ef 100644 --- a/src/runtime/proctitle.h +++ b/src/runtime/proctitle.h @@ -1,6 +1,6 @@ #pragma once -/* Process-title helpers for elfuse +/* Process-title helpers * * Copyright 2026 elfuse contributors * SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/thread.c b/src/runtime/thread.c index a2c8ab5..aedddce 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -21,7 +21,7 @@ #include "runtime/thread.h" #include "debug/log.h" -#include "core/guest.h" /* SHIM_DATA_BASE, BLOCK_2MB, GUEST_IPA_BASE */ +#include "core/guest.h" /* SHIM_DATA_BASE, BLOCK_2MIB, GUEST_IPA_BASE */ #include "hvutil.h" /* vcpu_get_gpr, vcpu_get_sysreg */ /* From syscall/signal.h, included here directly to avoid pulling in @@ -32,8 +32,8 @@ static void thread_ptrace_init(thread_entry_t *t); -/* Top of the EL1 exception stack region (one 4KB slot per thread) */ -#define SP_EL1_TOP (GUEST_IPA_BASE + SHIM_DATA_BASE + BLOCK_2MB) +/* Top of the EL1 exception stack region (one 4KiB slot per thread) */ +#define SP_EL1_TOP (GUEST_IPA_BASE + SHIM_DATA_BASE + BLOCK_2MIB) /* Thread table. */ @@ -61,7 +61,7 @@ static _Atomic int active_thread_count = 0; /* Bitmask tracking allocated SP_EL1 slots. Bit N set = slot N in use. * MAX_THREADS=64 fits exactly in a uint64_t. Slot 0 is the main thread (top of - * shim data region); each subsequent slot is 4KB below. + * shim data region); each subsequent slot is 4KiB below. */ static uint64_t sp_el1_allocated = 0; @@ -272,8 +272,8 @@ uint64_t thread_alloc_sp_el1(void) log_error("thread: SP_EL1 slots exhausted"); } else { int slot = bit_ctz64(free_mask); - /* Main thread's SP_EL1 = IPA_BASE + SHIM_DATA_BASE + 2MB. - * Each subsequent thread is 4KB below. + /* Main thread's SP_EL1 = IPA_BASE + SHIM_DATA_BASE + 2MiB. + * Each subsequent thread is 4KiB below. */ uint64_t top = SP_EL1_TOP; sp = top - (uint64_t) slot * 4096; diff --git a/src/runtime/thread.h b/src/runtime/thread.h index 371433c..4304eaa 100644 --- a/src/runtime/thread.h +++ b/src/runtime/thread.h @@ -9,9 +9,9 @@ * threads are added via clone(CLONE_THREAD). A _Thread_local pointer provides * O(1) access to the current thread's entry from any syscall handler. * - * SP_EL1 allocation: each thread gets a 4KB EL1 exception stack carved from the - * shim data region (SHIM_DATA_BASE + 2MB). Thread 0 (main) gets the top, thread - * N gets offset -(N * 4096). + * SP_EL1 allocation: each thread gets a 4KiB EL1 exception stack carved from + * the shim data region (SHIM_DATA_BASE + 2MiB). Thread 0 (main) gets the top, + * thread N gets offset -(N * 4096). */ #pragma once @@ -156,10 +156,10 @@ int thread_active_count(void); /* Fast path: return non-zero when exactly one guest thread is active. */ int thread_is_single_active(void); -/* Allocate a per-thread SP_EL1 value. Thread N gets the Nth 4KB slot counting +/* Allocate a per-thread SP_EL1 value. Thread N gets the Nth 4KiB slot counting * down from the top of the shim data region. 
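 * (For example, slot 3 resolves to SP_EL1_TOP - 3 * 4096, i.e. 12KiB below
 * the main thread's slot 0.)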
The IPA base (GUEST_IPA_BASE + - * SHIM_DATA_BASE + 2MB) is the main thread's SP_EL1; each subsequent thread - * subtracts 4KB. Returns the IPA, or 0 on failure. + * SHIM_DATA_BASE + 2MiB) is the main thread's SP_EL1; each subsequent thread + * subtracts 4KiB. Returns the IPA, or 0 on failure. */ uint64_t thread_alloc_sp_el1(void); diff --git a/src/syscall/exec.c b/src/syscall/exec.c index 7b856ef..52f109d 100644 --- a/src/syscall/exec.c +++ b/src/syscall/exec.c @@ -369,7 +369,7 @@ int64_t sys_execve(hv_vcpu_t vcpu, * Cleanup acquires sfd_lock or inotify_lock, which must NOT be held under * fd_lock (lock ordering: fd_lock(3) < sfd_lock(5a) < inotify_lock(7)). * - * Two passes: count first, then heap-allocate. Avoids placing a ~100KB + * Two passes: count first, then heap-allocate. Avoids placing a ~100KiB * VLA on the stack (FD_TABLE_SIZE * sizeof(fd_entry_t+int)). */ int cloexec_count = 0; @@ -529,20 +529,25 @@ int64_t sys_execve(hv_vcpu_t vcpu, g->brk_current = brk_start; /* Keep exec stack placement consistent with initial process startup. */ - uint64_t stack_top = ALIGN_UP(brk_start, BLOCK_2MB); + uint64_t stack_top = ALIGN_UP(brk_start, BLOCK_2MIB); stack_top += STACK_SIZE; if (stack_top < STACK_TOP_DEFAULT) stack_top = STACK_TOP_DEFAULT; g->stack_top = stack_top; g->stack_base = stack_top - STACK_SIZE; -#define MAX_REGIONS 32 + /* Worst case: 7 fixed regions (shim, shim-data, vDSO, brk, stack, mmap RX, + * mmap RW) plus up to ELF_MAX_SEGMENTS for both the executable and the + * interpreter. Sized comfortably to keep the bounds-check loops simple + * after the point of no return. + */ +#define MAX_REGIONS (8 + 2 * ELF_MAX_SEGMENTS) mem_region_t regions[MAX_REGIONS]; int nregions = 0; - /* Fixed regions (shim, brk, stack, mmap areas): 6 entries. - * Bounds-check before each to prevent array overflow. After the point of no - * return, overflow is fatal (exit). + /* Fixed regions (shim, shim-data, vDSO, brk, stack, mmap RX, mmap RW): 7 + * entries. Bounds-check before each to prevent array overflow. After the + * point of no return, overflow is fatal (exit). */ /* Keep the shim executable-only; HVF faults on merged RWX mappings. */ @@ -555,14 +560,29 @@ int64_t sys_execve(hv_vcpu_t vcpu, /* EL1 exception handlers use this block for stack and scratch state. */ if (nregions >= MAX_REGIONS) goto too_many_regions; - regions[nregions++] = (mem_region_t) {.gpa_start = SHIM_DATA_BASE, - .gpa_end = SHIM_DATA_BASE + BLOCK_2MB, - .perms = MEM_PERM_RW}; + regions[nregions++] = + (mem_region_t) {.gpa_start = SHIM_DATA_BASE, + .gpa_end = SHIM_DATA_BASE + BLOCK_2MIB, + .perms = MEM_PERM_RW}; + + /* The vDSO sits in the same 2MiB block as the shim. The page-table builder + * splits the block into 4KiB L3 pages when its regions don't fully cover + * it, so the vDSO must appear here to keep the trampoline page valid and + * RX after rebuild. + */ + if (nregions >= MAX_REGIONS) + goto too_many_regions; + regions[nregions++] = (mem_region_t) {.gpa_start = VDSO_BASE, + .gpa_end = VDSO_BASE + VDSO_SIZE, + .perms = MEM_PERM_RX}; - /* Translate ELF p_flags into guest page permissions. */ + /* Translate ELF p_flags into guest page permissions. Silent drops would + * leave the loaded segment unmapped, so treat overflow as fatal (we are + * already past the point of no return). 
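+     * (Typical mapping, assuming the usual elf_pf_to_prot behavior: PF_R|PF_X
+     * yields MEM_PERM_RX for .text and PF_R|PF_W yields MEM_PERM_RW for
+     * .data.)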
+ */ for (int i = 0; i < elf_info.num_segments; i++) { if (nregions >= MAX_REGIONS) - break; + goto too_many_regions; regions[nregions++] = (mem_region_t) { .gpa_start = elf_info.segments[i].gpa + elf_load_base, .gpa_end = elf_info.segments[i].gpa + elf_info.segments[i].memsz + @@ -571,11 +591,11 @@ int64_t sys_execve(hv_vcpu_t vcpu, } /* Interpreter segments use the same permission translation, shifted by - * interp_base. + * interp_base. Same fatal-overflow rule as the executable's segments. */ for (int i = 0; i < interp_info.num_segments; i++) { if (nregions >= MAX_REGIONS) - break; + goto too_many_regions; regions[nregions++] = (mem_region_t) { .gpa_start = interp_info.segments[i].gpa + interp_base, .gpa_end = interp_info.segments[i].gpa + @@ -598,7 +618,7 @@ int64_t sys_execve(hv_vcpu_t vcpu, .perms = MEM_PERM_RW}; /* PROT_EXEC mmap allocations start in a separate RX area to preserve W^X - * with 2MB page-table blocks. + * with 2MiB page-table blocks. */ if (nregions >= MAX_REGIONS) goto too_many_regions; @@ -629,7 +649,7 @@ int64_t sys_execve(hv_vcpu_t vcpu, guest_region_add(g, SHIM_BASE, SHIM_BASE + shim_size, LINUX_PROT_READ | LINUX_PROT_EXEC, LINUX_MAP_PRIVATE, 0, "[shim]"); - guest_region_add(g, SHIM_DATA_BASE, SHIM_DATA_BASE + BLOCK_2MB, + guest_region_add(g, SHIM_DATA_BASE, SHIM_DATA_BASE + BLOCK_2MIB, LINUX_PROT_READ | LINUX_PROT_WRITE, LINUX_MAP_PRIVATE, 0, "[shim-data]"); for (int i = 0; i < elf_info.num_segments; i++) { diff --git a/src/syscall/fs.c b/src/syscall/fs.c index 8d062d3..283ef7b 100644 --- a/src/syscall/fs.c +++ b/src/syscall/fs.c @@ -572,7 +572,7 @@ int64_t sys_fcntl(guest_t *g, int fd, int cmd, uint64_t arg) * macOS layout: {off_t l_start, off_t l_len, pid_t l_pid, * short l_type, short l_whence} * Use guest_read/guest_write (not guest_ptr) to safely handle - * structs that span 2MB page table block boundaries. + * structs that span 2MiB page table block boundaries. */ uint8_t lflock[32]; /* Linux struct flock is 32 bytes on aarch64 */ if (guest_read_small(g, arg, lflock, sizeof(lflock)) < 0) @@ -620,7 +620,7 @@ int64_t sys_fcntl(guest_t *g, int fd, int cmd, uint64_t arg) return 0; } case 1024: /* F_GETPIPE_SZ */ - /* macOS does not support pipe size queries; return default 64KB */ + /* macOS does not support pipe size queries; return default 64KiB */ return 65536; case 1031: /* F_SETPIPE_SZ */ /* macOS does not support pipe size setting; pretend success */ @@ -720,7 +720,7 @@ int64_t sys_getdents64(guest_t *g, int fd, uint64_t buf_gva, uint64_t count) /* Temp buffer for dirent serialization. Max dirent64 is 280 bytes * (19-byte header + NAME_MAX=255 + null + padding to 8). Using a * stack buffer avoids guest_ptr boundary issues: guest_write() handles - * 2MB block crossings that raw memcpy into guest_ptr() cannot. + * 2MiB block crossings that raw memcpy into guest_ptr() cannot. */ uint8_t entry_buf[280]; @@ -751,7 +751,7 @@ int64_t sys_getdents64(guest_t *g, int fd, uint64_t buf_gva, uint64_t count) lde.d_type = de->d_type; /* Serialize entry into temp buffer, then copy to guest via - * guest_write() which handles 2MB block boundary crossings. + * guest_write() which handles 2MiB block boundary crossings. 
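+         * (E.g. an entry written 64 bytes before a 2MiB boundary spans pages
+         * whose descriptors may differ after an L3 split; a single
+         * guest_ptr() pointer is only valid up to that boundary, while
+         * guest_write() chunks the copy.)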
diff --git a/src/syscall/inotify.c b/src/syscall/inotify.c
index 3398aa4..cf5205e 100644
--- a/src/syscall/inotify.c
+++ b/src/syscall/inotify.c
@@ -650,7 +650,7 @@ int64_t inotify_read(int guest_fd, guest_t *g, uint64_t buf_gva, uint64_t count)
         pos += event_size;
     }
 
-    /* Copy event data to a local buffer (max 4KB) */
+    /* Copy event data to a local buffer (max 4KiB) */
     uint8_t local_buf[INOTIFY_BUFSIZE];
     if (copied > 0)
         memcpy(local_buf, inst->event_buf, copied);
diff --git a/src/syscall/io.c b/src/syscall/io.c
index bfb14b1..3b40c03 100644
--- a/src/syscall/io.c
+++ b/src/syscall/io.c
@@ -686,7 +686,7 @@ int64_t sys_pwrite64(guest_t *g,
 }
 
 /* Helper: build host iovec array from guest iovec array.
- * Uses guest_read for the iovec array (may cross 2MB block boundary)
+ * Uses guest_read for the iovec array (may cross 2MiB block boundary)
  * and guest_ptr_avail for each buffer (caps to contiguous bytes).
  * required_perms: MEM_PERM_W for readv (host writes to guest buffers),
  * MEM_PERM_R for writev (host reads from guest buffers).
@@ -808,7 +808,7 @@ int64_t sys_readv(guest_t *g, int fd, uint64_t iov_gva, int iovcnt)
     if (iovcnt <= 0)
         return -LINUX_EINVAL;
 
     /* Use guest_read for the iov array since guest_ptr alone is unsafe
-     * if the array spans a 2MB block boundary.
+     * if the array spans a 2MiB block boundary.
      */
     linux_iovec_t giov;
     if (guest_read_small(g, iov_gva, &giov, sizeof(giov)) < 0)
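The iovec helper described above makes two distinct kinds of copies: the guest iovec array itself (bounce-buffered, since it may straddle a block) and each data buffer (resolved to host pointers, capped to the contiguous span). A standalone sketch of the capping loop; guest_ptr_avail is modeled here as "bytes left until the 2MiB block boundary", which is an assumption, not the real implementation:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/uio.h>

    #define BLOCK_2MIB (2ULL * 1024 * 1024)

    typedef struct { /* Linux struct iovec layout on aarch64 */
        uint64_t iov_base;
        uint64_t iov_len;
    } linux_iovec_t;

    /* Hypothetical translation: returns a host pointer and how many bytes
     * are contiguous before the current 2MiB block ends. */
    static void *guest_ptr_avail(uint64_t gva, uint64_t *avail)
    {
        static uint8_t mem[4 * 1024 * 1024];
        if (gva >= sizeof(mem))
            return NULL;
        *avail = (gva & ~(BLOCK_2MIB - 1)) + BLOCK_2MIB - gva;
        return &mem[gva];
    }

    /* One guest iovec may become several host iovecs when its buffer
     * crosses a block boundary. Returns host count, or -1 on bad address. */
    static int build_host_iov(const linux_iovec_t *giov, int cnt,
                              struct iovec *hiov, int max)
    {
        int n = 0;
        for (int i = 0; i < cnt; i++) {
            uint64_t gva = giov[i].iov_base, left = giov[i].iov_len;
            while (left > 0 && n < max) {
                uint64_t avail;
                void *p = guest_ptr_avail(gva, &avail);
                if (!p)
                    return -1;
                uint64_t take = left < avail ? left : avail;
                hiov[n++] =
                    (struct iovec) {.iov_base = p, .iov_len = (size_t) take};
                gva += take;
                left -= take;
            }
        }
        return n;
    }

    int main(void)
    {
        linux_iovec_t giov[2] = {{BLOCK_2MIB - 64, 128}, {0x1000, 16}};
        struct iovec hiov[8];
        printf("host iovecs: %d\n", build_host_iov(giov, 2, hiov, 8));
        return 0; /* prints 3: the first entry splits at the boundary */
    }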
diff --git a/src/syscall/mem.c b/src/syscall/mem.c
index ad584d6..106b189 100644
--- a/src/syscall/mem.c
+++ b/src/syscall/mem.c
@@ -201,8 +201,8 @@ static int mremap_extend_range(guest_t *g,
     }
 
     int page_perms = prot_to_perms(prot);
-    uint64_t ext_start = ALIGN_DOWN(off, BLOCK_2MB);
-    uint64_t ext_end = ALIGN_UP(off + size, BLOCK_2MB);
+    uint64_t ext_start = ALIGN_DOWN(off, BLOCK_2MIB);
+    uint64_t ext_end = ALIGN_UP(off + size, BLOCK_2MIB);
     if (ext_end > g->guest_size)
         ext_end = g->guest_size;
     if (guest_extend_page_tables(g, ext_start, ext_end, page_perms) < 0)
@@ -237,11 +237,11 @@ int64_t sys_brk(guest_t *g, uint64_t addr)
      * The brk region is initially mapped up to MMAP_RX_BASE; if it grows
      * past that, the mmap allocator needs to extend dynamically.
      */
-    uint64_t brk_pt_end = ALIGN_UP(g->brk_current, BLOCK_2MB);
+    uint64_t brk_pt_end = ALIGN_UP(g->brk_current, BLOCK_2MIB);
     if (brk_pt_end < MMAP_RX_BASE)
         brk_pt_end = MMAP_RX_BASE;
     if (new_off > brk_pt_end) {
-        uint64_t new_end = ALIGN_UP(new_off, BLOCK_2MB);
+        uint64_t new_end = ALIGN_UP(new_off, BLOCK_2MIB);
         if (guest_extend_page_tables(g, brk_pt_end, new_end, MEM_PERM_RW) < 0)
             return (int64_t) ipa_brk;
     }
@@ -426,8 +426,8 @@ int64_t sys_mmap(guest_t *g,
          */
         int page_perms = prot_to_perms(prot);
-        uint64_t ext_start = ALIGN_DOWN(result_off, BLOCK_2MB);
-        uint64_t ext_end = ALIGN_UP(result_off + length, BLOCK_2MB);
+        uint64_t ext_start = ALIGN_DOWN(result_off, BLOCK_2MIB);
+        uint64_t ext_end = ALIGN_UP(result_off + length, BLOCK_2MIB);
         if (ext_end > g->guest_size)
             ext_end = g->guest_size;
@@ -446,7 +446,7 @@ int64_t sys_mmap(guest_t *g,
         /* Fine-tune permissions for the exact range. Handles L3
          * splitting when MAP_FIXED overlays different permissions
-         * onto an existing 2MB block (e.g., .data RW over .text RX).
+         * onto an existing 2MiB block (e.g., .data RW over .text RX).
          */
         guest_update_perms(g, result_off, result_off + length, page_perms);
@@ -503,8 +503,8 @@ int64_t sys_mmap(guest_t *g,
     if (!is_fixed) {
         if (needs_exec && !(prot & LINUX_PROT_WRITE)) {
             /* PROT_EXEC without PROT_WRITE: allocate from the RX mmap region.
-             * Apple HVF enforces W^X on 2MB block page table entries, so
-             * executable mappings must be in separate 2MB blocks from writable
+             * Apple HVF enforces W^X on 2MiB block page table entries, so
+             * executable mappings must be in separate 2MiB blocks from writable
              * ones. The RX region at MMAP_RX_BASE is pre-mapped with execute
              * permission.
              */
@@ -512,7 +512,7 @@ int64_t sys_mmap(guest_t *g,
             if (result_off == UINT64_MAX) {
                 log_debug(
                     "mmap: RX address space exhausted "
-                    "(len=0x%llx, limit=0x%llx, %u-bit IPA / %lluGB)",
+                    "(len=0x%llx, limit=0x%llx, %u-bit IPA / %llu GiB)",
                     (unsigned long long) length,
                     (unsigned long long) g->mmap_limit, g->ipa_bits,
                     (unsigned long long) (g->guest_size >> 30));
@@ -526,7 +526,7 @@ int64_t sys_mmap(guest_t *g,
             /* RW (or PROT_NONE, or PROT_READ): allocate from main mmap region.
              * Honor the address hint if provided and within bounds. Some
              * managed-runtime allocators need the heap at a specific high
-             * address range (e.g., ~264GB for a megablock-style map) and
+             * address range (e.g., ~264GiB for a megablock-style map) and
              * spin-retry if they get a low address instead. On real Linux,
              * mmap tries the hint first and falls back to any suitable address.
              */
@@ -543,7 +543,7 @@ int64_t sys_mmap(guest_t *g,
             if (result_off == UINT64_MAX) {
                 log_debug(
                     "mmap: RW address space exhausted "
-                    "(len=0x%llx, limit=0x%llx, %u-bit IPA / %lluGB)",
+                    "(len=0x%llx, limit=0x%llx, %u-bit IPA / %llu GiB)",
                     (unsigned long long) length,
                     (unsigned long long) g->mmap_limit, g->ipa_bits,
                     (unsigned long long) (g->guest_size >> 30));
@@ -590,8 +590,8 @@ int64_t sys_mmap(guest_t *g,
      * creating entries for PROT_NONE gaps between allocations.
      */
     if (needs_exec && !(prot & LINUX_PROT_WRITE)) {
-        uint64_t ext_start = ALIGN_DOWN(result_off, BLOCK_2MB);
-        uint64_t ext_end = ALIGN_UP(result_off + length, BLOCK_2MB);
+        uint64_t ext_start = ALIGN_DOWN(result_off, BLOCK_2MIB);
+        uint64_t ext_end = ALIGN_UP(result_off + length, BLOCK_2MIB);
         if (ext_end > g->mmap_limit)
             ext_end = g->mmap_limit;
         if (guest_extend_page_tables(g, ext_start, ext_end, MEM_PERM_RX) <
@@ -608,8 +608,8 @@ int64_t sys_mmap(guest_t *g,
         if (ext_end > g->mmap_rx_end)
             g->mmap_rx_end = ext_end;
     } else {
-        uint64_t ext_start = ALIGN_DOWN(result_off, BLOCK_2MB);
-        uint64_t ext_end = ALIGN_UP(result_off + length, BLOCK_2MB);
+        uint64_t ext_start = ALIGN_DOWN(result_off, BLOCK_2MIB);
+        uint64_t ext_end = ALIGN_UP(result_off + length, BLOCK_2MIB);
         if (ext_end > g->mmap_limit)
             ext_end = g->mmap_limit;
 
         /* Preserve execute permission for RWX requests. Stage-2
@@ -1133,7 +1133,7 @@ int64_t sys_munmap(guest_t *g, uint64_t addr, uint64_t length)
     if (unmap_off < ELF_DEFAULT_BASE && end > PT_POOL_BASE)
         return -LINUX_EINVAL;
 
-    /* Invalidate PTEs first. This may need to split a 2MB block
+    /* Invalidate PTEs first. This may need to split a 2MiB block
      * which can fail if the page table pool is exhausted. Failing
      * before region removal keeps metadata consistent.
      */
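Every extension site in mem.c rounds the byte range out to whole 2MiB blocks and clamps against the guest limit before touching page tables. The arithmetic in isolation (ALIGN_DOWN as in src/utils.h; this ALIGN_UP is one standard way to write it, values chosen arbitrarily):

    #include <assert.h>
    #include <stdint.h>

    #define BLOCK_2MIB (2ULL * 1024 * 1024)
    #define ALIGN_DOWN(x, a) ((uint64_t) (x) & ~((uint64_t) (a) - 1))
    #define ALIGN_UP(x, a) ALIGN_DOWN((uint64_t) (x) + (a) - 1, a)

    int main(void)
    {
        uint64_t guest_size = 32ULL << 30; /* e.g. a 32GiB IPA window */
        uint64_t off = 0x40123000, len = 0x5000;

        uint64_t ext_start = ALIGN_DOWN(off, BLOCK_2MIB);
        uint64_t ext_end = ALIGN_UP(off + len, BLOCK_2MIB);
        if (ext_end > guest_size) /* clamp, as sys_mmap/sys_brk do */
            ext_end = guest_size;

        assert(ext_start == 0x40000000 && ext_end == 0x40200000);
        return 0;
    }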
diff --git a/src/syscall/net-absock.h b/src/syscall/net-absock.h
index 6aec06d..4e5eae8 100644
--- a/src/syscall/net-absock.h
+++ b/src/syscall/net-absock.h
@@ -1,11 +1,11 @@
-#pragma once
-
-/* Abstract AF_UNIX emulation helpers for elfuse
+/* Abstract AF_UNIX emulation helpers
  *
  * Copyright 2026 elfuse contributors
  * SPDX-License-Identifier: Apache-2.0
  */
 
+#pragma once
+
 #include
 #include
diff --git a/src/syscall/proc-state.h b/src/syscall/proc-state.h
index c48c246..77adeca 100644
--- a/src/syscall/proc-state.h
+++ b/src/syscall/proc-state.h
@@ -1,9 +1,9 @@
-#pragma once
-
-/* Process metadata state helpers for elfuse
+/* Process metadata state helpers
  *
  * Copyright 2026 elfuse contributors
  * SPDX-License-Identifier: Apache-2.0
  */
 
+#pragma once
+
 void proc_state_init(void);
diff --git a/src/syscall/proc.c b/src/syscall/proc.c
index 9c0dc62..60adda1 100644
--- a/src/syscall/proc.c
+++ b/src/syscall/proc.c
@@ -1360,12 +1360,13 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                  * EC=0x24 data abort) and forwards the faulting address
                  * here.
                  *
-                 * Toggling at 2MB granularity causes thrashing when the
+                 * Toggling at 2MiB granularity causes thrashing when the
                  * JIT writes new code and executes existing code within
-                 * the same 2MB block. Instead, the code splits the 2MB
-                 * block into 4KB L3 pages and toggle only the faulting 4KB
-                 * page. This allows different pages within a 2MB block to
-                 * have independent RW/RX permissions simultaneously.
+                 * the same 2MiB block. Instead, the code splits the 2MiB
+                 * block into 4KiB L3 pages and toggles only the faulting
+                 * 4KiB page. This allows different pages within a 2MiB
+                 * block to have independent RW/RX permissions
+                 * simultaneously.
                  *
                  * x0 = FAR_EL1 (faulting virtual address)
                  * x1 = type: 0 = exec fault -> flip to RX
@@ -1421,7 +1422,7 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                 prefix, (unsigned long long) far,
                 (type == 0) ? "RX" : "RW", (unsigned long long) page_start);
 
-            uint64_t block_start = far & ~(BLOCK_2MB - 1);
+            uint64_t block_start = far & ~(BLOCK_2MIB - 1);
             int sr = guest_split_block(g, block_start);
             int ur = guest_update_perms(g, page_start, page_end, new_perms);
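The fault-toggling path above reduces to a little address arithmetic plus two guest calls. A sketch mirroring the call sites in proc.c; guest_t and the two prototypes are assumed from context, and the MEM_PERM_* values are placeholders:

    #include <stdint.h>

    #define PAGE_4KIB 4096ULL
    #define BLOCK_2MIB (2ULL * 1024 * 1024)
    #define MEM_PERM_RX 1 /* placeholder values; real ones live in guest.h */
    #define MEM_PERM_RW 2

    typedef struct guest guest_t;
    int guest_split_block(guest_t *g, uint64_t block_gpa);
    int guest_update_perms(guest_t *g, uint64_t start, uint64_t end, int perms);

    /* type: 0 = exec fault -> flip to RX, 1 = write fault -> flip to RW */
    static int jit_toggle_page(guest_t *g, uint64_t far, int type)
    {
        uint64_t page_start = far & ~(PAGE_4KIB - 1);
        uint64_t page_end = page_start + PAGE_4KIB;
        uint64_t block_start = far & ~(BLOCK_2MIB - 1);

        /* Split first so only the faulting 4KiB page changes; its siblings
         * in the 2MiB block keep their current RW/RX permissions. */
        if (guest_split_block(g, block_start) < 0)
            return -1;
        return guest_update_perms(g, page_start, page_end,
                                  type == 0 ? MEM_PERM_RX : MEM_PERM_RW);
    }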
diff --git a/src/syscall/sys.c b/src/syscall/sys.c
index 65c26ad..4d540f4 100644
--- a/src/syscall/sys.c
+++ b/src/syscall/sys.c
@@ -88,7 +88,7 @@ static void sysinfo_init_cached_host_state(void)
     size_t ms_len = sizeof(memsize);
     int mib_mem[2] = {CTL_HW, HW_MEMSIZE};
     if (sysctl(mib_mem, 2, &memsize, &ms_len, NULL, 0) == 0) {
-        const uint64_t vm_ram_cap = 4094595072ULL; /* Match Lima VZ 4GB VM */
+        const uint64_t vm_ram_cap = 4094595072ULL; /* Match Lima VZ 4GiB VM */
         cached_real_memsize = memsize;
         cached_totalram = (memsize > vm_ram_cap) ? vm_ram_cap : memsize;
     }
@@ -367,8 +367,8 @@ static linux_rlimit64_t translate_host_rlimit(int resource, struct rlimit rl)
     lim.rlim_cur = (rl.rlim_cur == RLIM_INFINITY) ? UINT64_MAX : rl.rlim_cur;
     lim.rlim_max = (rl.rlim_max == RLIM_INFINITY) ? UINT64_MAX : rl.rlim_max;
 
-    /* macOS returns ~8MB-16KB for the default stack; round to Linux's
-     * conventional 8MB to keep guest userspace behavior stable.
+    /* macOS returns 8MiB minus 16KiB for the default stack; round to Linux's
+     * conventional 8MiB to keep guest userspace behavior stable.
      */
     if (resource == 3 /* RLIMIT_STACK */ && lim.rlim_cur > 0 &&
         lim.rlim_cur < 8388608) {
diff --git a/src/utils.h b/src/utils.h
index 153b94a..efe55c5 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -30,18 +30,18 @@
 /* Align x down to the previous multiple of a; a must be a power of two.
  */
 #define ALIGN_DOWN(x, a) ((uint64_t) (x) & ~((uint64_t) (a) - 1))
 
-/* The Linux ABI fixes the page size at 4KB on aarch64 regardless of the host
+/* The Linux ABI fixes the page size at 4KiB on aarch64 regardless of the host
  * page size, so this is shared by every guest memory path (mmap, brk,
  * mprotect, ELF loading).
  */
 #define GUEST_PAGE_SIZE 4096ULL
 #define PAGE_ALIGN_UP(x) ALIGN_UP(x, GUEST_PAGE_SIZE)
 
-/* 2MB block alignment shared by region setup, page table walking, and stack
- * placement. BLOCK_2MB itself is defined in core/guest.h.
+/* 2MiB block alignment shared by region setup, page table walking, and stack
+ * placement. BLOCK_2MIB itself is defined in core/guest.h.
  */
-#define ALIGN_2MB_DOWN(x) ALIGN_DOWN(x, 2ULL * 1024 * 1024)
-#define ALIGN_2MB_UP(x) ALIGN_UP(x, 2ULL * 1024 * 1024)
+#define ALIGN_2MIB_DOWN(x) ALIGN_DOWN(x, 2ULL * 1024 * 1024)
+#define ALIGN_2MIB_UP(x) ALIGN_UP(x, 2ULL * 1024 * 1024)
 
 /* Branchless range check: true when minx <= x < minx + size.
  *
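The "branchless range check" whose comment closes the utils.h hunk is a classic unsigned-wraparound idiom: if x < minx, the subtraction wraps to a huge value and the single comparison fails. A sketch of the idea (my reconstruction; the macro's exact form isn't shown in this hunk):

    #include <assert.h>
    #include <stdint.h>

    /* True when minx <= x < minx + size, using one compare: if x < minx,
     * (x - minx) wraps around to a value >= size. */
    static int in_range(uint64_t x, uint64_t minx, uint64_t size)
    {
        return (x - minx) < size;
    }

    int main(void)
    {
        assert(in_range(5, 5, 10));   /* lower bound inclusive */
        assert(in_range(14, 5, 10));  /* 5 <= 14 < 15 */
        assert(!in_range(15, 5, 10)); /* upper bound exclusive */
        assert(!in_range(4, 5, 10));  /* wraps: 4 - 5 is huge */
        return 0;
    }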
diff --git a/tests/test-cow-fork.c b/tests/test-cow-fork.c
index 3df6ffd..8770420 100644
--- a/tests/test-cow-fork.c
+++ b/tests/test-cow-fork.c
@@ -170,7 +170,7 @@ static void test_mmap_isolation(void)
 
 static void test_large_cow(void)
 {
-    TEST("fork: 1MB COW integrity");
+    TEST("fork: 1MiB COW integrity");
 
     int pipefd[2];
     if (pipe(pipefd) != 0) {
@@ -182,7 +182,7 @@ static void test_large_cow(void)
     char *buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (buf == MAP_FAILED) {
-        FAIL("mmap 1MB");
+        FAIL("mmap 1MiB");
         return;
     }
@@ -229,7 +229,7 @@ static void test_large_cow(void)
     int status;
     waitpid(pid, &status, 0);
 
-    EXPECT_TRUE(parent_ok && child_ok, "1MB COW integrity failed");
+    EXPECT_TRUE(parent_ok && child_ok, "1MiB COW integrity failed");
 
     munmap(buf, sz);
 }
diff --git a/tests/test-futex-pi.c b/tests/test-futex-pi.c
index 0bfe294..8c5ca60 100644
--- a/tests/test-futex-pi.c
+++ b/tests/test-futex-pi.c
@@ -68,7 +68,7 @@ static long raw_futex_unlock_pi(uint32_t *addr)
 
 /* Child thread for dead-owner test */
 
-/* Stack for child thread (8KB, 16-byte aligned) */
+/* Stack for child thread (8KiB, 16-byte aligned) */
 static char child_stack_buf[8192] __attribute__((aligned(16)));
 
 /* Child: acquire PI lock, signal parent, exit WITHOUT releasing.
diff --git a/tests/test-guard-page.c b/tests/test-guard-page.c
index b4809ce..caf91ac 100644
--- a/tests/test-guard-page.c
+++ b/tests/test-guard-page.c
@@ -61,13 +61,13 @@ static void test_prot_none(void)
 
 static void test_large_mmap(void)
 {
-    TEST("mmap 64MB anonymous");
+    TEST("mmap 64MiB anonymous");
 
     size_t sz = 64UL * 1024 * 1024;
     void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (p == MAP_FAILED) {
-        FAIL("mmap 64MB failed");
+        FAIL("mmap 64MiB failed");
         return;
     }
@@ -78,7 +78,7 @@ static void test_large_mmap(void)
     c[sz - 1] = 'C';
 
     EXPECT_TRUE(c[0] == 'A' && c[sz / 2] == 'B' && c[sz - 1] == 'C',
-                "data mismatch in 64MB region");
+                "data mismatch in 64MiB region");
 
     munmap(p, sz);
 }
diff --git a/tests/test-large-io-boundary.c b/tests/test-large-io-boundary.c
index 18230d8..28b76e7 100644
--- a/tests/test-large-io-boundary.c
+++ b/tests/test-large-io-boundary.c
@@ -4,7 +4,7 @@
  * Copyright 2025 Moritz Angermann, zw3rk pte. ltd.
  * SPDX-License-Identifier: Apache-2.0
  *
- * Tests: read/write buffers crossing 2MB L2 blocks and split L3 tables.
+ * Tests: read/write buffers crossing 2MiB L2 blocks and split L3 tables.
  */
 
 #include
@@ -19,7 +19,7 @@
 int passes = 0, fails = 0;
 
-#define BLOCK_2MB (2UL * 1024 * 1024)
+#define BLOCK_2MIB (2UL * 1024 * 1024)
 #define MAP_SIZE (6UL * 1024 * 1024)
 #define IO_OFFSET 12345UL
 #define IO_SIZE (3UL * 1024 * 1024)
@@ -27,7 +27,7 @@ int passes = 0, fails = 0;
 static unsigned char *next_2mb_boundary(unsigned char *p)
 {
     uintptr_t addr = (uintptr_t) p;
-    addr = (addr + BLOCK_2MB - 1) & ~(uintptr_t) (BLOCK_2MB - 1);
+    addr = (addr + BLOCK_2MIB - 1) & ~(uintptr_t) (BLOCK_2MIB - 1);
     return (unsigned char *) addr;
 }
@@ -53,7 +53,7 @@ static int verify_pattern(const unsigned char *buf, size_t len)
     return 0;
 }
 
-/* Verify a repeating 4KB seed pattern across a large buffer.
+/* Verify a repeating 4KiB seed pattern across a large buffer.
  * The seed is: seed[i] = (i * 131 + 17) for i in [0, 4096).
  */
 static int verify_repeating_seed(const unsigned char *buf, size_t len)
@@ -68,7 +68,7 @@ static int verify_repeating_seed(const unsigned char *buf, size_t len)
 
 static void test_large_write(void)
 {
-    TEST("write crosses 2MB boundary");
+    TEST("write crosses 2MiB boundary");
 
     unsigned char *map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -97,7 +97,7 @@ static void test_large_write(void)
         ok = 0;
 
     /* Read back the entire write and verify all bytes, including those
-     * spanning the 2MB page table boundary.
+     * spanning the 2MiB page table boundary.
      */
     unsigned char *readback = mmap(NULL, IO_SIZE, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -129,12 +129,12 @@ static void test_large_read_from_split_block(void)
         return;
     }
 
-    /* Force the first 2MB block to remain split into L3 pages while ending
+    /* Force the first 2MiB block to remain split into L3 pages while ending
      * with RW permissions, then read across the L3-to-L2 boundary.
      */
     unsigned char *block = next_2mb_boundary(map);
     unsigned char *buf = block + IO_OFFSET;
-    void *page = block + BLOCK_2MB / 2;
+    void *page = block + BLOCK_2MIB / 2;
     if (mprotect(page, 4096, PROT_READ) != 0 ||
         mprotect(page, 4096, PROT_READ | PROT_WRITE) != 0) {
         munmap(map, MAP_SIZE);
@@ -170,7 +170,7 @@ static void test_large_read_from_split_block(void)
         ssize_t ret = read(fd, buf, IO_SIZE);
         ok = (ret == (ssize_t) IO_SIZE);
     }
 
-    /* Verify the entire read buffer, including the 2MB boundary
+    /* Verify the entire read buffer, including the 2MiB boundary
      * crossing where L3-to-L2 page table transitions happen.
      */
     if (ok && verify_repeating_seed(buf, IO_SIZE) != 0)
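test_large_read_from_split_block leans on a subtle trick: a transient mprotect forces the VMM to split the 2MiB block into L3 pages, and the block stays split even after permissions return to RW. The same trick in isolation (under elfuse this pins the split; on bare Linux it is just two mprotect calls):

    #include <stdint.h>
    #include <string.h>
    #include <sys/mman.h>

    #define BLOCK_2MIB (2UL * 1024 * 1024)

    int main(void)
    {
        size_t sz = 3 * BLOCK_2MIB; /* guarantees one aligned 2MiB block */
        unsigned char *map = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (map == MAP_FAILED)
            return 1;

        unsigned char *block =
            (unsigned char *) (((uintptr_t) map + BLOCK_2MIB - 1) &
                               ~(uintptr_t) (BLOCK_2MIB - 1));

        /* RO then back to RW on one interior page: the 2MiB block must be
         * split into 4KiB L3 entries to express the difference. */
        unsigned char *page = block + BLOCK_2MIB / 2;
        if (mprotect(page, 4096, PROT_READ) != 0 ||
            mprotect(page, 4096, PROT_READ | PROT_WRITE) != 0)
            return 1;

        memset(block, 0xAB, BLOCK_2MIB); /* writes now cross a split block */
        return munmap(map, sz);
    }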
diff --git a/tests/test-madvise.c b/tests/test-madvise.c
index 0b153d4..a81a9ba 100644
--- a/tests/test-madvise.c
+++ b/tests/test-madvise.c
@@ -212,7 +212,7 @@ static void test_advisory_hints(void)
 
 static void test_dontneed_large(void)
 {
-    TEST("MADV_DONTNEED 1MB range");
+    TEST("MADV_DONTNEED 1MiB range");
 
     size_t sz = 1024 * 1024;
     void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -239,7 +239,7 @@ static void test_dontneed_large(void)
         }
     }
 
-    EXPECT_TRUE(ok, "1MB range not zeroed");
+    EXPECT_TRUE(ok, "1MiB range not zeroed");
 
     munmap(p, sz);
 }
diff --git a/tests/test-mremap.c b/tests/test-mremap.c
index 5375d7f..61d2764 100644
--- a/tests/test-mremap.c
+++ b/tests/test-mremap.c
@@ -234,7 +234,7 @@ static void test_same_size(void)
 
 static void test_large_realloc(void)
 {
-    TEST("mremap large (256KB->512KB)");
+    TEST("mremap large (256KiB->512KiB)");
 
     size_t old_sz = 256 * 1024, new_sz = 512 * 1024;
     void *p = mmap(NULL, old_sz, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
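test_large_realloc exercises the grow path; the core calling pattern is small enough to show whole. A minimal Linux-only example (mremap may move the mapping, so the old pointer must not be reused):

    #define _GNU_SOURCE
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t old_sz = 256 * 1024, new_sz = 512 * 1024;
        char *p = mmap(NULL, old_sz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return 1;
        memset(p, 0x5A, old_sz);

        /* MREMAP_MAYMOVE lets the kernel relocate when it cannot grow in
         * place; contents must be preserved either way. */
        char *q = mremap(p, old_sz, new_sz, MREMAP_MAYMOVE);
        if (q == MAP_FAILED)
            return 1;
        int ok = (q[0] == 0x5A && q[old_sz - 1] == 0x5A);
        munmap(q, new_sz);
        return !ok;
    }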
diff --git a/tests/test-multi-vcpu.c b/tests/test-multi-vcpu.c
index b3277e4..01d9301 100644
--- a/tests/test-multi-vcpu.c
+++ b/tests/test-multi-vcpu.c
@@ -55,9 +55,9 @@
 #define PT_AP_RO (3ULL << 6) /* RO at EL0 */
 
 #define PAGE_SIZE_4K 4096ULL
-#define BLOCK_2MB (2ULL * 1024 * 1024)
+#define BLOCK_2MIB (2ULL * 1024 * 1024)
 
-/* Memory layout (16MB total, much smaller than elfuse's 32GB) */
+/* Memory layout (16MiB total, much smaller than elfuse's 32GiB) */
 
 #define GUEST_SIZE (16ULL * 1024 * 1024)
 
@@ -70,18 +70,18 @@
 #define STACK_A_BASE 0x00A00000ULL /* EL0 stack A (RW) */
 #define STACK_B_BASE 0x00C00000ULL /* EL0 stack B (RW) */
 
-/* vCPU-A and vCPU-B SP_EL1 (top of respective 512KB regions within shim data)
+/* vCPU-A and vCPU-B SP_EL1 (top of respective 512KiB regions within shim data)
  */
-#define SP_EL1_A (SHIM_DATA_BASE + BLOCK_2MB) /* 0x400000 */
-#define SP_EL1_B (SHIM_DATA_BASE + BLOCK_2MB / 2) /* 0x300000 */
+#define SP_EL1_A (SHIM_DATA_BASE + BLOCK_2MIB) /* 0x400000 */
+#define SP_EL1_B (SHIM_DATA_BASE + BLOCK_2MIB / 2) /* 0x300000 */
 
 /* vCPU-A and vCPU-B EL0 code offsets within GUEST_CODE region */
 #define CODE_A_OFF 0x0000ULL
-#define CODE_B_OFF 0x1000ULL /* 4KB apart */
+#define CODE_B_OFF 0x1000ULL /* 4KiB apart */
 
-/* EL0 stack tops (top of each 2MB region) */
-#define SP_EL0_A (STACK_A_BASE + BLOCK_2MB) /* 0xC00000 */
-#define SP_EL0_B (STACK_B_BASE + BLOCK_2MB) /* 0xE00000 */
+/* EL0 stack tops (top of each 2MiB region) */
+#define SP_EL0_A (STACK_A_BASE + BLOCK_2MIB) /* 0xC00000 */
+#define SP_EL0_B (STACK_B_BASE + BLOCK_2MIB) /* 0xE00000 */
 
 /* System register values (from main.c) */
@@ -134,7 +134,7 @@ static uint64_t pt_alloc(vm_state_t *vm)
     return off;
 }
 
-/* Build a 2MB block descriptor at a given GPA with RX or RW perms. */
+/* Build a 2MiB block descriptor at a given GPA with RX or RW perms. */
 static uint64_t make_block(uint64_t gpa, int perm)
 {
     uint64_t desc = (gpa & 0xFFFFFFFFE00000ULL) | PT_AF | PT_SH_ISH | PT_NS |
@@ -162,10 +162,10 @@ static uint64_t build_page_tables(vm_state_t *vm, int include_tlbi_region)
     uint64_t *l0 = (uint64_t *) ((uint8_t *) vm->host_base + l0_off);
     uint64_t *l1 = (uint64_t *) ((uint8_t *) vm->host_base + l1_off);
 
-    /* L0[0] -> L1 table (all the current addresses are < 512GB) */
+    /* L0[0] -> L1 table (all the current addresses are < 512GiB) */
     l0[0] = l1_off | PT_VALID | PT_TABLE;
 
-    /* L1[0] -> L2 table (all the current addresses are < 1GB) */
+    /* L1[0] -> L2 table (all the current addresses are < 1GiB) */
     uint64_t l2_off = pt_alloc(vm);
     if (!l2_off)
         return 0;
@@ -173,7 +173,7 @@ static uint64_t build_page_tables(vm_state_t *vm, int include_tlbi_region)
 
     uint64_t *l2 = (uint64_t *) ((uint8_t *) vm->host_base + l2_off);
 
-    /* Map 2MB blocks. L2 index = addr / 2MB. */
+    /* Map 2MiB blocks. L2 index = addr / 2MiB. */
 
     /* Shim code (RX) at 0x100000 -> L2[0] (shares 0x0-0x1FFFFF) */
     l2[0] = make_block(0x000000, PERM_RX);
@@ -199,8 +199,8 @@ static uint64_t build_page_tables(vm_state_t *vm, int include_tlbi_region)
 
     /* Stack B spills into 0xE00000 (SP=0xE00000 grows down into 0xC00000
      * block), already covered by L2[6] since SP_EL0_B = 0xE00000 is top of
-     * 0xC00000 block. Actually 0xE00000 = 7 * 2MB, that's a separate block. Map
-     * it too:
+     * 0xC00000 block. Actually 0xE00000 = 7 * 2MiB, that's a separate block.
+     * Map it too:
      */
     l2[7] = make_block(0xE00000, PERM_RW);
 
@@ -475,7 +475,7 @@ static int vm_create(vm_state_t *vm)
     vm->pt_next = PT_POOL_BASE;
 
     /* Query max IPA size and configure VM (matches guest.c pattern).
-     * The test uses only 16MB, so any IPA size works; this is for
+     * The test uses only 16MiB, so any IPA size works; this is for
      * API consistency with elfuse's production code path.
      */
     uint32_t max_ipa = 0;
diff --git a/tests/test-perf.sh b/tests/test-perf.sh
index f729cfc..8175409 100755
--- a/tests/test-perf.sh
+++ b/tests/test-perf.sh
@@ -104,10 +104,10 @@ benchmark "elfuse guest wc" sh -c "'$ELFUSE' '$TOOL_BIN/wc' -l '$SRC_SUBDIR'/*.c
 echo
 
 # --- Test 4: I/O throughput — cat large file through wc ---
-printf "${YELLOW}▸ cat ~10MB | wc -l (I/O throughput)${RESET}\n"
+printf "${YELLOW}▸ cat ~10MiB | wc -l (I/O throughput)${RESET}\n"
 TMPFILE=$(mktemp)
 trap 'rm -f "$TMPFILE"' EXIT
-# Build ~10MB test file by repeating syscall.c (~100 times)
+# Build ~10MiB test file by repeating syscall.c (~100 times)
 for _ in $(seq 1 100); do cat "$SYSCALL_C" >> "$TMPFILE"; done
 TMPSIZE=$(wc -c < "$TMPFILE" | tr -d ' ')
 printf "  ${CYAN}(test file: %s bytes)${RESET}\n" "$TMPSIZE"
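make_block in the test above composes an L2 block descriptor from individual VMSAv8-64 fields. A standalone version for the RW case; the bit positions are architectural, and the names mirror the test's PT_* defines (PT_NS omitted for brevity):

    #include <stdint.h>
    #include <stdio.h>

    #define PT_VALID     (1ULL << 0)
    #define PT_BLOCK     PT_VALID     /* bits[1:0] = 01: block at L2 */
    #define PT_ATTR1     (1ULL << 2)  /* MAIR attribute slot 1 */
    #define PT_AP_RW_EL0 (1ULL << 6)  /* AP[2:1] = 01: RW at EL0 and EL1 */
    #define PT_SH_ISH    (3ULL << 8)  /* inner shareable */
    #define PT_AF        (1ULL << 10) /* access flag: avoid access faults */
    #define PT_PXN       (1ULL << 53)
    #define PT_UXN       (1ULL << 54)

    /* RW, non-executable 2MiB block; gpa must be 2MiB-aligned. */
    static uint64_t make_block_rw(uint64_t gpa)
    {
        return (gpa & 0xFFFFFFFFE00000ULL) | PT_AF | PT_SH_ISH | PT_ATTR1 |
               PT_BLOCK | PT_AP_RW_EL0 | PT_UXN | PT_PXN;
    }

    int main(void)
    {
        printf("L2 desc = 0x%016llx\n",
               (unsigned long long) make_block_rw(0x600000));
        return 0;
    }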
diff --git a/tests/test-rwx.c b/tests/test-rwx.c
index e04b75e..180e743 100644
--- a/tests/test-rwx.c
+++ b/tests/test-rwx.c
@@ -9,8 +9,8 @@
  * page table entries work at stage-1 when SCTLR_EL1.WXN=0.
  *
  * Tests:
- *  1. RWX 2MB block: L2 block descriptor with AP=RW_EL0, UXN=0, PXN=0
- *  2. RWX 4KB page: L3 page descriptor with the same RWX permissions
+ *  1. RWX 2MiB block: L2 block descriptor with AP=RW_EL0, UXN=0, PXN=0
+ *  2. RWX 4KiB page: L3 page descriptor with the same RWX permissions
  *  3. Baseline RX: Confirm execution works on a normal RX page
 *  4. Baseline RW: Confirm writes work on a normal RW page
  *
@@ -65,9 +65,9 @@
 #define PT_AP_RO (3ULL << 6) /* AP[2:1]=11 -> RO at EL0 */
 
 #define PAGE_SIZE_4K 4096ULL
-#define BLOCK_2MB (2ULL * 1024 * 1024)
+#define BLOCK_2MIB (2ULL * 1024 * 1024)
 
-/* Memory layout (16MB total) */
+/* Memory layout (16MiB total) */
 
 #define GUEST_SIZE (16ULL * 1024 * 1024)
 
@@ -75,20 +75,20 @@
 #define SHIM_BASE 0x00100000ULL      /* Shim code (RX) */
 #define SHIM_DATA_BASE 0x00200000ULL /* Shim data / EL1 stack (RW) */
 #define GUEST_CODE 0x00400000ULL     /* EL0 test code (RX) */
-#define RWX_BLOCK 0x00600000ULL      /* 2MB block for RWX test (test 1) */
+#define RWX_BLOCK 0x00600000ULL      /* 2MiB block for RWX test (test 1) */
 #define RWX_PAGE_BLOCK \
-    0x00800000ULL /* 2MB region containing RWX 4KB page (test 2) */
+    0x00800000ULL /* 2MiB region containing RWX 4KiB page (test 2) */
 #define GUEST_DATA 0x00A00000ULL /* RW data (test 4 baseline) */
 #define STACK_BASE 0x00C00000ULL /* EL0 stack (RW) */
 
-/* Within RWX_PAGE_BLOCK, the RWX 4KB page is at offset 0 */
+/* Within RWX_PAGE_BLOCK, the RWX 4KiB page is at offset 0 */
 #define RWX_PAGE_ADDR RWX_PAGE_BLOCK
 
 /* EL0 stack top and SP_EL1 */
-#define SP_EL0 (STACK_BASE + BLOCK_2MB)
-#define SP_EL1 (SHIM_DATA_BASE + BLOCK_2MB)
+#define SP_EL0 (STACK_BASE + BLOCK_2MIB)
+#define SP_EL1 (SHIM_DATA_BASE + BLOCK_2MIB)
 
-/* Code offsets within GUEST_CODE (4KB apart for different tests) */
+/* Code offsets within GUEST_CODE (4KiB apart for different tests) */
 #define CODE_TEST1 0x0000ULL /* Test 1: RWX block write+exec */
 #define CODE_TEST2 0x1000ULL /* Test 2: RWX page write+exec */
 #define CODE_TEST3 0x2000ULL /* Test 3: baseline RX exec */
@@ -142,27 +142,27 @@ static uint64_t pt_alloc(vm_state_t *vm)
 
 /* Descriptor builders */
 
-/* Common base attributes for a 2MB block or 4KB page */
+/* Common base attributes for a 2MiB block or 4KiB page */
 static uint64_t common_attrs(void)
 {
     return PT_AF | PT_SH_ISH | PT_NS | PT_ATTR1;
 }
 
-/* 2MB block: RX (executable, read-only at EL0) */
+/* 2MiB block: RX (executable, read-only at EL0) */
 static uint64_t make_block_rx(uint64_t gpa)
 {
     return (gpa & 0xFFFFFFFFE00000ULL) | common_attrs() | PT_BLOCK | PT_AP_RO;
     /* UXN=0, PXN=0 -> executable */
 }
 
-/* 2MB block: RW (writable, not executable) */
+/* 2MiB block: RW (writable, not executable) */
 static uint64_t make_block_rw(uint64_t gpa)
 {
     return (gpa & 0xFFFFFFFFE00000ULL) | common_attrs() | PT_BLOCK |
            PT_AP_RW_EL0 | PT_UXN | PT_PXN;
 }
 
-/* 2MB block: RWX (writable AND executable at EL0, the test subject) */
+/* 2MiB block: RWX (writable AND executable at EL0, the test subject) */
 static uint64_t make_block_rwx(uint64_t gpa)
 {
     return (gpa & 0xFFFFFFFFE00000ULL) | common_attrs() | PT_BLOCK |
@@ -170,7 +170,7 @@ static uint64_t make_block_rwx(uint64_t gpa)
     /* UXN=0, PXN=0 -> executable; AP=01 -> writable at EL0 */
 }
 
-/* 4KB L3 page: RWX (writable AND executable at EL0) */
+/* 4KiB L3 page: RWX (writable AND executable at EL0) */
 static uint64_t make_page_rwx(uint64_t gpa)
 {
     return (gpa & 0xFFFFFFFFF000ULL) | common_attrs() | PT_VALID | PT_PAGE |
@@ -178,7 +178,7 @@ static uint64_t make_page_rwx(uint64_t gpa)
     /* UXN=0, PXN=0 -> executable; AP=01 -> writable at EL0 */
 }
 
-/* 4KB L3 page: RW (not executable) */
+/* 4KiB L3 page: RW (not executable) */
 static uint64_t make_page_rw(uint64_t gpa)
 {
     return (gpa & 0xFFFFFFFFF000ULL) | common_attrs() | PT_VALID | PT_PAGE |
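The L3 split used for Test 2 is mechanical: one 2MiB block becomes 512 page descriptors, so each page can carry its own permissions. A sketch of the fill loop, reusing the builders above (shown as prototypes here; the L2 slot must then point at this table with a VALID|TABLE descriptor):

    #include <stdint.h>

    #define PAGE_SIZE_4K 4096ULL

    /* Defined in the test above; declared here only for the sketch. */
    uint64_t make_page_rwx(uint64_t gpa);
    uint64_t make_page_rw(uint64_t gpa);

    /* Cover one 2MiB block with 512 L3 entries: page 0 RWX, rest RW. */
    static void fill_l3_for_block(uint64_t *l3, uint64_t block_gpa)
    {
        for (int i = 0; i < 512; i++) {
            uint64_t gpa = block_gpa + (uint64_t) i * PAGE_SIZE_4K;
            l3[i] = (i == 0) ? make_page_rwx(gpa) : make_page_rw(gpa);
        }
    }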
@@ -230,7 +230,7 @@ static uint64_t build_page_tables(vm_state_t *vm)
     l2[3] = make_block_rwx(0x600000);
 
     /* L2[4]: Table descriptor -> L3 page table for Test 2.
-     * the code splits this 2MB block into 512 x 4KB pages. The first page
+     * The code splits this 2MiB block into 512 x 4KiB pages. The first page
      * at 0x800000 is RWX, the rest are RW (non-executable).
      */
     {
@@ -519,9 +519,9 @@ static void print_bad_exception(const vcpu_exit_t *ex)
     }
 }
 
-/* TEST 1: RWX 2MB Block
+/* TEST 1: RWX 2MiB Block
  *
- * Stage-1 page table has a 2MB block at 0x600000 with:
+ * Stage-1 page table has a 2MiB block at 0x600000 with:
  *   AP[2:1]=01 (RW at EL0), UXN=0, PXN=0 (executable)
  * This is a true RWX mapping.
  *
@@ -678,10 +678,10 @@ static int test1_rwx_block(void)
     return result;
 }
 
-/* TEST 2: RWX 4KB Page (L3 descriptor)
+/* TEST 2: RWX 4KiB Page (L3 descriptor)
  *
- * Same as test 1, but using a 4KB L3 page descriptor at 0x800000
- * instead of a 2MB L2 block descriptor. Tests whether the
+ * Same as test 1, but using a 4KiB L3 page descriptor at 0x800000
+ * instead of a 2MiB L2 block descriptor. Tests whether the
  * granularity matters for W^X enforcement.
  */
 
@@ -753,13 +753,13 @@ static int test2_rwx_page(void)
             (unsigned long long) ex.x0, (unsigned long long) ex.x1,
             ex.x1 == 0 ? "exec fault -> flip to RX"
                        : "write fault -> flip to RW");
-        printf("    " YELLOW "HVF enforces W^X at stage-2 (4KB page)" RESET
+        printf("    " YELLOW "HVF enforces W^X at stage-2 (4KiB page)" RESET
                "\n");
         result = -1;
     } else if (ex.reason == HVF_EXIT_HVC5 && ex.x0 == 42) {
         printf("\n    " GREEN "RWX works!" RESET
-               " Written code executed (4KB page, x0=%llu)\n",
+               " Written code executed (4KiB page, x0=%llu)\n",
               (unsigned long long) ex.x0);
         result = 0;
@@ -769,11 +769,11 @@ static int test2_rwx_page(void)
         uint32_t ec = (uint32_t) (ex.esr >> 26) & 0x3F;
         if (ec == 0x20)
             printf("    " YELLOW
-                   "Instruction abort: W^X blocks execution (4KB page)" RESET
+                   "Instruction abort: W^X blocks execution (4KiB page)" RESET
                    "\n");
         else if (ec == 0x24)
-            printf("    " YELLOW "Data abort: W^X blocks write (4KB page)" RESET
-                   "\n");
+            printf("    " YELLOW
+                   "Data abort: W^X blocks write (4KiB page)" RESET "\n");
         result = -1;
     } else {
@@ -952,8 +952,8 @@ int main(void)
     } tests[] = {
         {"Baseline: RX execution", test3_baseline_rx},
         {"Baseline: RW write", test4_baseline_rw},
-        {"RWX 2MB block (write+exec)", test1_rwx_block},
-        {"RWX 4KB page (write+exec)", test2_rwx_page},
+        {"RWX 2MiB block (write+exec)", test1_rwx_block},
+        {"RWX 4KiB page (write+exec)", test2_rwx_page},
     };
     int ntests = (int) ARRAY_SIZE(tests);
diff --git a/tests/test-stress.c b/tests/test-stress.c
index 687afd3..f81d599 100644
--- a/tests/test-stress.c
+++ b/tests/test-stress.c
@@ -97,7 +97,7 @@ static void test_mmap_churn(void)
     TEST("mmap/munmap churn (256 cycles)");
 
 #define CHURN_CYCLES 256
-#define CHURN_SIZE (64 * 1024) /* 64KB each */
+#define CHURN_SIZE (64 * 1024) /* 64KiB each */
 
     bool ok = true;
     for (int i = 0; i < CHURN_CYCLES; i++) {
@@ -275,7 +275,7 @@ static void test_mprotect_cycling(void)
 
 static void test_large_mmap(void)
 {
-    TEST("large mmap (16MB)");
+    TEST("large mmap (16MiB)");
 
     size_t sz = 16 * 1024 * 1024;
     void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
@@ -285,7 +285,7 @@ static void test_large_mmap(void)
         return;
     }
 
-    /* Touch every page (4KB stride) */
+    /* Touch every page (4KiB stride) */
     volatile char *vp = (volatile char *) p;
     for (size_t off = 0; off < sz; off += 4096) {
         vp[off] = (char) (off >> 12);
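Both thread tests hand clone() a small static stack; the only trap is that AArch64 stacks grow downward, so clone() must receive the buffer's top. A runnable Linux example with the same 8KiB, 16-byte-aligned shape:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static char child_stack[8192] __attribute__((aligned(16)));

    static int child_fn(void *arg)
    {
        (void) arg;
        write(1, "child ran\n", 10);
        return 0;
    }

    int main(void)
    {
        /* Pass the TOP of the stack buffer: it grows down from there. */
        pid_t pid = clone(child_fn, child_stack + sizeof(child_stack),
                          SIGCHLD, NULL);
        if (pid < 0)
            return 1;
        waitpid(pid, NULL, 0);
        return 0;
    }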
diff --git a/tests/test-thread.c b/tests/test-thread.c
index d4f88c6..da427df 100644
--- a/tests/test-thread.c
+++ b/tests/test-thread.c
@@ -57,7 +57,7 @@ static void child_work(void)
 
 /* Tests */
 
-/* Stack for child thread (8KB, 16-byte aligned) */
+/* Stack for child thread (8KiB, 16-byte aligned) */
 static char child_stack_buf[8192] __attribute__((aligned(16)));
 
 /* Test 1: clone(CLONE_THREAD) creates a new thread that runs concurrently */