diff --git a/src/hpc/distance.rs b/src/hpc/distance.rs
index 7ef3c6d7..79f4229e 100644
--- a/src/hpc/distance.rs
+++ b/src/hpc/distance.rs
@@ -3,6 +3,23 @@
 //! SIMD-accelerated squared-distance, radius filtering, and K-nearest-neighbor
 //! searches over contiguous point slices. All operations work on borrowed slices
 //! with no internal copies. Scalar fallback is provided for non-x86 targets.
+//!
+//! # Slice-shape geometric distance (PR-X10 A6)
+//!
+//! For arbitrary-length f64 slices (non-3D-point shape), use:
+//!
+//! - [`l1_f64_simd`]  — Manhattan: `Σ |a_i − b_i|`
+//! - [`l2_f64_simd`]  — Euclidean: `√Σ (a_i − b_i)²`
+//! - [`linf_f64_simd`] — Chebyshev: `max |a_i − b_i|`
+//!
+//! These use the `F64x8` polyfill (no `target_feature`, no `unsafe`),
+//! matching the [`crate::hpc::heel_f64x8::cosine_f64_simd`] idiom: F64x8
+//! chunks with FMA / SIMD-max accumulator + scalar remainder. They are
+//! the salvaged kernels from the rolled-back PR #160 cross-repo arc
+//! (lance-graph `heel_f64x8::{l1, l2, linf}_f64_simd`), re-landed here
+//! per the linalg-core design's A6 worker scope and the
+//! `crate::hpc::linalg/mod.rs` hard boundary ("No distance metrics —
+//! those live in `crate::hpc::distance`").
 
 // ---------------------------------------------------------------------------
 // Scalar helpers
@@ -165,6 +182,108 @@ pub fn knn_f64(query: [f64; 3], points: &[[f64; 3]], k: usize) -> (Vec<usize>, V
     (indices, sq_dists)
 }
 
+// ---------------------------------------------------------------------------
+// Slice-shape geometric distance — PR-X10 A6
+// ---------------------------------------------------------------------------
+//
+// Polyfilled F64x8 chunked path with scalar remainder; no `target_feature`,
+// no `unsafe` — the polyfill in `crate::simd::F64x8` owns runtime feature
+// dispatch (AVX-512 native zmm / AVX2 2×ymm / scalar [f64; 8]).
+//
+// All three kernels read `min(a.len(), b.len())` elements. Empty inputs
+// return 0.0.
+
+use crate::simd::F64x8;
+
+/// Manhattan (L1) distance over two f64 slices: `Σ |a_i − b_i|`.
+///
+/// EXACT precision class. Full 8-lane blocks are accumulated lane-wise in
+/// an `F64x8` and collapsed once with `reduce_sum`; the trailing `n % 8`
+/// elements are then added one by one in index order. That summation
+/// order follows [`crate::hpc::heel_f64x8::cosine_f64_simd`], so the same
+/// determinism reasoning applies to both kernels.
+///
+/// Only the first `min(a.len(), b.len())` elements are read; empty → 0.0.
+pub fn l1_f64_simd(a: &[f64], b: &[f64]) -> f64 {
+    let n = a.len().min(b.len());
+    let simd_len = n - n % 8;
+    let (head_a, tail_a) = a[..n].split_at(simd_len);
+    let (head_b, tail_b) = b[..n].split_at(simd_len);
+
+    let mut lanes = F64x8::splat(0.0);
+    for (ca, cb) in head_a.chunks_exact(8).zip(head_b.chunks_exact(8)) {
+        lanes = lanes + (F64x8::from_slice(ca) - F64x8::from_slice(cb)).abs();
+    }
+    let mut total = lanes.reduce_sum();
+    for (x, y) in tail_a.iter().zip(tail_b) {
+        total += (x - y).abs();
+    }
+    total
+}
+
+/// Euclidean (L2) distance over two f64 slices: `√Σ (a_i − b_i)²`.
+///
+/// VERIFY precision class. Squared differences are accumulated per lane
+/// with one `mul_add` per 8-element block and collapsed with
+/// `reduce_sum`; the trailing `n % 8` squares are then added in index
+/// order ahead of the final `sqrt`. Results repeat for a fixed length and
+/// chunking; for order-independence see `blas_level1::nrm2` (pairwise).
+///
+/// Only the first `min(a.len(), b.len())` elements are read; empty → 0.0.
+pub fn l2_f64_simd(a: &[f64], b: &[f64]) -> f64 {
+    let n = a.len().min(b.len());
+    let simd_len = n - n % 8;
+    let (head_a, tail_a) = a[..n].split_at(simd_len);
+    let (head_b, tail_b) = b[..n].split_at(simd_len);
+
+    let mut lanes = F64x8::splat(0.0);
+    for (ca, cb) in head_a.chunks_exact(8).zip(head_b.chunks_exact(8)) {
+        let delta = F64x8::from_slice(ca) - F64x8::from_slice(cb);
+        lanes = delta.mul_add(delta, lanes); // lanes += delta² (one mul_add per block)
+    }
+    let mut sum_sq = lanes.reduce_sum();
+    for (x, y) in tail_a.iter().zip(tail_b) {
+        let delta = x - y;
+        sum_sq += delta * delta;
+    }
+    sum_sq.sqrt()
+}
+
+/// Chebyshev (L∞) distance over two f64 slices: `max |a_i − b_i|`.
+///
+/// EXACT precision class: beyond the per-element subtraction, `abs` and
+/// `max` add no rounding, and for non-NaN inputs the answer does not
+/// depend on the order in which blocks are visited (`max` is associative
+/// and commutative there).
+///
+/// Only the first `min(a.len(), b.len())` elements are read; empty → 0.0.
+///
+/// # NaN handling
+///
+/// NaN differences are not propagated reliably. The scalar tail compares
+/// with `>` and therefore skips a NaN difference; the vector path follows
+/// whatever `F64x8::simd_max` does with NaN (TODO confirm per backend).
+/// Screen inputs for NaN upstream before trusting the result.
+pub fn linf_f64_simd(a: &[f64], b: &[f64]) -> f64 {
+    let n = a.len().min(b.len());
+    let simd_len = n - n % 8;
+    let (head_a, tail_a) = a[..n].split_at(simd_len);
+    let (head_b, tail_b) = b[..n].split_at(simd_len);
+
+    let mut lanes = F64x8::splat(0.0);
+    for (ca, cb) in head_a.chunks_exact(8).zip(head_b.chunks_exact(8)) {
+        let gap = (F64x8::from_slice(ca) - F64x8::from_slice(cb)).abs();
+        lanes = lanes.simd_max(gap);
+    }
+    let mut max_d = lanes.reduce_max();
+    for (x, y) in tail_a.iter().zip(tail_b) {
+        let gap = (x - y).abs();
+        // `>` (not `f64::max`) keeps the existing NaN behaviour of the tail.
+        max_d = if gap > max_d { gap } else { max_d };
+    }
+    max_d
+}
+
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
@@ -315,4 +434,159 @@ mod tests {
         let result = squared_distances_f32(query, &points);
         assert!(approx_eq_f32(result[0], 0.0));
     }
+
+    // -- PR-X10 A6 slice-shape L1 / L2 / L∞ --
+    /// `true` when `|a − b|` is strictly below `tol`; any NaN operand gives `false`.
+    fn approx_eq_f64_tol(a: f64, b: f64, tol: f64) -> bool {
+        tol > (a - b).abs()
+    }
+
+    /// SplitMix64 step: bumps `state` by the fixed increment and returns the
+    /// mixed output. Same generator as the pillar harness, so corpora repeat.
+    fn splitmix(state: &mut u64) -> u64 {
+        *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
+        let x = *state;
+        let x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
+        let x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
+        x ^ (x >> 31)
+    }
+
+    fn random_vec_f64(seed: u64, n: usize) -> Vec<f64> {
+        let mut state = seed;
+        let mut out = Vec::with_capacity(n);
+        for _ in 0..n {
+            let mantissa = splitmix(&mut state) >> 11; // keep the top 53 bits
+            out.push((mantissa as f64) / (1u64 << 53) as f64 * 2.0 - 1.0); // uniform in [-1, 1)
+        }
+        out
+    }
+
+    // -- L1 boundary + parity --
+
+    #[test]
+    fn l1_f64_simd_self_zero() {
+        let v = random_vec_f64(0xC1A0, 200);
+        assert_eq!(l1_f64_simd(&v, &v), 0.0); // x − x is exactly 0 for every element
+    }
+
+    #[test]
+    fn l1_f64_simd_empty_is_zero() {
+        // No elements → no blocks and no tail: the zero accumulator is returned.
+        let empty: [f64; 0] = [];
+        assert_eq!(l1_f64_simd(&empty, &empty), 0.0);
+    }
+
+    #[test]
+    fn l1_f64_simd_uniform_diff() {
+        // 17 elements = two full blocks + one tail element; 17 · |3 − 1| = 34.
+        let (a, b) = ([3.0f64; 17], [1.0f64; 17]);
+        let dist = l1_f64_simd(&a, &b);
+        assert!(approx_eq_f64_tol(dist, 34.0, 1e-12));
+    }
+
+    #[test]
+    fn l1_f64_simd_matches_scalar() {
+        // Lengths straddle the 8-lane boundary: tail-only (1, 7), exact
+        // blocks (8, 16, 64, 200, 1024) and blocks + tail (15, 17, 199).
+        for &n in &[1usize, 7, 8, 15, 16, 17, 64, 199, 200, 1024] {
+            let a = random_vec_f64(0xA110_C1A0, n);
+            let b = random_vec_f64(0xB220_C1A0, n);
+            let got = l1_f64_simd(&a, &b);
+            let want = a.iter().zip(&b).map(|(x, y)| (x - y).abs()).sum::<f64>();
+            assert!(approx_eq_f64_tol(got, want, 1e-11), "n={} simd={:.15} scalar={:.15}", n, got, want);
+        }
+    }
+
+    // -- L2 boundary + parity --
+
+    #[test]
+    fn l2_f64_simd_self_zero() {
+        let v = random_vec_f64(0xC2A0, 200);
+        assert_eq!(l2_f64_simd(&v, &v), 0.0); // every squared difference is 0, and √0 = 0
+    }
+
+    #[test]
+    fn l2_f64_simd_empty_is_zero() {
+        // Nothing to accumulate: the sum of squares stays 0 and √0 = 0.
+        let empty: [f64; 0] = [];
+        assert_eq!(l2_f64_simd(&empty, &empty), 0.0);
+    }
+
+    #[test]
+    fn l2_f64_simd_pythagoras() {
+        // Legs 3 and 4 → hypotenuse √(9 + 16) = 5 (n = 2, tail-only path).
+        let a = [3.0f64, 0.0];
+        let b = [0.0f64, 4.0];
+        assert!(approx_eq_f64_tol(l2_f64_simd(&a, &b), 5.0, 1e-12));
+    }
+
+    #[test]
+    fn l2_f64_simd_matches_scalar() {
+        for &n in &[1usize, 7, 8, 15, 16, 17, 64, 199, 200, 1024] {
+            let a = random_vec_f64(0xA110_C2A0, n);
+            let b = random_vec_f64(0xB220_C2A0, n);
+            let got = l2_f64_simd(&a, &b);
+            // Reference: plain sequential sum of squares, then sqrt.
+            let want = a.iter().zip(&b).map(|(x, y)| (x - y).powi(2)).sum::<f64>().sqrt();
+            // The kernel sums per lane before reducing, so the two orders
+            // round differently — compare with a loose relative bound.
+            let rel = (got - want).abs() / want.max(1e-12);
+            assert!(rel < 1e-10, "n={} simd={:.15} scalar={:.15} rel={:.2e}", n, got, want, rel);
+        }
+    }
+
+    // -- L∞ boundary + parity --
+
+    #[test]
+    fn linf_f64_simd_self_zero() {
+        let v = random_vec_f64(0xC1FF, 200);
+        assert_eq!(linf_f64_simd(&v, &v), 0.0); // the largest of all-zero differences is 0
+    }
+
+    #[test]
+    fn linf_f64_simd_empty_is_zero() {
+        // No elements: the running maximum keeps its 0.0 starting value.
+        let empty: [f64; 0] = [];
+        assert_eq!(linf_f64_simd(&empty, &empty), 0.0);
+    }
+
+    #[test]
+    fn linf_f64_simd_picks_max_in_chunk() {
+        // n = 15 → one SIMD block (0..8) plus a 7-element scalar tail (8..15):
+        // index 5 sits in the block, index 13 in the tail, so both paths run.
+        let mut a = vec![0.0f64; 15];
+        let mut b = vec![0.0f64; 15];
+        a[5] = 0.5;
+        a[13] = -0.7; // |Δ| = 0.7 — should win
+        b[2] = 0.1;
+        assert!(approx_eq_f64_tol(linf_f64_simd(&a, &b), 0.7, 1e-12));
+    }
+
+    #[test]
+    fn linf_f64_simd_matches_scalar() {
+        for &n in &[1usize, 7, 8, 15, 16, 17, 64, 199, 200, 1024] {
+            let a = random_vec_f64(0xA110_C1FF, n);
+            let b = random_vec_f64(0xB220_C1FF, n);
+            let got = linf_f64_simd(&a, &b);
+            // Reference: running maximum of |a_i − b_i|, starting from 0.0.
+            let mut want = 0.0_f64;
+            for (x, y) in a.iter().zip(&b) {
+                want = want.max((x - y).abs());
+            }
+            assert!(approx_eq_f64_tol(got, want, 1e-15), "n={} simd={:.15} scalar={:.15}", n, got, want);
+        }
+    }
+
+    /// With unequal lengths the kernels only read the common prefix (10 here).
+    #[test]
+    fn slice_distances_mismatched_length_uses_min() {
+        let long = [1.0f64; 17];
+        let short = [2.0f64; 10];
+        // L1: ten differences of magnitude 1 → 10.
+        assert!(approx_eq_f64_tol(l1_f64_simd(&long, &short), 10.0, 1e-12));
+        // L2: √(10 · 1²) = √10.
+        assert!(approx_eq_f64_tol(l2_f64_simd(&long, &short), 10f64.sqrt(), 1e-12));
+        // L∞: every difference has magnitude 1.
+        assert!(approx_eq_f64_tol(linf_f64_simd(&long, &short), 1.0, 1e-12));
+    }
 }
diff --git a/src/hpc/dn_tree.rs b/src/hpc/dn_tree.rs
index 573153e9..4aea1b60 100644
--- a/src/hpc/dn_tree.rs
+++ b/src/hpc/dn_tree.rs
@@ -132,8 +132,15 @@ pub(crate) fn bundle_into(current: &GraphHV, hv: &GraphHV, lr: f64, boost: f64,
 /// Create a u64 bitmask where each bit is independently 1 with probability ~`p`.
 ///
 /// Uses cascaded AND of random words to achieve the target probability:
-/// - p >= 0.5 → OR of inverse masks
-/// - p < 0.5 → AND cascade
+/// - p > 0.5  → invert the (1-p) mask
+/// - p <= 0.5 → AND cascade
+///
+/// At exactly `p = 0.5` the AND-cascade branch executes a single
+/// `rng.next_u64()` (n = ceil(-log2(0.5)) = 1) — each bit is then
+/// IID Bernoulli(0.5). Note the **strict** comparison here: an earlier
+/// version used `p >= 0.5`, which recursed with `1.0 - 0.5 = 0.5`
+/// infinitely. The Pillar-13 drift-check (`hpc::pillar::hhtl_contraction`)
+/// already uses the strict comparison and is the canonical reference.
 fn make_probability_mask(p: f64, rng: &mut SplitMix64) -> u64 {
     if p >= 1.0 {
         return u64::MAX;
@@ -142,13 +149,14 @@ fn make_probability_mask(p: f64, rng: &mut SplitMix64) -> u64 {
         return 0;
     }
 
-    if p >= 0.5 {
-        // p >= 0.5: use OR approach — each AND of randoms gives ~0.25, NOT gives ~0.75, etc.
-        // Simpler: just AND enough randoms to get (1-p) kill rate, then NOT.
+    if p > 0.5 {
+        // p > 0.5: invert the (1-p) mask. Strict > 0.5 so p == 0.5
+        // falls through to the AND-cascade and produces a single
+        // Bernoulli(0.5) word in one rng draw.
         return !make_probability_mask(1.0 - p, rng);
     }
 
-    // p < 0.5: AND cascade. Each AND halves the probability.
+    // p <= 0.5: AND cascade. Each AND halves the probability.
     // We need n ANDs where 0.5^n ≈ p, so n = -log2(p).
     let n = (-p.log2()).ceil() as u32;
     let mut mask = rng.next_u64();
@@ -543,6 +551,38 @@ mod tests {
         SplitMix64::new(42)
     }
 
+    /// Regression: at exactly p = 0.5 the former `p >= 0.5` branch recursed
+    /// on `1.0 - 0.5 = 0.5` without end. With the strict `p > 0.5` test,
+    /// p = 0.5 takes the AND-cascade (n = 1, a single rng draw) and
+    /// returns immediately.
+    #[test]
+    fn make_probability_mask_at_half_terminates() {
+        let mut rng = make_rng();
+        // If this stack-overflows, the recursion fix has regressed.
+        let mask = make_probability_mask(0.5, &mut rng);
+        // Returning at all is the load-bearing check: every popcount in
+        // 0..=64 is a legal single Bernoulli(0.5) draw, so no value is
+        // asserted. `black_box` keeps the call from being optimised away.
+        let _ = std::hint::black_box(mask);
+    }
+
+    /// Statistical sanity check for p = 0.5: the mean popcount of N = 1024
+    /// masks should sit close to 32, the expectation of a 64-bit
+    /// Bernoulli(0.5) word.
+    #[test]
+    fn make_probability_mask_at_half_is_bernoulli_half() {
+        let mut rng = make_rng();
+        const N: u32 = 1024;
+        let total: u64 = (0..N)
+            .map(|_| u64::from(make_probability_mask(0.5, &mut rng).count_ones()))
+            .sum();
+        let mean = total as f64 / N as f64;
+        // Per-word σ = sqrt(64 · 0.5 · 0.5) = 4, so the mean's standard error
+        // is 4 / √1024 = 0.125; the 2.0 tolerance is about 16 standard errors.
+        let drift = (mean - 32.0).abs();
+        assert!(drift < 2.0, "make_probability_mask(0.5) mean popcount {mean:.4} not near 32");
+    }
+
     #[test]
     fn test_new_tree_structure() {
         let tree = DNTree::with_capacity(4096);
diff --git a/src/hpc/mod.rs b/src/hpc/mod.rs
index 11081ad6..ff7981fc 100644
--- a/src/hpc/mod.rs
+++ b/src/hpc/mod.rs
@@ -13,8 +13,10 @@
 //! - FFT (forward, inverse, real-to-complex)
 //! - VML (vectorized math library)
 
-// SIMD capability singleton — detect once, all modules share
-pub mod simd_caps;
+// SIMD capability singleton — graduated to crate root (it never depended
+// on anything else in `hpc/`); re-exported here for back-compat with
+// existing `crate::hpc::simd_caps::*` imports across the workspace.
+pub use crate::simd_caps;
 // LazyLock frozen SIMD dispatch — function pointers selected once at startup
 pub mod simd_dispatch;
 
diff --git a/src/hpc/ogit_bridge/schema.rs b/src/hpc/ogit_bridge/schema.rs
index 945b19b4..58671bd6 100644
--- a/src/hpc/ogit_bridge/schema.rs
+++ b/src/hpc/ogit_bridge/schema.rs
@@ -84,8 +84,19 @@ pub struct EntityClass {
     pub iri: Box<str>,
     /// Human-readable label (`rdfs:label`); empty string when absent.
     pub label: Box<str>,
-    /// Parent class IRI (`rdfs:subClassOf`); `None` for root classes.
+    /// First-observed parent class IRI (`rdfs:subClassOf`); `None` for
+    /// root classes. OWL allows a class to declare multiple
+    /// `rdfs:subClassOf` targets (multi-inheritance); the second and
+    /// later parents land in [`Self::extra_parents`]. Consumers wanting
+    /// the full parent set should iterate via [`Self::parents`].
     pub parent: Option<Box<str>>,
+    /// Additional parent IRIs beyond the first. Empty for single-parent
+    /// classes (the common case in RDFS-style ontologies); non-empty when
+    /// the source declares multi-inheritance (common in OWL biomedical
+    /// ontologies — FMA, ChEBI, etc.). Order is source order of the
+    /// surplus `rdfs:subClassOf` triples; the first parent stays in
+    /// [`Self::parent`].
+    pub extra_parents: Vec<Box<str>>,
     /// Properties declared with `ogit:mandatory`.
     pub mandatory: Vec<Property>,
     /// Properties declared with `ogit:optional`.
@@ -99,11 +110,27 @@ pub struct EntityClass {
 }
 
 impl EntityClass {
+    /// All parent class IRIs of this entity: the first-observed
+    /// [`Self::parent`] (when set), then each entry of
+    /// [`Self::extra_parents`] in stored order. Yields nothing for a
+    /// root class.
+    ///
+    /// Prefer this over reading `.parent` directly whenever the logic
+    /// has to honour multi-inheritance — for instance the ancestor walk
+    /// in [`OntologySchema::is_ancestor`].
+    pub fn parents(&self) -> impl Iterator<Item = &str> {
+        // `Option::iter` yields zero or one item, so `None` adds nothing.
+        let first = self.parent.iter().map(|iri| &**iri);
+        let rest = self.extra_parents.iter().map(|iri| &**iri);
+        first.chain(rest)
+    }
+
     fn new(iri: Box<str>) -> Self {
         EntityClass {
             iri,
             label: "".into(),
             parent: None,
+            extra_parents: Vec::new(),
             mandatory: Vec::new(),
             optional: Vec::new(),
             indexed: Vec::new(),
@@ -365,7 +392,19 @@ impl OntologySchema {
                 RDFS_SUB_CLASS_OF => {
                     if let Some(parent_iri) = node_iri(&triple.object) {
                         if let Some(cls) = entities.get_mut(subject_iri) {
-                            cls.parent = Some(parent_iri.into());
+                            // First parent → `parent`; subsequent
+                            // parents → `extra_parents` (multi-inheritance
+                            // as permitted by OWL; common in biomedical
+                            // ontologies like FMA / ChEBI). The previous
+                            // behaviour silently overwrote — the second
+                            // declared parent won, the first was discarded.
+                            if cls.parent.is_none() {
+                                cls.parent = Some(parent_iri.into());
+                            } else if cls.parent.as_deref() != Some(parent_iri)
+                                && !cls.extra_parents.iter().any(|p| p.as_ref() == parent_iri)
+                            {
+                                cls.extra_parents.push(parent_iri.into());
+                            }
                         }
                     }
                 }
@@ -641,26 +680,44 @@ impl OntologySchema {
             return false;
         }
 
-        // Walk the parent chain from descendant upward, looking for ancestor.
-        // Defensive depth cap — see method docstring.
-        const MAX_DEPTH: usize = 64;
-        let mut current: &str = descendant;
-        for _ in 0..MAX_DEPTH {
+        // Depth-first worklist walk (`frontier` is a LIFO stack) over the
+        // multi-parent DAG. The previous version followed only the linear
+        // `EntityClass.parent` chain — correct for single-inheritance
+        // schemas, but blind to ancestors reachable only through
+        // `EntityClass.extra_parents` (OWL multi-inheritance; FMA / ChEBI).
+        //
+        // # Termination
+        //
+        // `visited` is a monotonically-growing `HashSet<&str>` keyed by
+        // IRI; each parent IRI enters the set at most once. Frontier
+        // pushes are gated on `visited.insert(...)`, so every IRI is
+        // pushed at most once across the entire walk. Total work is
+        // therefore O(unique IRIs reachable from descendant) — finite
+        // by the schema's finiteness, regardless of branching factor
+        // or depth. No explicit visit cap is needed; previous codex P2
+        // pointed out that a hard cap would produce false-negatives on
+        // large biomedical ontologies (FMA: 75k classes; ChEBI: 200k+).
+        let mut frontier: Vec<&str> = vec![descendant];
+        let mut visited: std::collections::HashSet<&str> = std::collections::HashSet::new();
+        visited.insert(descendant);
+        while let Some(current) = frontier.pop() {
             let entity = match self.entities.get(current) {
                 Some(e) => e,
-                None => return false, // descendant unknown — no chain to walk
-            };
-            let parent = match entity.parent.as_deref() {
-                Some(p) => p,
-                None => return false, // reached root without finding ancestor
+                // Walk hit an unknown IRI mid-chain — that subtree of
+                // the closure terminates here. Continue exploring
+                // siblings rather than aborting, since other parents
+                // may yet reach `ancestor`.
+                None => continue,
             };
-            if parent == ancestor {
-                return true;
+            for parent in entity.parents() {
+                if parent == ancestor {
+                    return true;
+                }
+                if visited.insert(parent) {
+                    frontier.push(parent);
+                }
             }
-            current = parent;
         }
-        // Exceeded depth cap — treat as not-an-ancestor (defensive; this
-        // path should be unreachable on a well-formed schema).
         false
     }
 }
@@ -960,4 +1017,84 @@ mod tests {
         assert!(!schema.is_ancestor("ogit:Heel", "ogit:OtherHip"));
         assert!(!schema.is_ancestor("ogit:OtherHeel", "ogit:Hip"));
     }
+
+    // -----------------------------------------------------------------------
+    // Multi-inheritance — OWL biomedical-ontology shape (FMA, ChEBI, etc.)
+    // -----------------------------------------------------------------------
+
+    /// With two `rdfs:subClassOf` triples on one class, `is_ancestor` has
+    /// to report both targets as ancestors. The single-parent version
+    /// kept just one of them and silently dropped the other.
+    #[test]
+    fn is_ancestor_multi_parent_direct() {
+        // OWL-style fragment: ogit:Hybrid is declared a subclass of both
+        // ogit:Animal and ogit:Mineral.
+        let src = "\
+            @prefix ogit: <http://www.purl.org/ogit/> .\n\
+            @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\
+            ogit:Animal a rdfs:Class .\n\
+            ogit:Mineral a rdfs:Class .\n\
+            ogit:Hybrid a rdfs:Class ; rdfs:subClassOf ogit:Animal ; rdfs:subClassOf ogit:Mineral .";
+        let triples = TurtleParser::parse(src).unwrap();
+        let schema = OntologySchema::from_triples(&triples).unwrap();
+        for parent in ["ogit:Animal", "ogit:Mineral"] {
+            // Forward direction holds for each declared parent …
+            assert!(schema.is_ancestor(parent, "ogit:Hybrid"));
+            // … and the reverse never does (antisymmetry).
+            assert!(!schema.is_ancestor("ogit:Hybrid", parent));
+        }
+    }
+
+    /// Transitivity across multi-inheritance: an ancestor that can only be
+    /// reached via the SECOND parent of a class must be found as well —
+    /// exactly the case the earlier linear parent walk silently
+    /// overlooked.
+    #[test]
+    fn is_ancestor_multi_parent_transitive_through_second_parent() {
+        // Two separate chains meet at ogit:Hybrid:
+        //   ogit:Root1 ← ogit:Mid1 ← ogit:Hybrid (via the "first" parent)
+        //   ogit:Root2 ← ogit:Mid2 ← ogit:Hybrid (via the "second" parent)
+        let src = "\
+            @prefix ogit: <http://www.purl.org/ogit/> .\n\
+            @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\
+            ogit:Root1 a rdfs:Class .\n\
+            ogit:Mid1  a rdfs:Class ; rdfs:subClassOf ogit:Root1 .\n\
+            ogit:Root2 a rdfs:Class .\n\
+            ogit:Mid2  a rdfs:Class ; rdfs:subClassOf ogit:Root2 .\n\
+            ogit:Hybrid a rdfs:Class ; rdfs:subClassOf ogit:Mid1 ; rdfs:subClassOf ogit:Mid2 .";
+        let triples = TurtleParser::parse(src).unwrap();
+        let schema = OntologySchema::from_triples(&triples).unwrap();
+        // Chain behind the first parent.
+        let first_chain = ["ogit:Root1", "ogit:Mid1"];
+        assert!(first_chain.iter().all(|iri| schema.is_ancestor(iri, "ogit:Hybrid")));
+        // Chain behind the second parent — the one the previous
+        // implementation could not see.
+        let second_chain = ["ogit:Root2", "ogit:Mid2"];
+        assert!(second_chain.iter().all(|iri| schema.is_ancestor(iri, "ogit:Hybrid")));
+    }
+
+    /// `parents()` has to yield the `parent` IRI together with every
+    /// `extra_parents` IRI; membership (not position) is what is checked.
+    #[test]
+    fn entity_class_parents_iterator_yields_all() {
+        let src = "\
+            @prefix ogit: <http://www.purl.org/ogit/> .\n\
+            @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\
+            ogit:A a rdfs:Class .\n\
+            ogit:B a rdfs:Class .\n\
+            ogit:C a rdfs:Class .\n\
+            ogit:X a rdfs:Class ; rdfs:subClassOf ogit:A ; rdfs:subClassOf ogit:B ; rdfs:subClassOf ogit:C .";
+        let triples = TurtleParser::parse(src).unwrap();
+        let schema = OntologySchema::from_triples(&triples).unwrap();
+        let x = schema.entities.get("ogit:X").expect("ogit:X declared");
+        let parents: Vec<&str> = x.parents().collect();
+        assert_eq!(parents.len(), 3, "expected 3 parents, got {parents:?}");
+        // Which IRI lands in `parent` depends on triple processing order
+        // (the rest keep source order inside `extra_parents`), so compare
+        // as a set rather than as a sequence.
+        let seen: std::collections::HashSet<&str> = parents.iter().copied().collect();
+        for expected in ["ogit:A", "ogit:B", "ogit:C"] {
+            assert!(seen.contains(expected));
+        }
+    }
 }
diff --git a/src/hpc/pillar/hhtl_contraction.rs b/src/hpc/pillar/hhtl_contraction.rs
index 5f946dfe..ee10e57c 100644
--- a/src/hpc/pillar/hhtl_contraction.rs
+++ b/src/hpc/pillar/hhtl_contraction.rs
@@ -486,7 +486,12 @@ mod tests {
         use crate::hpc::dn_tree::{bundle_into, SplitMix64 as DnSplitMix64};
 
         const N_TRIALS: u32 = 16;
-        const TEST_LR: f64 = 0.25;
+        // Was 0.25 to avoid the latent p=0.5 infinite-recursion bug in
+        // production's make_probability_mask; that bug is fixed in the
+        // same commit/PR that updates this constant. lr=0.5 now matches
+        // Pillar 13's canonical mid-range learning rate and exercises
+        // the previously-broken branch.
+        const TEST_LR: f64 = 0.5;
 
         // Both SplitMix64 implementations use identical algorithm (same
         // multiplier constants 0x9E3779B97F4A7C15, 0xBF58476D1CE4E5B9,
diff --git a/src/lib.rs b/src/lib.rs
index 60edbcac..5b5851fd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -262,6 +262,16 @@ pub mod simd_nightly;
 #[cfg(target_arch = "x86_64")]
 pub mod simd_amx;
 
+/// SIMD capability detection (CPUID on x86_64, runtime feature detection
+/// on aarch64). One `LazyLock<SimdCaps>` detected at first access; every
+/// substrate dispatch site is one pointer deref. Graduated from
+/// `crate::hpc::simd_caps::*` in this same migration; the old path stays
+/// available as a `pub use` re-export inside `crate::hpc::*` for
+/// back-compat. Uses `std::sync::LazyLock`, hence the `std` gate (a
+/// `core::sync::LazyLock` follow-up could lift it).
+#[cfg(feature = "std")]
+pub mod simd_caps;
+
 #[cfg(feature = "std")]
 #[allow(clippy::all, missing_docs, dead_code, unused_variables, unused_imports)]
 pub mod simd_neon;
diff --git a/src/hpc/simd_caps.rs b/src/simd_caps.rs
similarity index 100%
rename from src/hpc/simd_caps.rs
rename to src/simd_caps.rs