diff --git a/src/hpc/distance.rs b/src/hpc/distance.rs index 7ef3c6d7..79f4229e 100644 --- a/src/hpc/distance.rs +++ b/src/hpc/distance.rs @@ -3,6 +3,23 @@ //! SIMD-accelerated squared-distance, radius filtering, and K-nearest-neighbor //! searches over contiguous point slices. All operations work on borrowed slices //! with no internal copies. Scalar fallback is provided for non-x86 targets. +//! +//! # Slice-shape geometric distance (PR-X10 A6) +//! +//! For arbitrary-length f64 slices (non-3D-point shape), use: +//! +//! - [`l1_f64_simd`] — Manhattan: `Σ |a_i − b_i|` +//! - [`l2_f64_simd`] — Euclidean: `√Σ (a_i − b_i)²` +//! - [`linf_f64_simd`] — Chebyshev: `max |a_i − b_i|` +//! +//! These use the `F64x8` polyfill (no `target_feature`, no `unsafe`), +//! matching the [`crate::hpc::heel_f64x8::cosine_f64_simd`] idiom: F64x8 +//! chunks with FMA / SIMD-max accumulator + scalar remainder. They are +//! the salvaged kernels from the rolled-back PR #160 cross-repo arc +//! (lance-graph `heel_f64x8::{l1, l2, linf}_f64_simd`), re-landed here +//! per the linalg-core design's A6 worker scope and the +//! `crate::hpc::linalg/mod.rs` hard boundary ("No distance metrics — +//! those live in `crate::hpc::distance`"). // --------------------------------------------------------------------------- // Scalar helpers @@ -165,6 +182,108 @@ pub fn knn_f64(query: [f64; 3], points: &[[f64; 3]], k: usize) -> (Vec, V (indices, sq_dists) } +// --------------------------------------------------------------------------- +// Slice-shape geometric distance — PR-X10 A6 +// --------------------------------------------------------------------------- +// +// Polyfilled F64x8 chunked path with scalar remainder; no `target_feature`, +// no `unsafe` — the polyfill in `crate::simd::F64x8` owns runtime feature +// dispatch (AVX-512 native zmm / AVX2 2×ymm / scalar [f64; 8]). +// +// All three kernels read `min(a.len(), b.len())` elements. Empty inputs +// return 0.0. 
+ +use crate::simd::F64x8; + +/// L1 (Manhattan) distance between two f64 slices: `Σ |a_i − b_i|`. +/// +/// EXACT precision class — the per-lane `(a - b).abs()` introduces no +/// rounding beyond the standard subtract. Each lane accumulates across chunks +/// in order, one `reduce_sum` folds the eight lanes after the loop, and the +/// scalar remainder is added last (matches the [`crate::hpc::heel_f64x8::cosine_f64_simd`] +/// order so callers can reason about determinism the same way). +/// +/// Reads `min(a.len(), b.len())` elements. Returns 0.0 for empty inputs. +pub fn l1_f64_simd(a: &[f64], b: &[f64]) -> f64 { + let n = a.len().min(b.len()); + let chunks = n / 8; + let mut acc = F64x8::splat(0.0); + for i in 0..chunks { + let va = F64x8::from_slice(&a[i * 8..]); + let vb = F64x8::from_slice(&b[i * 8..]); + acc = acc + (va - vb).abs(); + } + let mut sum = acc.reduce_sum(); + let offset = chunks * 8; + for i in 0..(n - offset) { + sum += (a[offset + i] - b[offset + i]).abs(); + } + sum +} + +/// L2 (Euclidean) distance between two f64 slices: `√Σ (a_i − b_i)²`. +/// +/// VERIFY precision class — the final `sqrt` is one ULP; squared differences +/// accumulate per lane across chunks, then one `reduce_sum` plus the scalar +/// remainder (same order pattern as L1). Determinism across runs holds for fixed slice +/// length and fixed chunking. For full order-independence use a +/// pairwise-reduce variant (see `blas_level1::nrm2`). +/// +/// Reads `min(a.len(), b.len())` elements. Returns 0.0 for empty inputs. 
+pub fn l2_f64_simd(a: &[f64], b: &[f64]) -> f64 { + let n = a.len().min(b.len()); + let chunks = n / 8; + let mut acc = F64x8::splat(0.0); + for i in 0..chunks { + let va = F64x8::from_slice(&a[i * 8..]); + let vb = F64x8::from_slice(&b[i * 8..]); + let d = va - vb; + acc = d.mul_add(d, acc); // acc += d*d (single FMA per chunk) + } + let mut sum_sq = acc.reduce_sum(); + let offset = chunks * 8; + for i in 0..(n - offset) { + let d = a[offset + i] - b[offset + i]; + sum_sq += d * d; + } + sum_sq.sqrt() +} + +/// L∞ (Chebyshev) distance between two f64 slices: `max |a_i − b_i|`. +/// +/// EXACT precision class — `abs` and `max` introduce no +/// rounding beyond the standard subtract; the result is determined by the inputs alone (order- +/// independent across chunks since `max` is associative and commutative +/// under IEEE-754 for non-NaN inputs). +/// +/// Reads `min(a.len(), b.len())` elements. Returns 0.0 for empty inputs. +/// +/// # NaN handling +/// +/// The x86 `_mm512_max_pd` intrinsic returns the second operand when either input +/// is NaN; callers passing NaN-tainted slices may observe a max that depends on +/// NaN position and operand order (an upstream constraint, not a kernel bug). Audit +/// upstream for NaN before relying on this kernel on production data. 
+pub fn linf_f64_simd(a: &[f64], b: &[f64]) -> f64 { + let n = a.len().min(b.len()); + let chunks = n / 8; + let mut max_v = F64x8::splat(0.0); + for i in 0..chunks { + let va = F64x8::from_slice(&a[i * 8..]); + let vb = F64x8::from_slice(&b[i * 8..]); + max_v = max_v.simd_max((va - vb).abs()); + } + let mut max_d = max_v.reduce_max(); + let offset = chunks * 8; + for i in 0..(n - offset) { + let d = (a[offset + i] - b[offset + i]).abs(); + if d > max_d { + max_d = d; + } + } + max_d +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -315,4 +434,159 @@ mod tests { let result = squared_distances_f32(query, &points); assert!(approx_eq_f32(result[0], 0.0)); } + + // -- PR-X10 A6 slice-shape L1 / L2 / L∞ -- + + fn approx_eq_f64_tol(a: f64, b: f64, tol: f64) -> bool { + (a - b).abs() < tol + } + + /// Deterministic SplitMix64 — matches the pillar harness so the + /// corpus is reproducible across runs and across machines. 
+ fn splitmix(state: &mut u64) -> u64 { + *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *state; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) + } + + fn random_vec_f64(seed: u64, n: usize) -> Vec { + let mut s = seed; + (0..n) + .map(|_| { + let bits = splitmix(&mut s) >> 11; + (bits as f64) / (1u64 << 53) as f64 * 2.0 - 1.0 // uniform in [-1, 1) + }) + .collect() + } + + // -- L1 boundary + parity -- + + #[test] + fn l1_f64_simd_self_zero() { + let a = random_vec_f64(0xC1A0, 200); + assert_eq!(l1_f64_simd(&a, &a), 0.0); + } + + #[test] + fn l1_f64_simd_empty_is_zero() { + let a: Vec = vec![]; + let b: Vec = vec![]; + assert_eq!(l1_f64_simd(&a, &b), 0.0); + } + + #[test] + fn l1_f64_simd_uniform_diff() { + let a = vec![3.0f64; 17]; + let b = vec![1.0f64; 17]; + // 17 * |3 - 1| = 34 + assert!(approx_eq_f64_tol(l1_f64_simd(&a, &b), 34.0, 1e-12)); + } + + #[test] + fn l1_f64_simd_matches_scalar() { + // 200 elements covers chunked path (25 chunks of 8) + remainder of 0; + // 199 covers chunked + remainder of 7. 
+ for &n in &[1usize, 7, 8, 15, 16, 17, 64, 199, 200, 1024] { + let a = random_vec_f64(0xA110_C1A0, n); + let b = random_vec_f64(0xB220_C1A0, n); + let simd = l1_f64_simd(&a, &b); + let scalar: f64 = a.iter().zip(&b).map(|(x, y)| (x - y).abs()).sum(); + assert!(approx_eq_f64_tol(simd, scalar, 1e-11), "n={} simd={:.15} scalar={:.15}", n, simd, scalar); + } + } + + // -- L2 boundary + parity -- + + #[test] + fn l2_f64_simd_self_zero() { + let a = random_vec_f64(0xC2A0, 200); + assert_eq!(l2_f64_simd(&a, &a), 0.0); + } + + #[test] + fn l2_f64_simd_empty_is_zero() { + let a: Vec = vec![]; + let b: Vec = vec![]; + assert_eq!(l2_f64_simd(&a, &b), 0.0); + } + + #[test] + fn l2_f64_simd_pythagoras() { + // (3, 0, …) vs (0, 4, …): √(9 + 16) = 5 + let a = vec![3.0f64, 0.0]; + let b = vec![0.0f64, 4.0]; + assert!(approx_eq_f64_tol(l2_f64_simd(&a, &b), 5.0, 1e-12)); + } + + #[test] + fn l2_f64_simd_matches_scalar() { + for &n in &[1usize, 7, 8, 15, 16, 17, 64, 199, 200, 1024] { + let a = random_vec_f64(0xA110_C2A0, n); + let b = random_vec_f64(0xB220_C2A0, n); + let simd = l2_f64_simd(&a, &b); + let sum_sq: f64 = a.iter().zip(&b).map(|(x, y)| (x - y).powi(2)).sum(); + let scalar = sum_sq.sqrt(); + // Sqrt is 1 ULP; cross-chunk summation order differs by chunks + // of 8 vs sequential — allow generous relative tolerance. + let rel = (simd - scalar).abs() / scalar.max(1e-12); + assert!(rel < 1e-10, "n={} simd={:.15} scalar={:.15} rel={:.2e}", n, simd, scalar, rel); + } + } + + // -- L∞ boundary + parity -- + + #[test] + fn linf_f64_simd_self_zero() { + let a = random_vec_f64(0xC1FF, 200); + assert_eq!(linf_f64_simd(&a, &a), 0.0); + } + + #[test] + fn linf_f64_simd_empty_is_zero() { + let a: Vec = vec![]; + let b: Vec = vec![]; + assert_eq!(linf_f64_simd(&a, &b), 0.0); + } + + #[test] + fn linf_f64_simd_picks_max_in_chunk() { + // Max difference must land inside a chunked path (index 5 < 8) and + // also outside (index 13 > 8) to exercise both halves. 
+ let mut a = vec![0.0f64; 16]; + let mut b = vec![0.0f64; 16]; + a[5] = 0.5; + a[13] = -0.7; // |Δ| = 0.7 — should win + b[2] = 0.1; + assert!(approx_eq_f64_tol(linf_f64_simd(&a, &b), 0.7, 1e-12)); + } + + #[test] + fn linf_f64_simd_matches_scalar() { + for &n in &[1usize, 7, 8, 15, 16, 17, 64, 199, 200, 1024] { + let a = random_vec_f64(0xA110_C1FF, n); + let b = random_vec_f64(0xB220_C1FF, n); + let simd = linf_f64_simd(&a, &b); + let scalar: f64 = a + .iter() + .zip(&b) + .map(|(x, y)| (x - y).abs()) + .fold(0.0_f64, f64::max); + assert!(approx_eq_f64_tol(simd, scalar, 1e-15), "n={} simd={:.15} scalar={:.15}", n, simd, scalar); + } + } + + /// Mismatched-length slices: must use the shorter length, no panic. + #[test] + fn slice_distances_mismatched_length_uses_min() { + let a = vec![1.0f64; 17]; + let b = vec![2.0f64; 10]; + // L1 over min=10: 10 * |1 - 2| = 10 + assert!(approx_eq_f64_tol(l1_f64_simd(&a, &b), 10.0, 1e-12)); + // L2 over min=10: √(10 * 1) = √10 + assert!(approx_eq_f64_tol(l2_f64_simd(&a, &b), 10f64.sqrt(), 1e-12)); + // L∞ = 1 + assert!(approx_eq_f64_tol(linf_f64_simd(&a, &b), 1.0, 1e-12)); + } } diff --git a/src/hpc/dn_tree.rs b/src/hpc/dn_tree.rs index 573153e9..4aea1b60 100644 --- a/src/hpc/dn_tree.rs +++ b/src/hpc/dn_tree.rs @@ -132,8 +132,15 @@ pub(crate) fn bundle_into(current: &GraphHV, hv: &GraphHV, lr: f64, boost: f64, /// Create a u64 bitmask where each bit is independently 1 with probability ~`p`. /// /// Uses cascaded AND of random words to achieve the target probability: -/// - p >= 0.5 → OR of inverse masks -/// - p < 0.5 → AND cascade +/// - p > 0.5 → invert the (1-p) mask +/// - p <= 0.5 → AND cascade +/// +/// At exactly `p = 0.5` the AND-cascade branch executes a single +/// `rng.next_u64()` (n = ceil(-log2(0.5)) = 1) — each bit is then +/// IID Bernoulli(0.5). Note the **strict** comparison here: an earlier +/// version used `p >= 0.5`, which recursed with `1.0 - 0.5 = 0.5` +/// infinitely. 
The Pillar-13 drift-check (`hpc::pillar::hhtl_contraction`) +/// already uses the strict comparison and is the canonical reference. fn make_probability_mask(p: f64, rng: &mut SplitMix64) -> u64 { if p >= 1.0 { return u64::MAX; @@ -142,13 +149,14 @@ fn make_probability_mask(p: f64, rng: &mut SplitMix64) -> u64 { return 0; } - if p >= 0.5 { - // p >= 0.5: use OR approach — each AND of randoms gives ~0.25, NOT gives ~0.75, etc. - // Simpler: just AND enough randoms to get (1-p) kill rate, then NOT. + if p > 0.5 { + // p > 0.5: invert the (1-p) mask. Strict > 0.5 so p == 0.5 + // falls through to the AND-cascade and produces a single + // Bernoulli(0.5) word in one rng draw. return !make_probability_mask(1.0 - p, rng); } - // p < 0.5: AND cascade. Each AND halves the probability. + // p <= 0.5: AND cascade. Each AND halves the probability. // We need n ANDs where 0.5^n ≈ p, so n = -log2(p). let n = (-p.log2()).ceil() as u32; let mut mask = rng.next_u64(); @@ -543,6 +551,38 @@ mod tests { SplitMix64::new(42) } + /// Regression: at p = 0.5 exactly, the previous `p >= 0.5` branch + /// recursed with `1.0 - 0.5 = 0.5` infinitely. The strict `p > 0.5` + /// fix routes p=0.5 to the AND-cascade (n=1, one rng draw) which + /// produces a Bernoulli(0.5) mask in O(1) time. + #[test] + fn make_probability_mask_at_half_terminates() { + let mut rng = make_rng(); + // If this stack-overflows, the recursion fix has regressed. + let mask = make_probability_mask(0.5, &mut rng); + // Bernoulli(0.5) over 64 bits — popcount should be near 32, but + // any value 0..=64 is valid for a single draw. The test's + // load-bearing assertion is that the call returns. NOTE(review): `mask <= u64::MAX` below is always true (likely flagged by clippy `absurd_extreme_comparisons`); it only keeps `mask` used. + assert!(mask <= u64::MAX); + } + + /// Empirical Bernoulli(0.5) check: average popcount over N=1024 + /// independent masks must land near 32 (the true mean) within a + /// generous tolerance. 
+ #[test] + fn make_probability_mask_at_half_is_bernoulli_half() { + let mut rng = make_rng(); + const N: u32 = 1024; + let mut total: u64 = 0; + for _ in 0..N { + total += make_probability_mask(0.5, &mut rng).count_ones() as u64; + } + let mean = total as f64 / N as f64; + // σ per word = sqrt(64 * 0.5 * 0.5) = 4; mean's SE = 4 / √N = 0.125. + // Tolerance 2.0 ≈ 16 SEs — comfortable margin against flakes. + assert!((mean - 32.0).abs() < 2.0, "make_probability_mask(0.5) mean popcount {mean:.4} not near 32"); + } + #[test] fn test_new_tree_structure() { let tree = DNTree::with_capacity(4096); diff --git a/src/hpc/mod.rs b/src/hpc/mod.rs index 11081ad6..ff7981fc 100644 --- a/src/hpc/mod.rs +++ b/src/hpc/mod.rs @@ -13,8 +13,10 @@ //! - FFT (forward, inverse, real-to-complex) //! - VML (vectorized math library) -// SIMD capability singleton — detect once, all modules share -pub mod simd_caps; +// SIMD capability singleton — graduated to crate root (it never depended +// on anything else in `hpc/`); re-exported here for back-compat with +// existing `crate::hpc::simd_caps::*` imports across the workspace. NOTE(review): `crate::simd_caps` is gated on `feature = "std"` in lib.rs — confirm `hpc` is gated the same way, otherwise this re-export fails to resolve without `std`. +pub use crate::simd_caps; // LazyLock frozen SIMD dispatch — function pointers selected once at startup pub mod simd_dispatch; diff --git a/src/hpc/ogit_bridge/schema.rs b/src/hpc/ogit_bridge/schema.rs index 945b19b4..58671bd6 100644 --- a/src/hpc/ogit_bridge/schema.rs +++ b/src/hpc/ogit_bridge/schema.rs @@ -84,8 +84,19 @@ pub struct EntityClass { pub iri: Box, /// Human-readable label (`rdfs:label`); empty string when absent. pub label: Box, - /// Parent class IRI (`rdfs:subClassOf`); `None` for root classes. + /// First-observed parent class IRI (`rdfs:subClassOf`); `None` for + /// root classes. OWL allows a class to declare multiple + /// `rdfs:subClassOf` targets (multi-inheritance); the second and + /// later parents land in [`Self::extra_parents`]. Consumers wanting + /// the full parent set should iterate via [`Self::parents`]. 
pub parent: Option>, + /// Additional parent IRIs beyond the first. Empty for single-parent + /// classes (the common case in RDFS-style ontologies); non-empty when + /// the source declares multi-inheritance (common in OWL biomedical + /// ontologies — FMA, ChEBI, etc.). Order is source order of the + /// surplus `rdfs:subClassOf` triples; the first parent stays in + /// [`Self::parent`]. + pub extra_parents: Vec>, /// Properties declared with `ogit:mandatory`. pub mandatory: Vec, /// Properties declared with `ogit:optional`. @@ -99,11 +110,27 @@ pub struct EntityClass { } impl EntityClass { + /// Iterator over every parent class IRI declared on this entity — + /// the first-observed [`Self::parent`] (if present) followed by + /// every IRI in [`Self::extra_parents`]. Empty when the class is + /// a root. + /// + /// Use this in preference to reading `.parent` directly when the + /// caller's logic should cover multi-inheritance — e.g. transitive + /// closure walks like [`OntologySchema::is_ancestor`]. + pub fn parents(&self) -> impl Iterator { + self.parent + .as_deref() + .into_iter() + .chain(self.extra_parents.iter().map(|s| s.as_ref())) + } + fn new(iri: Box) -> Self { EntityClass { iri, label: "".into(), parent: None, + extra_parents: Vec::new(), mandatory: Vec::new(), optional: Vec::new(), indexed: Vec::new(), @@ -365,7 +392,19 @@ impl OntologySchema { RDFS_SUB_CLASS_OF => { if let Some(parent_iri) = node_iri(&triple.object) { if let Some(cls) = entities.get_mut(subject_iri) { - cls.parent = Some(parent_iri.into()); + // First parent → `parent`; subsequent + // parents → `extra_parents` (multi-inheritance + // as permitted by OWL; common in biomedical + // ontologies like FMA / ChEBI). The previous + // behaviour silently overwrote — the second + // declared parent won, the first was discarded. 
+ if cls.parent.is_none() { + cls.parent = Some(parent_iri.into()); + } else if cls.parent.as_deref() != Some(parent_iri) + && !cls.extra_parents.iter().any(|p| p.as_ref() == parent_iri) + { + cls.extra_parents.push(parent_iri.into()); + } } } } @@ -641,26 +680,44 @@ impl OntologySchema { return false; } - // Walk the parent chain from descendant upward, looking for ancestor. - // Defensive depth cap — see method docstring. - const MAX_DEPTH: usize = 64; - let mut current: &str = descendant; - for _ in 0..MAX_DEPTH { + // Depth-first walk (LIFO `Vec` frontier) over the multi-parent DAG. The previous version walked a + // linear chain via `EntityClass.parent` alone — correct for + // single-inheritance schemas but missed ancestors reachable + // only through `EntityClass.extra_parents` (OWL multi-inheritance, + // common in FMA / ChEBI). + // + // # Termination + // + // `visited` is a monotonically-growing `HashSet<&str>` keyed by + // IRI; each parent IRI enters the set at most once. Frontier + // pushes are gated on `visited.insert(...)`, so every IRI is + // pushed at most once across the entire walk. Total work is + // therefore O(IRIs and parent edges reachable from descendant) — finite + // by the schema's finiteness, regardless of branching factor + // or depth. No explicit visit cap is needed; previous codex P2 + // pointed out that a hard cap would produce false-negatives on + // large biomedical ontologies (FMA: 75k classes; ChEBI: 200k+). + let mut frontier: Vec<&str> = vec![descendant]; + let mut visited: std::collections::HashSet<&str> = std::collections::HashSet::new(); + visited.insert(descendant); + while let Some(current) = frontier.pop() { let entity = match self.entities.get(current) { Some(e) => e, - None => return false, // descendant unknown — no chain to walk - }; - let parent = match entity.parent.as_deref() { - Some(p) => p, - None => return false, // reached root without finding ancestor + // Walk hit an unknown IRI mid-chain — that subtree of + // the closure terminates here. 
Continue exploring + // siblings rather than aborting, since other parents + // may yet reach `ancestor`. + None => continue, }; - if parent == ancestor { - return true; + for parent in entity.parents() { + if parent == ancestor { + return true; + } + if visited.insert(parent) { + frontier.push(parent); + } } - current = parent; } - // Exceeded depth cap — treat as not-an-ancestor (defensive; this - // path should be unreachable on a well-formed schema). false } } @@ -960,4 +1017,84 @@ mod tests { assert!(!schema.is_ancestor("ogit:Heel", "ogit:OtherHip")); assert!(!schema.is_ancestor("ogit:OtherHeel", "ogit:Hip")); } + + // ----------------------------------------------------------------------- + // Multi-inheritance — OWL biomedical-ontology shape (FMA, ChEBI, etc.) + // ----------------------------------------------------------------------- + + /// A class declaring two `rdfs:subClassOf` triples must reach both + /// ancestors through `is_ancestor`. The previous single-parent + /// implementation silently picked one and discarded the other. + #[test] + fn is_ancestor_multi_parent_direct() { + // Hand mimics an OWL fragment: ogit:Hybrid is both a kind of + // ogit:Animal AND a kind of ogit:Mineral. + let src = "\ + @prefix ogit: .\n\ + @prefix rdfs: .\n\ + ogit:Animal a rdfs:Class .\n\ + ogit:Mineral a rdfs:Class .\n\ + ogit:Hybrid a rdfs:Class ; rdfs:subClassOf ogit:Animal ; rdfs:subClassOf ogit:Mineral ."; + let triples = TurtleParser::parse(src).unwrap(); + let schema = OntologySchema::from_triples(&triples).unwrap(); + // Both parents must be reachable from the hybrid. + assert!(schema.is_ancestor("ogit:Animal", "ogit:Hybrid")); + assert!(schema.is_ancestor("ogit:Mineral", "ogit:Hybrid")); + // Reverse direction still false (antisymmetry). 
+ assert!(!schema.is_ancestor("ogit:Hybrid", "ogit:Animal")); + assert!(!schema.is_ancestor("ogit:Hybrid", "ogit:Mineral")); + } + + /// Multi-parent transitivity: an ancestor reachable only through + /// the SECOND parent of a multi-inheritance class must still be + /// found. This is the case the previous linear-walk implementation + /// silently missed. + #[test] + fn is_ancestor_multi_parent_transitive_through_second_parent() { + // Two disjoint chains converge at ogit:Hybrid: + // ogit:Root1 ← ogit:Mid1 ← ogit:Hybrid (via "first" parent) + // ogit:Root2 ← ogit:Mid2 ← ogit:Hybrid (via "second" parent) + let src = "\ + @prefix ogit: .\n\ + @prefix rdfs: .\n\ + ogit:Root1 a rdfs:Class .\n\ + ogit:Mid1 a rdfs:Class ; rdfs:subClassOf ogit:Root1 .\n\ + ogit:Root2 a rdfs:Class .\n\ + ogit:Mid2 a rdfs:Class ; rdfs:subClassOf ogit:Root2 .\n\ + ogit:Hybrid a rdfs:Class ; rdfs:subClassOf ogit:Mid1 ; rdfs:subClassOf ogit:Mid2 ."; + let triples = TurtleParser::parse(src).unwrap(); + let schema = OntologySchema::from_triples(&triples).unwrap(); + // Reachable through first parent chain. + assert!(schema.is_ancestor("ogit:Root1", "ogit:Hybrid")); + assert!(schema.is_ancestor("ogit:Mid1", "ogit:Hybrid")); + // Reachable through second parent chain — the case the + // previous implementation missed. + assert!(schema.is_ancestor("ogit:Root2", "ogit:Hybrid")); + assert!(schema.is_ancestor("ogit:Mid2", "ogit:Hybrid")); + } + + /// The `parents()` iterator must surface both `parent` and every + /// `extra_parents` IRI in source order. 
+ #[test] + fn entity_class_parents_iterator_yields_all() { + let src = "\ + @prefix ogit: .\n\ + @prefix rdfs: .\n\ + ogit:A a rdfs:Class .\n\ + ogit:B a rdfs:Class .\n\ + ogit:C a rdfs:Class .\n\ + ogit:X a rdfs:Class ; rdfs:subClassOf ogit:A ; rdfs:subClassOf ogit:B ; rdfs:subClassOf ogit:C ."; + let triples = TurtleParser::parse(src).unwrap(); + let schema = OntologySchema::from_triples(&triples).unwrap(); + let x = schema.entities.get("ogit:X").expect("ogit:X declared"); + let parents: Vec<&str> = x.parents().collect(); + assert_eq!(parents.len(), 3, "expected 3 parents, got {parents:?}"); + // First parent populates `parent`; the rest go to extra_parents. + // Source-order is preserved within extra_parents but the "first" + // parent depends on triple processing order, so just check set. + let parent_set: std::collections::HashSet<&str> = parents.iter().copied().collect(); + assert!(parent_set.contains("ogit:A")); + assert!(parent_set.contains("ogit:B")); + assert!(parent_set.contains("ogit:C")); + } } diff --git a/src/hpc/pillar/hhtl_contraction.rs b/src/hpc/pillar/hhtl_contraction.rs index 5f946dfe..ee10e57c 100644 --- a/src/hpc/pillar/hhtl_contraction.rs +++ b/src/hpc/pillar/hhtl_contraction.rs @@ -486,7 +486,12 @@ mod tests { use crate::hpc::dn_tree::{bundle_into, SplitMix64 as DnSplitMix64}; const N_TRIALS: u32 = 16; - const TEST_LR: f64 = 0.25; + // Was 0.25 to avoid the latent p=0.5 infinite-recursion bug in + // production's make_probability_mask; that bug is fixed in the + // same commit/PR that updates this constant. lr=0.5 now matches + // Pillar 13's canonical mid-range learning rate and exercises + // the previously-broken branch. 
+ const TEST_LR: f64 = 0.5; // Both SplitMix64 implementations use identical algorithm (same // multiplier constants 0x9E3779B97F4A7C15, 0xBF58476D1CE4E5B9, diff --git a/src/lib.rs b/src/lib.rs index 60edbcac..5b5851fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -262,6 +262,16 @@ pub mod simd_nightly; #[cfg(target_arch = "x86_64")] pub mod simd_amx; +/// SIMD capability detection (CPUID on x86_64, runtime feature detection +/// on aarch64). One `LazyLock` detected at first access; every +/// substrate dispatch site is one pointer deref. Graduated from +/// `crate::hpc::simd_caps::*` in this same migration; the old path stays +/// available as a `pub use` re-export inside `crate::hpc::*` for +/// back-compat. Uses `std::sync::LazyLock`, hence the `std` gate (a +/// `no_std`-compatible once-cell follow-up could lift it; there is currently no `core::sync::LazyLock`). +#[cfg(feature = "std")] +pub mod simd_caps; + #[cfg(feature = "std")] #[allow(clippy::all, missing_docs, dead_code, unused_variables, unused_imports)] pub mod simd_neon; diff --git a/src/hpc/simd_caps.rs b/src/simd_caps.rs similarity index 100% rename from src/hpc/simd_caps.rs rename to src/simd_caps.rs