Skip to content

simd_half: TD-SIMD-8 — F16C-vectorized F16↔f32 batch conversion #403

simd_half: TD-SIMD-8 — F16C-vectorized F16↔f32 batch conversion

simd_half: TD-SIMD-8 — F16C-vectorized F16↔f32 batch conversion #403

Workflow file for this run

# Events that start this workflow: pull requests, merge-queue groups, and
# pushes to the default branches.
on:
  pull_request:
    # Pull requests that only touch the latest-deps workflow file do not
    # start this workflow.
    paths-ignore:
      - '.github/workflows/latest-deps.yaml'
  merge_group:
  push:
    branches: [master, main]
name: Continuous integration
# Workflow-wide environment, inherited by every job unless a job overrides
# a key in its own `env`.
env:
  MSRV: "1.95.0"
  BLAS_MSRV: "1.95.0"
  HOST: "x86_64-unknown-linux-gnu"
  FEATURES: "approx,serde,rayon"
  CARGO_TERM_COLOR: "always"
  # No `-C target-cpu=x86-64-v3` here. A global target-cpu conflicts with
  # the cross_test matrix (`i686-unknown-linux-gnu` is 32-bit and `s390x`
  # is not x86 at all), and it contradicts the design intent recorded in
  # `.cargo/config.toml`: per-function `#[target_feature]` plus runtime
  # `LazyLock<Tier>` detection means one binary serves all ISAs. A job
  # that needs a higher target-cpu opts in through its own per-job env.
  RUSTFLAGS: "-D warnings"
jobs:
# Republishes the workflow-level MSRV values as job outputs so that other
# jobs can read them through the `needs` context (the `tests` job uses the
# value in its matrix, `blas-msrv` uses it as a toolchain input).
pass-msrv:
  name: Pass MSRV values to other jobs
  runs-on: ubuntu-latest
  outputs:
    # Sourced from the step below, so the step that writes the values is
    # the same one the job outputs read from.
    MSRV: ${{ steps.pass-msrv.outputs.MSRV }}
    BLAS_MSRV: ${{ steps.pass-msrv.outputs.BLAS_MSRV }}
  steps:
    - name: Pass MSRV
      # The id is required: without it nothing can reference what this
      # step appends to $GITHUB_OUTPUT.
      id: pass-msrv
      run: |
        echo "MSRV=${{ env.MSRV }}" >> "$GITHUB_OUTPUT"
        echo "BLAS_MSRV=${{ env.BLAS_MSRV }}" >> "$GITHUB_OUTPUT"
# Lints the crate with clippy on the pinned toolchain, once with the
# default CI feature set and once with the `native` feature.
clippy:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Pinned to 1.95.0 to match `rust-toolchain.toml`. The workspace
      # bumped from 1.94.1 → 1.95.0 in
      # https://github.com/AdaWorldAPI/ndarray (this PR) to align with
      # bevy (edition 2024 → 1.95 MSRV) and lance-graph. 1.95 added the
      # `clippy::manual_checked_ops` lint which fires on
      # `impl_owned_array.rs::into_scalar` — fixed in this same PR.
      rust:
        - "1.95.0"
  name: clippy/${{ matrix.rust }}
  steps:
    - uses: actions/checkout@v4
    # Install whatever the matrix names, so the job name and the toolchain
    # that actually gets installed cannot drift apart.
    - uses: dtolnay/rust-toolchain@master
      with:
        toolchain: ${{ matrix.rust }}
        components: clippy
    - uses: Swatinem/rust-cache@v2
    - run: cargo clippy --features approx,serde,rayon -- -D warnings
    - run: cargo clippy --features native -- -D warnings
# Formatting gate. This job is blocking: there is no `continue-on-error`.
#
# History: `rustfmt.toml` lost all of its nightly-only directives in
# PR #133 (`7ca1cf3d chore(fmt): strip unstable rustfmt options, keep
# stable-only config` + `ed59b3b1 style: normalize codebase to stable
# rustfmt 1.94.1`) and the codebase was normalized, so stable rustfmt
# enforces the project's style losslessly. The earlier combination of
# `continue-on-error: true` and `dtolnay/rust-toolchain@nightly` was a
# stale band-aid from when `brace_style = AlwaysNextLine`,
# `imports_granularity = Preserve` and `unstable_features = true` still
# lived in rustfmt.toml. That state no longer exists, and the band-aid
# was removed in this PR.
format:
  name: format/stable
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    # Stable rustfmt from 1.95.0, the version pinned in
    # `rust-toolchain.toml`. Nightly is not needed because rustfmt.toml
    # has been stable-clean since PR #133.
    - uses: dtolnay/rust-toolchain@1.95.0
      with:
        components: rustfmt
    - run: cargo fmt --all --check
# Builds the library for a bare-metal target with default features off.
nostd:
  name: nostd/${{ matrix.target }}/${{ matrix.rust }}
  runs-on: ubuntu-latest
  # A matrix entry may fail without failing the job only when it is
  # flagged experimental.
  continue-on-error: ${{ matrix.experimental }}
  strategy:
    matrix:
      include:
        - target: thumbv6m-none-eabi
          rust: stable
          experimental: false
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
      with:
        targets: ${{ matrix.target }}
        toolchain: ${{ matrix.rust }}
    # rust-toolchain.toml pins 1.95.0, which may differ from the matrix
    # toolchain that the action above installed the target for. Add the
    # cross-compile target to the pinned toolchain as well.
    - run: rustup target add ${{ matrix.target }}
    - name: Tests
      run: |
        # Scope to `-p ndarray` so workspace dev-deps (criterion → serde_core
        # → getrandom) don't get evaluated against the no_std target. The
        # library itself builds cleanly under no_std + portable-atomic-
        # critical-section; only its dev-dependency tree pulls std-requiring
        # crates that have no business existing in the nostd build.
        cargo rustc -p ndarray "--target=${{ matrix.target }}" --no-default-features --features portable-atomic-critical-section
# Runs the full test script on stable, beta, and the MSRV published by the
# `pass-msrv` job.
tests:
  runs-on: ubuntu-latest
  needs: pass-msrv
  strategy:
    matrix:
      rust:
        - stable
        - beta
        - ${{ needs.pass-msrv.outputs.MSRV }}
  name: tests/${{ matrix.rust }}
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@master
      with:
        toolchain: ${{ matrix.rust }}
    - uses: rui314/setup-mold@v1
    - uses: Swatinem/rust-cache@v2
    - uses: taiki-e/install-action@nextest
    - name: Install openblas
      # Refresh the package index first so a stale runner image does not
      # point at packages that are gone from the mirror, and pass `-y`
      # because there is nobody to answer a confirmation prompt.
      run: |
        sudo apt-get update
        sudo apt-get install -y libopenblas-dev gfortran
    - run: ./scripts/all-tests.sh "$FEATURES" ${{ matrix.rust }}
# Builds and tests the crate with the `native` feature enabled, alone and
# together with `approx`.
native-backend:
  name: native-backend/stable
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2
    - uses: taiki-e/install-action@nextest
    - name: Build native backend
      run: cargo build --features native
    - name: Test native backend
      run: cargo nextest run -p ndarray --features native
    - name: Test native + approx
      run: cargo nextest run -p ndarray --features native,approx
# D-CSV-17 (sprint-13 W-I4): rayon par_* variants for hpc::stream.
# The job ships together with the par_* implementation so that the rayon
# feature gate is always exercised alongside the code it guards, which
# prevents silent-dead-code drift (spec §0 + worker-template-v2 §5 CI
# ownership).
hpc-stream-parallel:
  name: hpc-stream-parallel/rayon
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@1.95.0
    - uses: Swatinem/rust-cache@v2
    - uses: taiki-e/install-action@nextest
    # Check both sides of the feature gate before running anything.
    - name: cargo check (no rayon — scalar path unchanged)
      run: cargo check -p ndarray
    - name: cargo check --features rayon
      run: cargo check -p ndarray --features rayon
    - name: par_* stream tests (--features rayon, hpc::stream filter)
      run: cargo nextest run -p ndarray --features rayon -E 'test(hpc::stream)'
    - name: clippy --features rayon
      run: cargo clippy -p ndarray --features rayon --lib -- -D warnings
tier4-avx512-check:
  # Phase 6 of the SIMD integration plan (.claude/knowledge/
  # simd-dispatch-architecture.md). Exercises the AVX-512 dispatch
  # arm (`#[cfg(target_feature = "avx512f")]` in `src/simd.rs`) so it
  # doesn't bit-rot under the v3-default cargo config.
  #
  # `cargo check` rather than `cargo test`: GH-hosted `ubuntu-latest`
  # runners may not have AVX-512 silicon (intermittent across SKUs),
  # so a v4-baked binary would SIGILL at run time. `check` compiles
  # the AVX-512 code path through the type checker + borrow checker
  # + monomorphization without producing a runnable artifact —
  # catches type mismatches and dispatch-arm holes that the v3
  # default never touches.
  #
  # Why `CARGO_TARGET_<triple>_RUSTFLAGS` instead of plain `RUSTFLAGS`:
  #
  # The first iteration used `env: RUSTFLAGS: "-Ctarget-cpu=x86-64-v4"`
  # and failed in ~23 s — RUSTFLAGS env applies to BOTH the target
  # compilation AND host build scripts (`build.rs` artifacts that
  # cargo runs natively). On a GH runner without AVX-512 silicon,
  # those v4-baked build scripts SIGILL during the dep build.
  #
  # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS` is documented to
  # apply only when cargo is producing artifacts for that triple, NOT
  # to host build scripts. Combined with explicit `--target` (so cargo
  # distinguishes host from target even when they're the same triple),
  # this gives us "v4 for our crate, baseline for build scripts."
  #
  # Why the steps `unset RUSTFLAGS`:
  #
  # The workflow-level `env` sets `RUSTFLAGS: "-D warnings"` and this job
  # inherits it. Cargo's rustflags sources are mutually exclusive and the
  # `RUSTFLAGS` environment variable is consulted before
  # `target.<triple>.rustflags`, so while `RUSTFLAGS` is set (even to an
  # empty string) the per-target variable below is ignored and the v4
  # target-cpu is never applied. Removing the variable from the step's
  # shell lets the per-target flags take effect; `-D warnings` is
  # repeated in the per-target value so the lint level is unchanged.
  # NOTE(review): if `.cargo/config.toml` also defines `target.<cfg>`
  # rustflags, cargo joins those in — confirm the combined flags.
  runs-on: ubuntu-latest
  name: tier4-avx512-check
  env:
    CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-D warnings -Ctarget-cpu=x86-64-v4"
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2
    - name: cargo check (v4 / AVX-512 dispatch arm)
      run: |
        unset RUSTFLAGS
        cargo check --target=x86_64-unknown-linux-gnu -p ndarray --features approx,serde,rayon
    - name: cargo check (v4 / AVX-512 + hpc-extras)
      run: |
        unset RUSTFLAGS
        cargo check --target=x86_64-unknown-linux-gnu -p ndarray --features approx,serde,rayon,hpc-extras
nightly-simd-polyfill:
  # TD-SIMD-9 from .claude/knowledge/simd-dispatch-architecture.md.
  # Keeps the `feature = "nightly-simd"` dispatch arm in `src/simd.rs`
  # (added in Phase 2 / PR #173) compiling, so the portable
  # `core::simd::*` backend doesn't bit-rot between PRs.
  #
  # Nightly is required because `src/simd_nightly/*` uses
  # `#![feature(portable_simd)]`, which only compiles on nightly rustc.
  # The stable `cargo check` paths in tests/clippy never reach this arm.
  #
  # `cargo check` instead of a full build/test: this is pure
  # type/borrow/cfg verification. It catches arm shadowing bugs and the
  # kind of API break codex flagged on PR #173 (lowercase aliases
  # dropped).
  #
  # Limited to merge_group + push (not every PR) so nightly toolchain
  # pulls stay off the per-PR critical path. Mirrors the gating used by
  # the `miri` job.
  name: nightly-simd-polyfill
  if: github.event_name == 'merge_group' || github.event_name == 'push'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@nightly
    - uses: Swatinem/rust-cache@v2
    - name: cargo check (nightly + nightly-simd feature)
      run: cargo check -p ndarray --features approx,serde,rayon,nightly-simd
    - name: cargo check (nightly + nightly-simd + hpc-extras)
      run: cargo check -p ndarray --features approx,serde,rayon,nightly-simd,hpc-extras
# Runs the BLAS integration test script on the BLAS MSRV toolchain
# published by the `pass-msrv` job.
blas-msrv:
  runs-on: ubuntu-latest
  name: blas-msrv
  needs: pass-msrv
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@master
      with:
        toolchain: ${{ needs.pass-msrv.outputs.BLAS_MSRV }}
    - uses: rui314/setup-mold@v1
    - uses: Swatinem/rust-cache@v2
    - uses: taiki-e/install-action@nextest
    - name: Install openblas
      # Refresh the package index first so a stale runner image does not
      # point at packages that are gone from the mirror, and pass `-y`
      # because there is nobody to answer a confirmation prompt.
      run: |
        sudo apt-get update
        sudo apt-get install -y libopenblas-dev gfortran
    # BLAS_MSRV comes from the workflow-level env; quoted so the script
    # always receives exactly one argument.
    - run: ./scripts/blas-integ-tests.sh "$BLAS_MSRV"
miri:
  # Needs nightly. Miri cannot run SIMD intrinsics or FFI.
  # Restricted to the merge queue and pushes, so it never blocks PR checks.
  name: miri
  if: github.event_name == 'merge_group' || github.event_name == 'push'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@nightly
      with:
        components: miri
    - uses: Swatinem/rust-cache@v2
    - uses: taiki-e/install-action@nextest
    - run: ./scripts/miri-tests.sh
cross_test:
  # Merge-queue only. Cross-compiling for s390x / i686 through docker
  # (cross-rs) is slow, and it is flaky on the s390x docker image's
  # toolchain resolution (the `rust-toolchain.toml` pin doesn't resolve
  # cleanly inside the s390x cross container). The same code is reliably
  # covered by the `tests/{stable,beta,MSRV}` jobs on every PR push.
  # Cross validation is therefore reserved for the merge queue, where it
  # can fail loudly without gating individual PRs on infra flakiness.
  # The `if:` below was previously commented out; it was the original
  # intent (per the pre-existing comment) and was enabled following the
  # PR #143 codex thread that surfaced this consistently.
  name: cross_test/${{ matrix.target }}/${{ matrix.rust }}
  if: ${{ github.event_name == 'merge_group' }}
  runs-on: ubuntu-latest
  strategy:
    matrix:
      include:
        - target: s390x-unknown-linux-gnu
          rust: stable
        - target: i686-unknown-linux-gnu
          rust: stable
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@master
      with:
        targets: ${{ matrix.target }}
        toolchain: ${{ matrix.rust }}
    - uses: rui314/setup-mold@v1
    - uses: Swatinem/rust-cache@v2
    - name: Install cross
      run: cargo install cross
    - run: ./scripts/cross-tests.sh "approx,serde,rayon" ${{ matrix.rust }} ${{ matrix.target }}
cargo-careful:
  # Needs nightly. Restricted to the merge queue and pushes, so it never
  # blocks PR checks.
  name: cargo-careful
  if: github.event_name == 'merge_group' || github.event_name == 'push'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@nightly
    - uses: Swatinem/rust-cache@v2
    - uses: taiki-e/install-action@nextest
    - name: Install cargo-careful
      run: cargo install cargo-careful
    # FEATURES is the workflow-level feature list.
    - run: cargo careful nextest run -Zcareful-sanitizer --features="$FEATURES"
docs:
  # `--cfg docsrs` needs nightly. Restricted to the merge queue and pushes.
  name: docs/nightly
  if: github.event_name == 'merge_group' || github.event_name == 'push'
  runs-on: ubuntu-latest
  env:
    # Rustdoc warnings are errors; docs are built with the docsrs cfg set.
    RUSTDOCFLAGS: "-Dwarnings --cfg docsrs"
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@nightly
    - run: cargo doc --no-deps --all-features
# Single aggregate status for the workflow: succeeds only when every job
# listed under `needs` finished as `success` or `skipped`.
# NOTE(review): `blas-msrv` is not listed under `needs` — confirm that
# leaving it out of the gate is intentional.
conclusion:
  needs:
    - clippy
    - format
    - nostd
    - tests
    - native-backend
    - hpc-stream-parallel
    - tier4-avx512-check
    - nightly-simd-polyfill
    - miri
    - cross_test
    - cargo-careful
    - docs
  # Run even when a needed job failed or was skipped. By default a job is
  # skipped as soon as any job it needs fails or is skipped. Here
  # nightly-simd-polyfill, miri, cross_test, cargo-careful and docs are all
  # skipped on pull_request events by their own `if:` guards, so without
  # this condition the check below would never run on a PR.
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Result
      run: |
        jq -C <<< "${needs}"
        # Check if all needs were successful or skipped.
        "$(jq -r 'all(.result as $result | (["success", "skipped"] | contains([$result])))' <<< "${needs}")"
      env:
        # JSON of the `needs` context; the script reads each job's `result`.
        needs: ${{ toJson(needs) }}