simd_half: TD-SIMD-8 — F16C-vectorized F16↔f32 batch conversion #403
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow triggers: pull requests (skipped when the only changed file is | |
| # the latest-deps workflow), merge-queue runs, and pushes to master/main. | |
| on: | |
| pull_request: | |
| paths-ignore: | |
| - '.github/workflows/latest-deps.yaml' | |
| merge_group: | |
| push: | |
| branches: | |
| - master | |
| - main | |
| # Display name of the workflow in the Actions UI. | |
| name: Continuous integration | |
| # Workflow-level environment, visible to every job and step below. | |
| env: | |
| CARGO_TERM_COLOR: always | |
| HOST: x86_64-unknown-linux-gnu | |
| # Default feature set; read as "$FEATURES" by the tests and | |
| # cargo-careful jobs. | |
| FEATURES: "approx,serde,rayon" | |
| # `-C target-cpu=x86-64-v3` was removed from the global env. It conflicts | |
| # with the cross_test matrix (`i686-unknown-linux-gnu` is 32-bit, `s390x` | |
| # isn't even x86) and contradicts the design intent recorded in | |
| # `.cargo/config.toml`: per-function `#[target_feature]` + runtime | |
| # `LazyLock<Tier>` detection means one binary, all ISAs. Jobs that | |
| # specifically need a higher target-cpu can opt in via per-job env. | |
| # NOTE: Cargo takes rustflags from a single source, and the RUSTFLAGS | |
| # environment variable outranks `target.<triple>.rustflags` / | |
| # `CARGO_TARGET_<TRIPLE>_RUSTFLAGS`. A job that needs extra flags has to | |
| # override RUSTFLAGS itself rather than add a per-target variable. | |
| RUSTFLAGS: "-D warnings" | |
| # Toolchain versions exposed to other jobs through the pass-msrv job. | |
| MSRV: 1.95.0 | |
| BLAS_MSRV: 1.95.0 | |
| jobs: | |
| pass-msrv: | |
| # Republishes the workflow-level MSRV / BLAS_MSRV values as job outputs so | |
| # other jobs can read them as `needs.pass-msrv.outputs.*` (the tests | |
| # matrix and the blas-msrv toolchain input do). | |
| runs-on: ubuntu-latest | |
| name: Pass MSRV values to other jobs | |
| outputs: | |
| MSRV: ${{ env.MSRV }} | |
| BLAS_MSRV: ${{ env.BLAS_MSRV }} | |
| steps: | |
| # NOTE(review): this step has no `id`, and the job-level `outputs:` above | |
| # read `env.*` directly rather than `steps.<id>.outputs.*`, so the values | |
| # appended to $GITHUB_OUTPUT here do not appear to be consumed. Confirm | |
| # before removing. | |
| - name: Pass MSRV | |
| run: | | |
| echo "MSRV=${{ env.MSRV }}" >> $GITHUB_OUTPUT | |
| echo "BLAS_MSRV=${{ env.BLAS_MSRV }}" >> $GITHUB_OUTPUT | |
| clippy: | |
| # Lints the workspace with clippy on the pinned toolchain, once per | |
| # feature set below. `-D warnings` turns every lint into a hard error. | |
| runs-on: ubuntu-latest | |
| strategy: | |
| matrix: | |
| # Pinned to 1.95.0 to match `rust-toolchain.toml`. The workspace | |
| # bumped from 1.94.1 → 1.95.0 in commit | |
| # https://github.com/AdaWorldAPI/ndarray (this PR) to align with | |
| # bevy (edition 2024 → 1.95 MSRV) and lance-graph. 1.95 added the | |
| # `clippy::manual_checked_ops` lint which fires on | |
| # `impl_owned_array.rs::into_scalar` — fixed in this same PR. | |
| rust: | |
| - "1.95.0" | |
| name: clippy/${{ matrix.rust }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| # The matrix value only feeds the job name; the toolchain version is | |
| # hard-coded in the action ref below, so keep the two in sync. | |
| - uses: dtolnay/rust-toolchain@1.95.0 | |
| with: | |
| components: clippy | |
| - uses: Swatinem/rust-cache@v2 | |
| - run: cargo clippy --features approx,serde,rayon -- -D warnings | |
| - run: cargo clippy --features native -- -D warnings | |
| format: | |
| # Verifies formatting with `cargo fmt --all --check` on the pinned stable | |
| # toolchain; any formatting diff fails the job. | |
| runs-on: ubuntu-latest | |
| name: format/stable | |
| # Blocking — no continue-on-error. `rustfmt.toml` was stripped of all | |
| # nightly-only directives in PR #133 (`7ca1cf3d chore(fmt): strip | |
| # unstable rustfmt options, keep stable-only config` + `ed59b3b1 | |
| # style: normalize codebase to stable rustfmt 1.94.1`) and the | |
| # codebase normalized — so stable rustfmt enforces the project's | |
| # style losslessly. The previous `continue-on-error: true` + | |
| # `dtolnay/rust-toolchain@nightly` setup was a stale band-aid from | |
| # when `brace_style = AlwaysNextLine`, `imports_granularity = | |
| # Preserve`, `unstable_features = true` still lived in rustfmt.toml. | |
| # That state no longer exists; the band-aid is removed in this PR. | |
| steps: | |
| - uses: actions/checkout@v4 | |
| # Stable rustfmt 1.95.0 — pinned in `rust-toolchain.toml`. No | |
| # nightly dependency since rustfmt.toml is stable-clean post-PR | |
| # #133. | |
| - uses: dtolnay/rust-toolchain@1.95.0 | |
| with: | |
| components: rustfmt | |
| - run: cargo fmt --all --check | |
| nostd: | |
| # Compiles the `ndarray` package for a bare-metal target with default | |
| # features disabled, to prove the no_std configuration still builds. | |
| runs-on: ubuntu-latest | |
| # Driven by the matrix entry; the single entry below sets it to false, | |
| # so a failure here fails the job. | |
| continue-on-error: ${{ matrix.experimental }} | |
| strategy: | |
| matrix: | |
| include: | |
| - rust: stable | |
| experimental: false | |
| target: thumbv6m-none-eabi | |
| name: nostd/${{ matrix.target }}/${{ matrix.rust }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@stable | |
| with: | |
| toolchain: ${{ matrix.rust }} | |
| targets: ${{ matrix.target }} | |
| # rust-toolchain.toml pins 1.95.0 — install the cross-compile target | |
| # for that toolchain too, since dtolnay/rust-toolchain only installs | |
| # for the matrix value which may differ from the pinned version. | |
| - run: rustup target add ${{ matrix.target }} | |
| - name: Tests | |
| run: | | |
| # Scope to `-p ndarray` so workspace dev-deps (criterion → serde_core | |
| # → getrandom) don't get evaluated against the no_std target. The | |
| # library itself builds cleanly under no_std + portable-atomic- | |
| # critical-section; only its dev-dependency tree pulls std-requiring | |
| # crates that have no business existing in the nostd build. | |
| cargo rustc -p ndarray "--target=${{ matrix.target }}" --no-default-features --features portable-atomic-critical-section | |
| tests: | |
| # Runs the project's test script on stable, beta, and the MSRV toolchain | |
| # published by the pass-msrv job. | |
| runs-on: ubuntu-latest | |
| needs: pass-msrv | |
| strategy: | |
| matrix: | |
| rust: | |
| - stable | |
| - beta | |
| - ${{ needs.pass-msrv.outputs.MSRV }} | |
| name: tests/${{ matrix.rust }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@master | |
| with: | |
| toolchain: ${{ matrix.rust }} | |
| - uses: rui314/setup-mold@v1 | |
| - uses: Swatinem/rust-cache@v2 | |
| - uses: taiki-e/install-action@nextest | |
| - name: Install openblas | |
| run: sudo apt-get install libopenblas-dev gfortran | |
| # The script receives the feature list and the toolchain name as its two | |
| # positional arguments. | |
| - run: ./scripts/all-tests.sh "$FEATURES" ${{ matrix.rust }} | |
| native-backend: | |
| # Builds with the `native` feature, then runs the `ndarray` package tests | |
| # under nextest with `native` alone and with `native,approx`. | |
| runs-on: ubuntu-latest | |
| name: native-backend/stable | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@stable | |
| - uses: Swatinem/rust-cache@v2 | |
| - uses: taiki-e/install-action@nextest | |
| - name: Build native backend | |
| run: cargo build --features native | |
| - name: Test native backend | |
| run: cargo nextest run -p ndarray --features native | |
| - name: Test native + approx | |
| run: cargo nextest run -p ndarray --features native,approx | |
| hpc-stream-parallel: | |
| # D-CSV-17 (sprint-13 W-I4): rayon par_* variants for hpc::stream. | |
| # This job co-ships with the par_* implementation so the rayon feature | |
| # gate is always exercised alongside the code it guards — prevents | |
| # silent-dead-code drift (spec §0 + worker-template-v2 §5 CI ownership). | |
| runs-on: ubuntu-latest | |
| name: hpc-stream-parallel/rayon | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@1.95.0 | |
| # The action installs rustup's minimal profile, which does not ship | |
| # clippy. The last step of this job runs `cargo clippy`, so request the | |
| # component explicitly (same as the dedicated clippy job). | |
| with: | |
| components: clippy | |
| - uses: Swatinem/rust-cache@v2 | |
| - uses: taiki-e/install-action@nextest | |
| - name: cargo check (no rayon — scalar path unchanged) | |
| run: cargo check -p ndarray | |
| - name: cargo check --features rayon | |
| run: cargo check -p ndarray --features rayon | |
| - name: par_* stream tests (--features rayon, hpc::stream filter) | |
| run: cargo nextest run -p ndarray --features rayon -E 'test(hpc::stream)' | |
| - name: clippy --features rayon | |
| run: cargo clippy -p ndarray --features rayon --lib -- -D warnings | |
| tier4-avx512-check: | |
| # Phase 6 of the SIMD integration plan (.claude/knowledge/ | |
| # simd-dispatch-architecture.md). Exercises the AVX-512 dispatch | |
| # arm (`#[cfg(target_feature = "avx512f")]` in `src/simd.rs`) so it | |
| # doesn't bit-rot under the v3-default cargo config. | |
| # | |
| # `cargo check` rather than `cargo test`: GH-hosted `ubuntu-latest` | |
| # runners may not have AVX-512 silicon (intermittent across SKUs), | |
| # so a v4-baked binary would SIGILL at run time. `check` compiles | |
| # the AVX-512 code path through the type checker + borrow checker | |
| # + monomorphization without producing a runnable artifact — | |
| # catches type mismatches and dispatch-arm holes that the v3 | |
| # default never touches. | |
| # | |
| # # Why job-level `RUSTFLAGS` plus an explicit `--target`: | |
| # | |
| # Cargo takes rustflags from exactly one source, checked in this | |
| # order: `CARGO_ENCODED_RUSTFLAGS`, `RUSTFLAGS`, | |
| # `target.<triple>.rustflags` (which is what | |
| # `CARGO_TARGET_<TRIPLE>_RUSTFLAGS` sets), then `build.rustflags`. | |
| # This workflow exports `RUSTFLAGS: "-D warnings"` globally, so a | |
| # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS` set on this job is | |
| # never read and the crate is checked without AVX-512 enabled. The | |
| # v4 flag therefore has to go into `RUSTFLAGS` itself. | |
| # | |
| # Without `--target`, `RUSTFLAGS` applies to BOTH the target | |
| # compilation AND host build scripts (`build.rs` artifacts that | |
| # cargo runs natively); on a GH runner without AVX-512 silicon those | |
| # v4-baked build scripts SIGILL during the dep build — the failure | |
| # the first iteration of this job hit in ~23 s. | |
| # | |
| # With an explicit `--target`, cargo passes the flags only to | |
| # artifacts built for that target; build scripts and proc macros are | |
| # compiled without them. That gives us "v4 for our crate, baseline | |
| # for build scripts." Keep `--target` on every cargo invocation here. | |
| runs-on: ubuntu-latest | |
| name: tier4-avx512-check | |
| env: | |
| RUSTFLAGS: "-D warnings -Ctarget-cpu=x86-64-v4" | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@stable | |
| - uses: Swatinem/rust-cache@v2 | |
| - name: cargo check (v4 / AVX-512 dispatch arm) | |
| run: cargo check --target=x86_64-unknown-linux-gnu -p ndarray --features approx,serde,rayon | |
| - name: cargo check (v4 / AVX-512 + hpc-extras) | |
| run: cargo check --target=x86_64-unknown-linux-gnu -p ndarray --features approx,serde,rayon,hpc-extras | |
| nightly-simd-polyfill: | |
| # TD-SIMD-9 from .claude/knowledge/simd-dispatch-architecture.md. | |
| # Exercises the `feature = "nightly-simd"` dispatch arm in | |
| # `src/simd.rs` (added in Phase 2 / PR #173) so the portable | |
| # `core::simd::*` backend doesn't bit-rot between PRs. | |
| # | |
| # Why nightly: `src/simd_nightly/*` uses `#![feature(portable_simd)]` | |
| # which only compiles on nightly rustc. The stable `cargo check` | |
| # paths in tests/clippy never reach this arm. | |
| # | |
| # `cargo check` rather than full build/test — pure type/borrow/cfg | |
| # verification. Catches arm shadowing bugs and the kind of API-break | |
| # codex flagged on PR #173 (lowercase aliases dropped). | |
| # | |
| # Runs only on merge_group + push (not every PR) to keep nightly | |
| # toolchain pulls out of the per-PR critical path. Mirrors the | |
| # `miri` job's gating strategy. | |
| if: github.event_name == 'merge_group' || github.event_name == 'push' | |
| runs-on: ubuntu-latest | |
| name: nightly-simd-polyfill | |
| env: | |
| # dtolnay/rust-toolchain only changes the rustup default, and a | |
| # rust-toolchain.toml in the checkout outranks that default. Comments | |
| # elsewhere in this workflow say the file pins 1.95.0, so a plain | |
| # `cargo check` would presumably run the pinned stable compiler and | |
| # reject `#![feature(portable_simd)]`. RUSTUP_TOOLCHAIN outranks the | |
| # file and selects the nightly installed below. | |
| RUSTUP_TOOLCHAIN: nightly | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@nightly | |
| - uses: Swatinem/rust-cache@v2 | |
| - name: cargo check (nightly + nightly-simd feature) | |
| run: cargo check -p ndarray --features approx,serde,rayon,nightly-simd | |
| - name: cargo check (nightly + nightly-simd + hpc-extras) | |
| run: cargo check -p ndarray --features approx,serde,rayon,nightly-simd,hpc-extras | |
| blas-msrv: | |
| # Runs the BLAS integration test script on the BLAS_MSRV toolchain | |
| # published by the pass-msrv job. | |
| runs-on: ubuntu-latest | |
| name: blas-msrv | |
| needs: pass-msrv | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@master | |
| with: | |
| toolchain: ${{ needs.pass-msrv.outputs.BLAS_MSRV }} | |
| - uses: rui314/setup-mold@v1 | |
| - uses: Swatinem/rust-cache@v2 | |
| - uses: taiki-e/install-action@nextest | |
| - name: Install openblas | |
| run: sudo apt-get install libopenblas-dev gfortran | |
| # $BLAS_MSRV here is the workflow-level env value, passed to the script | |
| # as its only argument. | |
| - run: ./scripts/blas-integ-tests.sh $BLAS_MSRV | |
| miri: | |
| # Nightly-only. Cannot run SIMD intrinsics or FFI. | |
| # Only runs on merge queue / push to master or main — never blocks PR | |
| # checks. | |
| if: github.event_name == 'merge_group' || github.event_name == 'push' | |
| runs-on: ubuntu-latest | |
| name: miri | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@nightly | |
| with: | |
| components: miri | |
| - uses: Swatinem/rust-cache@v2 | |
| - uses: taiki-e/install-action@nextest | |
| # NOTE(review): comments elsewhere in this workflow say | |
| # rust-toolchain.toml pins 1.95.0. Confirm the script selects nightly | |
| # explicitly (e.g. `cargo +nightly`); the script is not visible here. | |
| - run: ./scripts/miri-tests.sh | |
| cross_test: | |
| # Gated on merge_group only — cross-compile via docker (cross-rs) for | |
| # s390x / i686 is slow and flaky on the s390x docker image's toolchain | |
| # resolution (the rust-toolchain.toml pin — 1.94.1 when this was | |
| # written, 1.95.0 per the MSRV setting in this workflow — doesn't | |
| # resolve cleanly inside the s390x cross container), and regressions | |
| # are reliably caught by the `tests/{stable,beta,<MSRV>}` jobs on every | |
| # PR push. Reserve cross validation for the merge queue where it can | |
| # fail loudly without gating individual PRs on infra flakiness. The | |
| # commented `if:` was the original intent (per the pre-existing | |
| # comment) — uncommenting per the PR #143 codex thread that surfaced | |
| # this consistently. | |
| if: ${{ github.event_name == 'merge_group' }} | |
| runs-on: ubuntu-latest | |
| strategy: | |
| matrix: | |
| include: | |
| - rust: stable | |
| target: s390x-unknown-linux-gnu | |
| - rust: stable | |
| target: i686-unknown-linux-gnu | |
| name: cross_test/${{ matrix.target }}/${{ matrix.rust }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@master | |
| with: | |
| toolchain: ${{ matrix.rust }} | |
| targets: ${{ matrix.target }} | |
| - uses: rui314/setup-mold@v1 | |
| - uses: Swatinem/rust-cache@v2 | |
| - name: Install cross | |
| run: cargo install cross | |
| # Script arguments: feature list, toolchain name, target triple. | |
| - run: ./scripts/cross-tests.sh "approx,serde,rayon" ${{ matrix.rust }} ${{ matrix.target }} | |
| cargo-careful: | |
| # Nightly-only. Only runs on merge queue / push — never blocks PR checks. | |
| # Runs the nextest suite through cargo-careful with the workflow's | |
| # default feature set. | |
| if: github.event_name == 'merge_group' || github.event_name == 'push' | |
| runs-on: ubuntu-latest | |
| name: cargo-careful | |
| env: | |
| # dtolnay/rust-toolchain only changes the rustup default, and a | |
| # rust-toolchain.toml in the checkout outranks that default. Comments | |
| # elsewhere in this workflow say the file pins 1.95.0, so plain `cargo` | |
| # would presumably resolve to the pinned stable compiler. | |
| # RUSTUP_TOOLCHAIN outranks the file and selects the nightly installed | |
| # below. | |
| RUSTUP_TOOLCHAIN: nightly | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@nightly | |
| - uses: Swatinem/rust-cache@v2 | |
| - uses: taiki-e/install-action@nextest | |
| - name: Install cargo-careful | |
| run: cargo install cargo-careful | |
| - run: cargo careful nextest run -Zcareful-sanitizer --features="$FEATURES" | |
| docs: | |
| # Nightly needed for --cfg docsrs. Only on merge queue / push. | |
| # Builds the documentation for all features with rustdoc warnings | |
| # promoted to errors. | |
| if: github.event_name == 'merge_group' || github.event_name == 'push' | |
| runs-on: ubuntu-latest | |
| name: docs/nightly | |
| env: | |
| RUSTDOCFLAGS: "-Dwarnings --cfg docsrs" | |
| # dtolnay/rust-toolchain only changes the rustup default, and a | |
| # rust-toolchain.toml in the checkout outranks that default. Comments | |
| # elsewhere in this workflow say the file pins 1.95.0, so plain | |
| # `cargo doc` would presumably run the pinned stable toolchain. | |
| # RUSTUP_TOOLCHAIN outranks the file and selects the nightly installed | |
| # below. | |
| RUSTUP_TOOLCHAIN: nightly | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@nightly | |
| - run: cargo doc --no-deps --all-features | |
| conclusion: | |
| # Aggregate gate: passes only when every job listed in `needs` finished | |
| # as "success" or "skipped". | |
| # NOTE(review): blas-msrv is not in this list, so its failure does not | |
| # affect this gate — confirm that is intended. | |
| needs: | |
| - clippy | |
| - format | |
| - nostd | |
| - tests | |
| - native-backend | |
| - hpc-stream-parallel | |
| - tier4-avx512-check | |
| - nightly-simd-polyfill | |
| - miri | |
| - cross_test | |
| - cargo-careful | |
| - docs | |
| # Run even when a needed job failed or was skipped. Without a status | |
| # function in `if:`, GitHub skips this job as soon as any entry in | |
| # `needs` fails or is skipped. Several jobs above are skipped on every | |
| # pull_request and push by their own `if:` conditions, so the result | |
| # check below would otherwise never execute. | |
| if: always() | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Result | |
| run: | | |
| # Pretty-print the per-job results for the log. | |
| jq -C <<< "${needs}" | |
| # Check if all needs were successful or skipped. | |
| # jq prints `true` or `false`; the surrounding "$( )" then runs that | |
| # word as a command, so `false` makes the step exit non-zero. | |
| "$(jq -r 'all(.result as $result | (["success", "skipped"] | contains([$result])))' <<< "${needs}")" | |
| env: | |
| needs: ${{ toJson(needs) }} | |