Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,14 @@ jobs:
--exclude cust
'

# Exclude rustc_codegen_nvvm: `--all-features` enables its `llvm19` feature,
# whose build.rs requires an LLVM 19 toolchain not present in the CI image.
- name: Check documentation
run: |
docker exec "$CONTAINER_NAME" bash -lc 'set -euo pipefail
export RUSTDOCFLAGS=-Dwarnings
cargo doc --workspace --all-features --document-private-items --no-deps
cargo doc --workspace --all-features --document-private-items --no-deps \
--exclude rustc_codegen_nvvm
'

- name: Stop build container
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,14 @@ jobs:
--exclude blastoff --exclude cudnn --exclude cudnn-sys --exclude cust

# Exclude crates that require cuDNN, not available on Windows CI: cudnn, cudnn-sys.
# Exclude rustc_codegen_nvvm: `--all-features` enables its `llvm19` feature,
# whose build.rs requires an LLVM 19 toolchain not present in the CI image.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we should add this to the images for the build step?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Linux LLVM 19 in CI images
  • Windows LLVM 19 prebuilt
  • RockyLinux 9 specifically (the genuinely awkward one)
  • Dual LLVM 7 + LLVM 19 testing in CI

Could I land those in a separate follow-up PR? I'll create a tracking issue and then figure out how to get both Linux and Windows working with the LLVM 19 builds from https://github.com/rust-gpu/rustc_codegen_nvvm-llvm/releases/

- name: Check documentation
env:
RUSTDOCFLAGS: -Dwarnings
run: |
cargo doc --workspace --all-features --document-private-items --no-deps `
--exclude cudnn --exclude cudnn-sys
--exclude cudnn --exclude cudnn-sys --exclude rustc_codegen_nvvm

# Disabled due to dll issues, someone with Windows knowledge needed
# - name: Compiletest
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ book
/target
**/.vscode
.devcontainer
.codex
rustc-ice-*.txt
.nix-driver-libs
.claude
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions crates/cuda_builder/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ default = []
# HACK(see rust-gpu/spirv-builder): use `dep:` to avoid Cargo auto-creating a feature
# with the dependency name. Consumers must explicitly opt-in to compiling the backend.
rustc_codegen_nvvm = ["dep:rustc_codegen_nvvm"]
# Build the backend against LLVM 19 instead of LLVM 7. Propagates to `nvvm` (which
# uses it to flip the default `NvvmArch` to `Compute100`) and, when the optional
# `rustc_codegen_nvvm` dep is also enabled, to `rustc_codegen_nvvm` itself. Even
# when the optional dep is disabled, the build script's nested `cargo build -p
# rustc_codegen_nvvm` reads `cfg(feature = "llvm19")` here and forwards it.
llvm19 = ["nvvm/llvm19", "rustc_codegen_nvvm?/llvm19"]

[dependencies]
rustc_codegen_nvvm = { version = "0.3", path = "../rustc_codegen_nvvm", optional = true }
Expand Down
17 changes: 12 additions & 5 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,13 +550,20 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> {

let target_dir = workspace_dir.join("target").join("cuda-builder-codegen");

let status = Command::new("cargo")
.args(["build", "-p", "rustc_codegen_nvvm"])
let mut cmd = Command::new("cargo");
cmd.args(["build", "-p", "rustc_codegen_nvvm"])
.arg("--target-dir")
.arg(&target_dir)
.current_dir(&workspace_dir)
.status()
.ok()?;
.current_dir(&workspace_dir);

// Propagate the `llvm19` cargo feature to the nested backend build. Without this
// `rustc_codegen_nvvm`'s build script falls through to the prebuilt LLVM 7
// download, which the LLVM 19 codegen path can't link against.
if cfg!(feature = "llvm19") {
cmd.args(["--features", "llvm19"]);
}

let status = cmd.status().ok()?;

if !status.success() {
return None;
Expand Down
8 changes: 8 additions & 0 deletions crates/cust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,12 @@ fn main() {
println!("cargo::rustc-cfg=cuGraphGetEdges_v2");
println!("cargo::rustc-cfg=cuCtxCreate_v4");
}

// In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union.
// Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a
// direct `id` field. This cfg gates the struct initialization syntax accordingly.
println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)");
if driver_version >= 13020 {
println!("cargo::rustc-cfg=cuMemLocation_anon_id");
}
}
15 changes: 15 additions & 0 deletions crates/cust/src/memory/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is from #368

__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -693,6 +696,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -735,6 +741,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down Expand Up @@ -777,6 +786,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand All @@ -801,6 +813,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down
1 change: 1 addition & 0 deletions crates/cust_raw/build/cuda_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ impl CudaSdk {
vec![
cuda_root.join("nvvm").join("bin"),
cuda_root.join("nvvm").join("lib64"),
cuda_root.join("nvvm").join("lib"),
]
};
let library_dirs = Self::normalize_dirpaths(search_dirs);
Expand Down
9 changes: 6 additions & 3 deletions crates/cust_raw/build/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,12 @@ fn main() {
println!("cargo::rustc-link-search=native={}", libdir.display());
}
println!("cargo::rustc-link-lib=dylib=nvvm");
// Handle libdevice support.
fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc"))
.expect("Cannot copy libdevice bitcode file.");
// `fs::copy` preserves source mode. When libdevice.10.bc comes from
// the Nix store (0444), re-running this build can't overwrite the
// previous copy in OUT_DIR. Drop it first.
let dest = outdir.join("libdevice.bc");
let _ = fs::remove_file(&dest);
fs::copy(sdk.libdevice_bitcode_path(), &dest).expect("Cannot copy libdevice bitcode file.");
}
}

Expand Down
8 changes: 8 additions & 0 deletions crates/nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ description = "High level bindings to libnvvm"
repository = "https://github.com/Rust-GPU/rust-cuda"
readme = "../../README.md"

[features]
default = []
# Match the `llvm19` feature on `rustc_codegen_nvvm`. Currently only flips the
# default `NvvmArch` to the lowest Blackwell capability, since the LLVM 7
# bitcode dialect can't target `compute_100+` and the LLVM 19 dialect can't
# target pre-Blackwell archs.
llvm19 = []

[dependencies]
cust_raw = { version = "0.11.3", path = "../cust_raw", default-features = false, features = ["nvvm"] }
strum = { version = "0.27", features = ["derive"] }
39 changes: 35 additions & 4 deletions crates/nvvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{
ffi::{CStr, CString},
fmt::Display,
mem::MaybeUninit,
ptr::null_mut,
str::FromStr,
};

Expand Down Expand Up @@ -311,12 +310,13 @@ pub enum NvvmArch {
Compute73,
/// This default value of 7.5 corresponds to Turing and later devices. We default to this
/// because it is the minimum supported by CUDA 13.0 while being in the middle of the range
/// supported by CUDA 12.x.
/// supported by CUDA 12.x. Selected as the default only when the `llvm19` feature is off;
/// the LLVM 19 NVVM dialect can't target pre-Blackwell archs.
// WARNING: If you change the default, consider updating:
// - The `--target-arch` values used for compiletests in `ci_linux.yml` and
// `.github/workflows/ci_{linux,windows}.yml`.
// - The CUDA versions used in `setup_cuda_environment` in `compiletests`.
#[default]
#[cfg_attr(not(feature = "llvm19"), default)]
Compute75,
Compute80,
Compute86,
Expand All @@ -325,6 +325,12 @@ pub enum NvvmArch {
Compute89,
Compute90,
Compute90a,
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. Selected as the default when
/// the `llvm19` feature is enabled, since the LLVM 7 dialect can't target this and
/// the LLVM 19 dialect can't target anything below it.
#[cfg_attr(feature = "llvm19", default)]
Compute100,
Compute100f,
Compute100a,
Expand Down Expand Up @@ -448,6 +454,14 @@ impl NvvmArch {
self.capability_value() % 10
}

/// Reports whether this architecture is compiled through NVVM's modern IR dialect instead of
/// the legacy LLVM 7 dialect.
///
/// The cutoff is a capability value of 100 (`compute_100`): CUDA 13.2 documents the modern
/// dialect as Blackwell-and-later only, and `compute_100` is where that range begins.
pub fn uses_modern_ir_dialect(&self) -> bool {
    let capability = self.capability_value();
    capability >= 100
}

/// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
/// `Compute90a`).
pub fn target_feature(&self) -> &'static str {
Expand Down Expand Up @@ -739,7 +753,24 @@ impl NvvmProgram {
/// Verify the program without actually compiling it. In the case of invalid IR, you can find
/// more detailed error info by calling [`compiler_log`](Self::compiler_log).
pub fn verify(&self) -> Result<(), NvvmError> {
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
self.verify_with_options(&[])
}

/// Verify the program using an explicit set of `NvvmOption`s.
///
/// This is the option-aware counterpart of [`verify`](Self::verify): hand it the same options
/// that will later be given to [`compile`](Self::compile). Forwarding the user-selected
/// `-arch=compute_XXX` is the important case for CUDA 12.9+ / LLVM 19 bitcode — without it the
/// verifier can fall back to the legacy LLVM 7 parser and reject modern-dialect bitcode that
/// would otherwise compile fine.
pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
    // Render every option through `Display` and append a NUL so the bytes can be read as a
    // C string on the other side of the FFI boundary.
    let rendered: Vec<String> = options.iter().map(|opt| format!("{opt}\0")).collect();
    // Pointer table handed to libnvvm; each entry borrows from `rendered`.
    let mut raw_options: Vec<_> = rendered.iter().map(|text| text.as_ptr().cast()).collect();

    // SAFETY: `rendered` owns the NUL-terminated option strings and `raw_options` owns the
    // pointer array; both stay alive until after the call returns, and the count passed
    // matches the number of pointers.
    // NOTE(review): assumes `self.raw` is a live program handle — confirm against the
    // constructor/`Drop` impl.
    unsafe {
        nvvm_sys::nvvmVerifyProgram(self.raw, rendered.len() as i32, raw_options.as_mut_ptr())
            .to_result()
    }
}
}

Expand Down
4 changes: 4 additions & 0 deletions crates/rustc_codegen_nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ readme = "../../README.md"
[lib]
crate-type = ["dylib"]

[features]
default = []
llvm19 = []

[dependencies]
nvvm = { version = "0.1", path = "../nvvm" }
rustc-demangle = "0.1.24"
Expand Down
Loading