From 91d4e40e0220cfe7a498f416499adf3ffd6e5c38 Mon Sep 17 00:00:00 2001 From: Flakebi Date: Fri, 9 Jan 2026 10:41:37 +0100 Subject: [PATCH] Add amdgpu_dispatch_ptr intrinsic Add a rustc intrinsic `amdgpu_dispatch_ptr` to access the kernel dispatch packet on amdgpu. The HSA kernel dispatch packet contains important information like the launch size and workgroup size. The Rust intrinsic lowers to the `llvm.amdgcn.dispatch.ptr` LLVM intrinsic, which returns a `ptr addrspace(4)`, plus an addrspacecast to `addrspace(0)`, so it can be returned as a Rust reference. The returned pointer/reference is valid for the whole program lifetime, and is therefore `'static`. The return type of the intrinsic (`*const ()`) does not mention the struct so that rustc does not need to know the exact struct type. An alternative would be to define the struct as lang item or add a generic argument to the function. Short version: ```rust #[cfg(target_arch = "amdgpu")] pub fn amdgpu_dispatch_ptr() -> *const (); ``` --- compiler/rustc_codegen_llvm/src/intrinsic.rs | 6 +++++ .../rustc_codegen_ssa/src/mir/intrinsic.rs | 1 + .../rustc_hir_analysis/src/check/intrinsic.rs | 2 ++ compiler/rustc_span/src/symbol.rs | 1 + library/core/src/intrinsics/gpu.rs | 23 ++++++++++++++++ library/core/src/intrinsics/mod.rs | 1 + tests/codegen-llvm/amdgpu-dispatch-ptr.rs | 27 +++++++++++++++++++ 7 files changed, 61 insertions(+) create mode 100644 library/core/src/intrinsics/gpu.rs create mode 100644 tests/codegen-llvm/amdgpu-dispatch-ptr.rs diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index b4057eea735ea..8803963f4bd33 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -560,6 +560,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { return Ok(()); } + sym::amdgpu_dispatch_ptr => { + let val = self.call_intrinsic("llvm.amdgcn.dispatch.ptr", &[], &[]); + // Relying on `LLVMBuildPointerCast` to produce an addrspacecast + self.pointercast(val, self.type_ptr()) + } + _ if name.as_str().starts_with("simd_") => { // Unpack non-power-of-2 #[repr(packed, simd)] arguments. // This gives them the expected layout of a regular #[repr(simd)] vector. diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs index f4fae40d8828f..f5ee9406f4bf1 100644 --- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs +++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs @@ -112,6 +112,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { | sym::unreachable | sym::cold_path | sym::breakpoint + | sym::amdgpu_dispatch_ptr | sym::assert_zero_valid | sym::assert_mem_uninitialized_valid | sym::assert_inhabited diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index 4e8333f678b66..d4c4a73e64a29 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -70,6 +70,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi | sym::add_with_overflow | sym::aggregate_raw_ptr | sym::align_of + | sym::amdgpu_dispatch_ptr | sym::assert_inhabited | sym::assert_mem_uninitialized_valid | sym::assert_zero_valid @@ -285,6 +286,7 @@ pub(crate) fn check_intrinsic_type( let (n_tps, n_cts, inputs, output) = match intrinsic_name { sym::autodiff => (4, 0, vec![param(0), param(1), param(2)], param(3)), sym::abort => (0, 0, vec![], tcx.types.never), + sym::amdgpu_dispatch_ptr => (0, 0, vec![], Ty::new_imm_ptr(tcx, tcx.types.unit)), sym::unreachable => (0, 0, vec![], tcx.types.never), sym::breakpoint => (0, 0, vec![], tcx.types.unit), sym::size_of | sym::align_of | sym::variant_count => (1, 0, vec![], tcx.types.usize), diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 72709753b1dff..676e9a9ae0424 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -454,6 +454,7 @@ symbols! { alu32, always, amdgpu, + amdgpu_dispatch_ptr, analysis, and, and_then, diff --git a/library/core/src/intrinsics/gpu.rs b/library/core/src/intrinsics/gpu.rs new file mode 100644 index 0000000000000..9e7624841d0c6 --- /dev/null +++ b/library/core/src/intrinsics/gpu.rs @@ -0,0 +1,23 @@ +//! Intrinsics for GPU targets. +//! +//! Intrinsics in this module are intended for use on GPU targets. +//! They can be target specific but in general GPU targets are similar. + +#![unstable(feature = "gpu_intrinsics", issue = "none")] + +/// Returns a pointer to the HSA kernel dispatch packet. +/// +/// A `gpu-kernel` on amdgpu is always launched through a kernel dispatch packet. +/// The dispatch packet contains the workgroup size, launch size and other data. +/// The content is defined by the [HSA Platform System Architecture Specification], +/// which is implemented e.g. in AMD's [hsa.h]. +/// The intrinsic returns a unit pointer so that rustc does not need to know the packet struct. +/// The pointer is valid for the whole lifetime of the program. +/// +/// [HSA Platform System Architecture Specification]: https://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf +/// [hsa.h]: https://github.com/ROCm/rocm-systems/blob/rocm-7.1.0/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h#L2959 +#[rustc_nounwind] +#[rustc_intrinsic] +#[cfg(target_arch = "amdgpu")] +#[must_use = "returns a pointer that does nothing unless used"] +pub fn amdgpu_dispatch_ptr() -> *const (); diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs index d46d3ed9d5137..0c26aba8618ed 100644 --- a/library/core/src/intrinsics/mod.rs +++ b/library/core/src/intrinsics/mod.rs @@ -60,6 +60,7 @@ use crate::{mem, ptr}; mod bounds; pub mod fallback; +pub mod gpu; pub mod mir; pub mod simd; diff --git a/tests/codegen-llvm/amdgpu-dispatch-ptr.rs b/tests/codegen-llvm/amdgpu-dispatch-ptr.rs new file mode 100644 index 0000000000000..00bde96c3d596 --- /dev/null +++ b/tests/codegen-llvm/amdgpu-dispatch-ptr.rs @@ -0,0 +1,27 @@ +// Tests the amdgpu_dispatch_ptr intrinsic. + +//@ compile-flags: --crate-type=rlib --target amdgcn-amd-amdhsa -Ctarget-cpu=gfx900 +//@ needs-llvm-components: amdgpu +//@ add-minicore +#![feature(intrinsics, no_core, rustc_attrs)] +#![no_core] + +extern crate minicore; + +pub struct DispatchPacket { + pub header: u16, + pub setup: u16, + pub workgroup_size_x: u16, // and more +} + +#[rustc_intrinsic] +#[rustc_nounwind] +fn amdgpu_dispatch_ptr() -> *const (); + +// CHECK-LABEL: @get_dispatch_data +// CHECK: %[[ORIG_PTR:[^ ]+]] = {{(tail )?}}call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: %[[PTR:[^ ]+]] = addrspacecast ptr addrspace(4) %[[ORIG_PTR]] to ptr +#[unsafe(no_mangle)] +pub fn get_dispatch_data() -> &'static DispatchPacket { + unsafe { &*(amdgpu_dispatch_ptr() as *const _) } +}