Skip to content

Commit

Permalink
riscv64: Support 128-bit atomics (Zacas extension)
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Sep 1, 2024
1 parent 284e8da commit aef1268
Show file tree
Hide file tree
Showing 38 changed files with 3,315 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ adde
alcgr
algr
allnoconfig
amocas
aosp
aqrl
armasm
Expand Down Expand Up @@ -175,4 +176,6 @@ xmmword
xsave
xsub
zaamo
zabha
zacas
Zhaoxin
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ jobs:
target: riscv32gc-unknown-linux-gnu
- rust: '1.59'
target: riscv64gc-unknown-linux-gnu
- rust: '1.73' # LLVM 17 (oldest version we can use experimental-zacas on this target)
target: riscv64gc-unknown-linux-gnu
- rust: stable
target: riscv64gc-unknown-linux-gnu
- rust: nightly
Expand Down Expand Up @@ -354,6 +356,13 @@ jobs:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr8
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr8
if: startsWith(matrix.target, 'powerpc64-')
# riscv64 +experimental-zacas
- run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-}
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+experimental-zacas
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+experimental-zacas
# TODO: cranelift doesn't support cfg(target_feature): /~https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400
if: startsWith(matrix.target, 'riscv64') && !contains(matrix.flags, 'codegen-backend=cranelift')
# s390x z196 (arch9)
- run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-}
env:
Expand Down
19 changes: 15 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn main() {

if version.minor >= 80 {
println!(
r#"cargo:rustc-check-cfg=cfg(target_feature,values("zaamo","quadword-atomics","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
r#"cargo:rustc-check-cfg=cfg(target_feature,values("zaamo","experimental-zacas","quadword-atomics","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
);

// Custom cfgs set by build script. Not public API.
Expand All @@ -58,7 +58,7 @@ fn main() {
// TODO: handle multi-line target_feature_fallback
// grep -E 'target_feature_fallback\("' build.rs | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ','
println!(
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo"))"#
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","experimental-zacas"))"#
);
}

Expand Down Expand Up @@ -310,9 +310,20 @@ fn main() {
}
}
"riscv32" | "riscv64" => {
// As of rustc 1.80, target_feature "zaamo" is not available on rustc side:
// As of rustc 1.80, target_feature "zaamo"/"zacas" is not available on rustc side:
// /~https://github.com/rust-lang/rust/blob/1.80.0/compiler/rustc_target/src/target_features.rs#L273
target_feature_fallback("zaamo", false); // amo*.{w,d}
// zacas and zabha imply zaamo on GCC, but not on LLVM.
// /~https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0-rc3/llvm/lib/TargetParser/RISCVISAInfo.cpp#L774
// /~https://github.com/gcc-mirror/gcc/blob/08693e29ec186fd7941d0b73d4d466388971fe2f/gcc/config/riscv/arch-canonicalize#L45-L46
if version.llvm >= 17 {
// amocas.{w,d,q} (amocas.{b,h} if zabha is also available)
// available as experimental since 17 /~https://github.com/llvm/llvm-project/commit/29f630a1ddcbb03caa31b5002f0cbc105ff3a869
// attempted to make non-experimental in 19 /~https://github.com/llvm/llvm-project/commit/95aab69c109adf29e183090c25dc95c773215746
// but reverted in /~https://github.com/llvm/llvm-project/commit/70e7d26e560173c8b9db4c75ab4a3004cd5f021a
target_feature_fallback("experimental-zacas", false);
}
// amo*.{w,d}
target_feature_fallback("zaamo", false);
}
"powerpc64" => {
// For Miri and ThreadSanitizer.
Expand Down
58 changes: 58 additions & 0 deletions src/cfgs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,35 @@ mod atomic_64_macros {
),
),
),
all(
target_arch = "riscv64",
not(portable_atomic_no_asm),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
// TODO(riscv64)
// all(
// feature = "fallback",
// not(portable_atomic_no_outline_atomics),
// any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default
// any(
// all(
// target_os = "linux",
// any(
// target_env = "gnu",
// all(
// any(target_env = "musl", target_env = "ohos"),
// not(target_feature = "crt-static"),
// ),
// portable_atomic_outline_atomics,
// ),
// ),
// target_os = "android",
// ),
// not(any(miri, portable_atomic_sanitize_thread)),
// ),
),
),
all(
target_arch = "powerpc64",
portable_atomic_unstable_asm_experimental_arch,
Expand Down Expand Up @@ -331,6 +360,35 @@ mod atomic_128_macros {
),
),
),
all(
target_arch = "riscv64",
not(portable_atomic_no_asm),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
// TODO(riscv64)
// all(
// feature = "fallback",
// not(portable_atomic_no_outline_atomics),
// any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default
// any(
// all(
// target_os = "linux",
// any(
// target_env = "gnu",
// all(
// any(target_env = "musl", target_env = "ohos"),
// not(target_feature = "crt-static"),
// ),
// portable_atomic_outline_atomics,
// ),
// ),
// target_os = "android",
// ),
// not(any(miri, portable_atomic_sanitize_thread)),
// ),
),
),
all(
target_arch = "powerpc64",
portable_atomic_unstable_asm_experimental_arch,
Expand Down
4 changes: 3 additions & 1 deletion src/imp/atomic128/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Here is the table of targets that support 128-bit atomics and the instructions u
| ----------- | ---- | ----- | --- | --- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel, AMD, or Zhaoxin CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ |
| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse and lse2. Others are currently compile-time detection only. <br> Requires rustc 1.59+ |
| riscv64 | amocas.q | amocas.q | amocas.q | amocas.q | Requires experimental-zacas target feature. Currently compile-time detection only due to LLVM marking it as experimental. <br> Requires rustc 1.73+ (LLVM 17+) |
| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly |
| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |

Expand All @@ -17,7 +18,7 @@ See [aarch64.rs](aarch64.rs) module-level comments for more details on the instr

## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128)

This directory has target-specific implementations with inline assembly ([aarch64.rs](aarch64.rs), [x86_64.rs](x86_64.rs), [powerpc64.rs](powerpc64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly.
This directory has target-specific implementations with inline assembly ([aarch64.rs](aarch64.rs), [x86_64.rs](x86_64.rs), [powerpc64.rs](powerpc64.rs), [riscv64.rs](riscv64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly.

Implementations with inline assembly generate assemblies almost equivalent to the `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but some operations may or may not generate more efficient code. For example:

Expand Down Expand Up @@ -45,6 +46,7 @@ Here is the table of targets that support run-time CPU feature detection and the
| aarch64 | macos | sysctl | all | Currently only used in tests because FEAT_LSE and FEAT_LSE2 are always available at compile-time. |
| aarch64 | windows | IsProcessorFeaturePresent | lse | Enabled by default |
| aarch64 | fuchsia | zx_system_get_features | lse | Enabled by default |
| riscv64 | linux | riscv_hwprobe | all | Currently only used in tests due to LLVM marking zacas as experimental |
| powerpc64 | linux | getauxval | all | Disabled by default |
| powerpc64 | freebsd | elf_aux_info | all | Disabled by default |
| powerpc64 | openbsd | elf_aux_info | all | Disabled by default |
Expand Down
45 changes: 44 additions & 1 deletion src/imp/atomic128/detect/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,24 @@ impl CpuInfo {
}
}

#[cfg(target_arch = "riscv64")]
impl CpuInfo {
    // NB: update test_bit_flags test when adding new flag.
    /// Flag for the Zacas extension: amocas.{w,d,q}
    const HAS_ZACAS: u32 = 1;

    /// Reports whether the `HAS_ZACAS` flag is set on this `CpuInfo`.
    // Only compiled for tests, or when zacas is not already enabled at
    // compile time (in which case no run-time query is needed).
    #[cfg(any(
        test,
        not(any(
            target_feature = "experimental-zacas",
            portable_atomic_target_feature = "experimental-zacas"
        ))
    ))]
    #[inline]
    pub(crate) fn has_zacas(self) -> bool {
        let flag = Self::HAS_ZACAS;
        self.test(flag)
    }
}

#[cfg(target_arch = "powerpc64")]
impl CpuInfo {
// NB: update test_bit_flags test when adding new flag.
Expand All @@ -122,7 +140,7 @@ impl CpuInfo {
}

// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
#[cfg(any(target_arch = "aarch64", target_arch = "powerpc64"))]
#[cfg(any(target_arch = "aarch64", target_arch = "powerpc64", target_arch = "riscv64"))]
#[cfg(not(windows))]
#[allow(dead_code, non_camel_case_types)]
mod c_types {
Expand Down Expand Up @@ -216,6 +234,8 @@ mod tests_common {
]);
#[cfg(target_arch = "x86_64")]
test_flags(&[CpuInfo::INIT, CpuInfo::HAS_CMPXCHG16B, CpuInfo::HAS_VMOVDQA_ATOMIC]);
#[cfg(target_arch = "riscv64")]
test_flags(&[CpuInfo::INIT, CpuInfo::HAS_ZACAS]);
#[cfg(target_arch = "powerpc64")]
test_flags(&[CpuInfo::INIT, CpuInfo::HAS_QUADWORD_ATOMICS]);
}
Expand Down Expand Up @@ -265,6 +285,19 @@ mod tests_common {
)),
);
}
#[cfg(target_arch = "riscv64")]
{
features.push_str("run-time:\n");
print_feature!("zacas", detect().test(CpuInfo::HAS_ZACAS));
features.push_str("compile-time:\n");
print_feature!(
"zacas",
cfg!(any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas"
)),
);
}
#[cfg(target_arch = "powerpc64")]
{
features.push_str("run-time:\n");
Expand Down Expand Up @@ -339,6 +372,16 @@ mod tests_common {
assert!(!detect().test(CpuInfo::HAS_RCPC3));
}
}
#[cfg(target_arch = "riscv64")]
#[test]
#[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
fn test_detect() {
    // The `has_zacas` accessor and a direct test of the HAS_ZACAS flag must
    // agree, whichever way detection turned out on this machine.
    let via_accessor = detect().has_zacas();
    let via_flag = detect().test(CpuInfo::HAS_ZACAS);
    assert_eq!(via_accessor, via_flag);
}
#[cfg(target_arch = "powerpc64")]
#[test]
#[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
Expand Down
102 changes: 102 additions & 0 deletions src/imp/atomic128/detect/riscv64_linux.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT

// Run-time CPU feature detection on riscv64 Linux/Android by using riscv_hwprobe.
//
// On RISC-V, detection using auxv only supports single-character extensions.
//
// Refs:
// - /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/riscv/hwprobe.rst
// - /~https://github.com/golang/sys/commit/3283fc3f6160baf63bec24fbaa24e094e9ff6daf

include!("common.rs");

// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
// Hand-written FFI declarations used by this file to call the riscv_hwprobe
// syscall. Names intentionally follow the C spelling, hence the lint allowances.
#[allow(non_camel_case_types, non_upper_case_globals)]
mod ffi {
pub(crate) use super::c_types::c_long;

extern "C" {
// Variadic raw-syscall entry point; the first argument is the syscall number.
// https://man7.org/linux/man-pages/man2/syscall.2.html
// /~https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/mod.rs#L3215
// /~https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/mod.rs#L4080
pub(crate) fn syscall(number: c_long, ...) -> c_long;
}

// Key/value pair passed to the syscall by pointer. `#[repr(C)]` keeps the
// field order and layout as declared here (key first, then value).
// /~https://github.com/torvalds/linux/blob/v6.10/arch/riscv/include/uapi/asm/hwprobe.h
#[derive(Copy, Clone)]
#[repr(C)]
pub(crate) struct riscv_hwprobe {
pub(crate) key: i64,
pub(crate) value: u64,
}

// Syscall number passed as the first argument to `syscall` by `riscv_hwprobe` below.
pub(crate) const __NR_riscv_hwprobe: c_long = 258;

// /~https://github.com/torvalds/linux/blob/v6.10/arch/riscv/include/uapi/asm/hwprobe.h
// Key queried by `_detect`.
pub(crate) const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4;
// Bit that `_detect` checks in the value returned for the key above.
pub(crate) const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34;
}

/// Invokes the `riscv_hwprobe` syscall for the single key/value pair in `out`.
///
/// `flags` is forwarded to the syscall unchanged. Returns `true` when the
/// syscall returns 0, and `false` otherwise.
fn riscv_hwprobe(out: &mut ffi::riscv_hwprobe, flags: usize) -> bool {
    let pairs: *mut ffi::riscv_hwprobe = out;
    let pair_count = 1_usize;
    // No CPU set is supplied: both the size and the pointer argument are zero.
    let cpu_set_size = 0_usize;
    let cpus = 0_usize;
    // SAFETY: We've passed the valid pointer and length: `pairs` is derived
    // from a live `&mut` reference and `pair_count` is exactly 1.
    let ret = unsafe {
        ffi::syscall(
            ffi::__NR_riscv_hwprobe,
            pairs,
            pair_count,
            cpu_set_size,
            cpus,
            flags,
            0_usize,
        )
    };
    ret == 0
}

/// Queries `RISCV_HWPROBE_KEY_IMA_EXT_0` via `riscv_hwprobe` and sets
/// `CpuInfo::HAS_ZACAS` on `info` when the returned value has the
/// `RISCV_HWPROBE_EXT_ZACAS` bit set. `info` is left untouched otherwise.
#[cold]
fn _detect(info: &mut CpuInfo) {
    let mut probe = ffi::riscv_hwprobe { key: ffi::RISCV_HWPROBE_KEY_IMA_EXT_0, value: 0 };
    // The syscall itself failed: nothing can be detected.
    if !riscv_hwprobe(&mut probe, 0) {
        return;
    }
    // A key of -1 is treated as "no usable value for this key"
    // (see the hwprobe documentation referenced at the top of this file).
    if probe.key == -1 {
        return;
    }
    let zacas_reported = probe.value & ffi::RISCV_HWPROBE_EXT_ZACAS != 0;
    if zacas_reported {
        info.set(CpuInfo::HAS_ZACAS);
    }
}

// Test-only module: checks the hand-written constants in `ffi` against the
// values exposed by `test_helper::sys`.
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use super::*;

// Static assertions for FFI bindings.
// This checks that FFI bindings defined in this crate, FFI bindings defined
// in libc, and FFI bindings generated for the platform's latest header file
// using bindgen have compatible signatures (or the same values if constants).
// Since this is static assertion, we can detect problems with
// `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
// without actually running tests on these platforms.
// See also tools/codegen/src/ffi.rs.
// TODO(codegen): auto-generate this test
#[allow(
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::no_effect_underscore_binding
)]
// The closure is bound to an unnamed const and is not called anywhere in this
// module; it only has to compile.
const _: fn() = || {
use test_helper::sys;
// TODO: syscall,riscv_hwprobe
// The commented-out libc comparisons below are kept as placeholders next to
// the active `sys` comparisons.
// static_assert!(ffi::__NR_riscv_hwprobe == libc::__NR_riscv_hwprobe);
static_assert!(ffi::__NR_riscv_hwprobe == sys::__NR_riscv_hwprobe as ffi::c_long);
// static_assert!(ffi::RISCV_HWPROBE_KEY_IMA_EXT_0 == libc::RISCV_HWPROBE_KEY_IMA_EXT_0);
static_assert!(ffi::RISCV_HWPROBE_KEY_IMA_EXT_0 == sys::RISCV_HWPROBE_KEY_IMA_EXT_0 as i64);
// static_assert!(ffi::RISCV_HWPROBE_EXT_ZACAS == libc::RISCV_HWPROBE_EXT_ZACAS);
static_assert!(ffi::RISCV_HWPROBE_EXT_ZACAS == sys::RISCV_HWPROBE_EXT_ZACAS);
};
}
7 changes: 6 additions & 1 deletion src/imp/atomic128/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,12 @@ macro_rules! atomic128 {
};
}

#[cfg(any(target_arch = "powerpc64", target_arch = "s390x", target_arch = "x86_64"))]
#[cfg(any(
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "x86_64",
))]
#[allow(unused_macros)] // also used by intrinsics.rs
macro_rules! atomic_rmw_by_atomic_update {
() => {
Expand Down
Loading

0 comments on commit aef1268

Please sign in to comment.