From c9d72863c9701ed6b91088631fd6c83c3635d8dd Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Mon, 11 Nov 2024 21:44:14 +0900 Subject: [PATCH] Update to stabilized s390x asm --- .github/workflows/ci.yml | 8 +- README.md | 71 +++--- build.rs | 37 +-- src/arch/mod.rs | 5 +- src/arch/s390x_no_reg_addr.rs | 440 ---------------------------------- src/arch_legacy/mod.rs | 4 +- src/lib.rs | 71 +++--- 7 files changed, 100 insertions(+), 536 deletions(-) delete mode 100644 src/arch/s390x_no_reg_addr.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25181d25..3d154226 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -226,11 +226,9 @@ jobs: # - rust: nightly # target: riscv64gc-unknown-linux-gnu # flags: -Z codegen-backend=cranelift - - rust: nightly-2023-08-24 # Rust 1.74, LLVM 17 (oldest version we can use asm_experimental_arch on this target) - target: s390x-unknown-linux-gnu - - rust: nightly-2024-01-04 # Rust 1.77, LLVM 17 (last version that does not support reg_addr register class) - target: s390x-unknown-linux-gnu - - rust: nightly-2024-01-05 # Rust 1.77, LLVM 17 (oldest version that supports reg_addr register class) + # - rust: '1.84' # LLVM 19 (oldest stable version we can use asm on this target) + # target: s390x-unknown-linux-gnu + - rust: nightly-2024-01-05 # Rust 1.77, LLVM 17 (oldest version we can use asm_experimental_arch on this target) target: s390x-unknown-linux-gnu - rust: nightly target: s390x-unknown-linux-gnu diff --git a/README.md b/README.md index f5cecccb..452d2bde 100644 --- a/README.md +++ b/README.md @@ -18,41 +18,42 @@ This crate provides a way to soundly perform such operations. ## Platform Support -Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, MIPS32, MIPS64, PowerPC, s390x, MSP430, Arm64EC, AVR, SPARC, Hexagon, M68k, and Xtensa are supported. 
- -| target_arch | primitives | load/store | swap/CAS | -| -------------------------------- | --------------------------------------------------- |:----------:|:--------:| -| x86 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| x86_64 (+cmpxchg16b) \[5] | i128,u128 | ✓ | ✓ | -| arm (v6+ or Linux/Android) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| arm (except for M-profile) \[2] | i64,u64 | ✓ | ✓ | -| aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | -| loongarch64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| mips / mips32r6 \[4] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| mips64 / mips64r6 \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc \[4] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| powerpc64 \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc64 (pwr8+) \[4] \[6] | i128,u128 | ✓ | ✓ | -| s390x \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| arm64ec \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| msp430 \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| avr \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| sparc \[4] \[7] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| sparc64 \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| hexagon \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| m68k \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | - -\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). 
RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires M68020+ (Linux is M68020+ by default). Xtensa's atomic RMW operations are not available on esp32s2.
-\[2] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
-\[3] Requires Rust 1.72+.
-\[4] Requires nightly due to `#![feature(asm_experimental_arch)]`.
-\[5] Requires cmpxchg16b target feature (enabled by default on Apple and Windows (except Windows 7) targets).
-\[6] Requires target-cpu pwr8+ (powerpc64le is pwr8 by default).
-\[7] Requires CAS instruction support.
+Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, s390x, Arm64EC, MIPS, PowerPC, MSP430, AVR, SPARC, Hexagon, M68k, and Xtensa are supported. + +| target_arch | primitives | load/store | swap/CAS | +| ------------------------------- | --------------------------------------------------- |:----------:|:--------:| +| x86 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| x86_64 (+cmpxchg16b) \[2] | i128,u128 | ✓ | ✓ | +| arm (v6+ or Linux/Android) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| arm (except for M-profile) \[3] | i64,u64 | ✓ | ✓ | +| aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | +| loongarch64 \[6] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| s390x \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| arm64ec \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| mips / mips32r6 \[8] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| mips64 / mips64r6 \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc \[8] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| powerpc64 \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc64 (pwr8+) \[4] \[8] | i128,u128 | ✓ | ✓ | +| msp430 \[8] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| avr \[8] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| sparc \[5] \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| sparc64 \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| hexagon \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| m68k \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| xtensa \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | + +\[1] Arm's atomic RMW operations are not 
available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.
+\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple and Windows (except Windows 7) targets).
+\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
+\[4] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).
+\[5] Requires `v9` or `leoncasa` target feature (enabled by default on Linux).
+\[6] Requires Rust 1.72+.
+\[7] Requires Rust 1.84+.
+\[8] Requires nightly due to `#![feature(asm_experimental_arch)]`.
Feel free to submit an issue if your target is not supported yet. diff --git a/build.rs b/build.rs index 82785b1b..0f4bdd18 100644 --- a/build.rs +++ b/build.rs @@ -36,7 +36,7 @@ fn main() { // Custom cfgs set by build script. Not public API. // grep -F 'cargo:rustc-cfg=' build.rs | grep -Ev '^ *//' | sed -E 's/^.*cargo:rustc-cfg=//; s/(=\\)?".*$//' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/' println!( - "cargo:rustc-check-cfg=cfg(atomic_maybe_uninit_no_asm_maybe_uninit,atomic_maybe_uninit_no_const_fn_trait_bound,atomic_maybe_uninit_no_const_mut_refs,atomic_maybe_uninit_no_loongarch64_asm,atomic_maybe_uninit_s390x_no_reg_addr,atomic_maybe_uninit_target_feature,atomic_maybe_uninit_unstable_asm_experimental_arch,portable_atomic_pre_llvm_20)" + "cargo:rustc-check-cfg=cfg(atomic_maybe_uninit_no_asm,atomic_maybe_uninit_no_asm_maybe_uninit,atomic_maybe_uninit_no_const_fn_trait_bound,atomic_maybe_uninit_no_const_mut_refs,atomic_maybe_uninit_target_feature,atomic_maybe_uninit_unstable_asm_experimental_arch,portable_atomic_pre_llvm_20)" ); // TODO: handle multi-line target_feature_fallback // grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/' @@ -84,8 +84,26 @@ fn main() { match target_arch { "loongarch64" => { + // asm! on LoongArch64 stabilized in Rust 1.72 if version.minor < 72 { - println!("cargo:rustc-cfg=atomic_maybe_uninit_no_loongarch64_asm"); + println!("cargo:rustc-cfg=atomic_maybe_uninit_no_asm"); + } + } + "s390x" => { + // asm! on s390x stabilized in Rust 1.84 (nightly-2024-11-11): /~https://github.com/rust-lang/rust/pull/131258 + if !version.probe(84, 2024, 11, 10) { + if version.nightly + && version.probe(77, 2024, 1, 4) + && is_allowed_feature("asm_experimental_arch") + { + // /~https://github.com/rust-lang/rust/pull/119431 merged in Rust 1.77 (nightly-2024-01-05). 
+ // The part of this feature we use has not been changed since nightly-2024-01-05 + // until it was stabilized in nightly-2024-11-11, so it can be safely enabled in + // nightly, which is older than nightly-2024-11-11. + println!("cargo:rustc-cfg=atomic_maybe_uninit_unstable_asm_experimental_arch"); + } else { + println!("cargo:rustc-cfg=atomic_maybe_uninit_no_asm"); + } } } "arm64ec" | "avr" | "hexagon" | "m68k" | "mips" | "mips32r6" | "mips64" | "mips64r6" @@ -94,21 +112,8 @@ fn main() { println!("cargo:rustc-cfg=atomic_maybe_uninit_unstable_asm_experimental_arch"); } } - // /~https://github.com/rust-lang/rust/pull/111331 merged in Rust 1.71 (nightly-2023-05-09). - "s390x" => { - if version.nightly - && version.probe(71, 2023, 5, 8) - && is_allowed_feature("asm_experimental_arch") - { - // /~https://github.com/rust-lang/rust/pull/119431 merged in Rust 1.77 (nightly-2024-01-05). - if !version.probe(77, 2024, 1, 4) { - println!("cargo:rustc-cfg=atomic_maybe_uninit_s390x_no_reg_addr"); - } - println!("cargo:rustc-cfg=atomic_maybe_uninit_unstable_asm_experimental_arch"); - } - } - // /~https://github.com/rust-lang/rust/pull/132472 merged in Rust 1.84 (nightly-2024-11-08). "sparc" | "sparc64" => { + // /~https://github.com/rust-lang/rust/pull/132472 merged in Rust 1.84 (nightly-2024-11-08). 
if version.nightly && version.probe(84, 2024, 11, 7) && is_allowed_feature("asm_experimental_arch") diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 1d31ff84..2c64843d 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -25,6 +25,7 @@ target_arch = "riscv32", target_arch = "riscv64", target_arch = "loongarch64", + all(target_arch = "s390x", not(atomic_maybe_uninit_no_asm)), all( any( target_arch = "arm64ec", @@ -38,7 +39,6 @@ target_arch = "msp430", target_arch = "powerpc", target_arch = "powerpc64", - target_arch = "s390x", all( target_arch = "sparc", any( @@ -115,8 +115,7 @@ mod powerpc; #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] mod riscv; #[cfg(target_arch = "s390x")] -#[cfg(atomic_maybe_uninit_unstable_asm_experimental_arch)] -#[cfg_attr(atomic_maybe_uninit_s390x_no_reg_addr, path = "s390x_no_reg_addr.rs")] +#[cfg(not(atomic_maybe_uninit_no_asm))] mod s390x; #[cfg(any( all( diff --git a/src/arch/s390x_no_reg_addr.rs b/src/arch/s390x_no_reg_addr.rs deleted file mode 100644 index 1c5a9bd4..00000000 --- a/src/arch/s390x_no_reg_addr.rs +++ /dev/null @@ -1,440 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 OR MIT - -// This module contains the atomic implementation for older rustc that does not support reg_addr register class. -// -// The implementation is based on the code just before we started using reg_addr register class. 
- -/* -s390x - -Refs: -- z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf -- z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary -- portable-atomic /~https://github.com/taiki-e/portable-atomic - -Generated asm: -- s390x https://godbolt.org/z/5hcfK57fo -- s390x (z196) https://godbolt.org/z/GW98K9hnW -*/ - -#[path = "cfgs/s390x.rs"] -mod cfgs; - -use core::{ - arch::asm, - mem::{self, MaybeUninit}, - sync::atomic::Ordering, -}; - -use crate::{ - raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}, - utils::{MaybeUninit128, Pair}, -}; - -// Extracts and checks condition code. -#[inline(always)] -fn extract_cc(r: i64) -> bool { - r.wrapping_add(-268435456) & (1 << 31) != 0 -} - -#[inline(always)] -fn complement(v: u32) -> u32 { - (v ^ !0).wrapping_add(1) -} - -macro_rules! atomic_load_store { - ($int_type:ident, $l_suffix:tt, $asm_suffix:tt) => { - impl AtomicLoad for $int_type { - #[inline] - unsafe fn atomic_load( - src: *const MaybeUninit, - _order: Ordering, - ) -> MaybeUninit { - debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - let out: MaybeUninit; - - // SAFETY: the caller must uphold the safety contract. - unsafe { - // atomic load is always SeqCst. - asm!( - // (atomic) load from src to out - concat!("l", $l_suffix, " {out}, 0({src})"), - src = in(reg) ptr_reg!(src), - out = lateout(reg) out, - out("r0") _, - options(nostack, preserves_flags), - ); - } - out - } - } - impl AtomicStore for $int_type { - #[inline] - unsafe fn atomic_store( - dst: *mut MaybeUninit, - val: MaybeUninit, - order: Ordering, - ) { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - - // SAFETY: the caller must uphold the safety contract. - unsafe { - macro_rules! 
atomic_store { - ($fence:tt) => { - asm!( - // (atomic) store val to dst - concat!("st", $asm_suffix, " {val}, 0({dst})"), - $fence, - dst = in(reg) ptr_reg!(dst), - val = in(reg) val, - out("r0") _, - options(nostack, preserves_flags), - ) - }; - } - match order { - // Relaxed and Release stores are equivalent. - Ordering::Relaxed | Ordering::Release => atomic_store!(""), - // bcr 14,0 (fast-BCR-serialization) requires z196 or later. - #[cfg(any( - target_feature = "fast-serialization", - atomic_maybe_uninit_target_feature = "fast-serialization", - ))] - Ordering::SeqCst => atomic_store!("bcr 14, 0"), - #[cfg(not(any( - target_feature = "fast-serialization", - atomic_maybe_uninit_target_feature = "fast-serialization", - )))] - Ordering::SeqCst => atomic_store!("bcr 15, 0"), - _ => unreachable!(), - } - } - } - } - }; -} - -macro_rules! atomic { - ($int_type:ident, $asm_suffix:tt) => { - atomic_load_store!($int_type, $asm_suffix, $asm_suffix); - impl AtomicSwap for $int_type { - #[inline] - unsafe fn atomic_swap( - dst: *mut MaybeUninit, - val: MaybeUninit, - _order: Ordering, - ) -> MaybeUninit { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let mut out: MaybeUninit; - - // SAFETY: the caller must uphold the safety contract. - unsafe { - // atomic swap is always SeqCst. - asm!( - // (atomic) swap (CAS loop) - concat!("l", $asm_suffix, " %r0, 0({dst})"), - "2:", - concat!("cs", $asm_suffix, " %r0, {val}, 0({dst})"), - "jl 2b", - dst = in(reg) ptr_reg!(dst), - val = in(reg) val, - out("r0") out, - // Do not use `preserves_flags` because CS modifies the condition code. 
- options(nostack), - ); - } - out - } - } - impl AtomicCompareExchange for $int_type { - #[inline] - unsafe fn atomic_compare_exchange( - dst: *mut MaybeUninit, - old: MaybeUninit, - new: MaybeUninit, - _success: Ordering, - _failure: Ordering, - ) -> (MaybeUninit, bool) { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let out: MaybeUninit; - - // SAFETY: the caller must uphold the safety contract. - unsafe { - let r; - // compare_exchange is always SeqCst. - asm!( - // (atomic) CAS - concat!("cs", $asm_suffix, " %r0, {new}, 0({dst})"), - // store condition code - "ipm {r}", - dst = in(reg) ptr_reg!(dst), - new = in(reg) new, - r = lateout(reg) r, - inout("r0") old => out, - // Do not use `preserves_flags` because CS modifies the condition code. - options(nostack), - ); - (out, extract_cc(r)) - } - } - } - }; -} - -macro_rules! atomic_sub_word { - ($int_type:ident, $l_suffix:tt, $asm_suffix:tt, $bits:tt, $risbg_swap:tt, $risbg_cas:tt) => { - atomic_load_store!($int_type, $l_suffix, $asm_suffix); - impl AtomicSwap for $int_type { - #[inline] - unsafe fn atomic_swap( - dst: *mut MaybeUninit, - val: MaybeUninit, - _order: Ordering, - ) -> MaybeUninit { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); - let mut out: MaybeUninit; - - // SAFETY: the caller must uphold the safety contract. - unsafe { - // Implement sub-word atomic operations using word-sized CAS loop. - // Based on assemblies generated by rustc/LLVM. - // See also create_sub_word_mask_values. 
- asm!( - "l %r0, 0({dst})", - "2:", - "rll {tmp}, %r0, 0({shift})", - concat!("risbg {tmp}, {val}, 32, ", $risbg_swap), - "rll {tmp}, {tmp}, 0({shift_c})", - "cs %r0, {tmp}, 0({dst})", - "jl 2b", - concat!("rll {out}, %r0, ", $bits ,"({shift})"), - dst = in(reg) ptr_reg!(dst), - val = in(reg) val, - out = lateout(reg) out, - shift = in(reg) shift, - shift_c = in(reg) complement(shift), - tmp = out(reg) _, - out("r0") _, // prev - // Do not use `preserves_flags` because CS modifies the condition code. - options(nostack), - ); - } - out - } - } - impl AtomicCompareExchange for $int_type { - #[inline] - unsafe fn atomic_compare_exchange( - dst: *mut MaybeUninit, - old: MaybeUninit, - new: MaybeUninit, - _success: Ordering, - _failure: Ordering, - ) -> (MaybeUninit, bool) { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); - let mut out: MaybeUninit; - - // SAFETY: the caller must uphold the safety contract. - unsafe { - let r; - // Implement sub-word atomic operations using word-sized CAS loop. - // Based on assemblies generated by rustc/LLVM. - // See also create_sub_word_mask_values. - asm!( - "l {prev}, 0({dst})", - "2:", - concat!("rll %r0, {prev}, ", $bits ,"({shift})"), - concat!("risbg {new}, %r0, 32, ", $risbg_cas, ", 0"), - concat!("ll", $asm_suffix, "r %r0, %r0"), - "cr %r0, {old}", - "jlh 3f", - concat!("rll {tmp}, {new}, -", $bits ,"({shift_c})"), - "cs {prev}, {tmp}, 0({dst})", - "jl 2b", - "3:", - // store condition code - "ipm {r}", - dst = in(reg) ptr_reg!(dst), - prev = out(reg) _, - old = in(reg) crate::utils::ZeroExtend::zero_extend(old), - new = inout(reg) new => _, - shift = in(reg) shift, - shift_c = in(reg) complement(shift), - tmp = out(reg) _, - r = lateout(reg) r, - out("r0") out, - // Do not use `preserves_flags` because CS modifies the condition code. 
- options(nostack), - ); - (out, extract_cc(r)) - } - } - } - }; -} - -atomic_sub_word!(i8, "b", "c", "8", "39, 24", "55"); -atomic_sub_word!(u8, "b", "c", "8", "39, 24", "55"); -atomic_sub_word!(i16, "h", "h", "16", "47, 16", "47"); -atomic_sub_word!(u16, "h", "h", "16", "47, 16", "47"); -atomic!(i32, ""); -atomic!(u32, ""); -atomic!(i64, "g"); -atomic!(u64, "g"); -atomic!(isize, "g"); -atomic!(usize, "g"); - -// s390x has 128-bit atomic load/store/CAS instructions and other operations are emulated by CAS loop. -// See /~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md for details. -macro_rules! atomic128 { - ($int_type:ident) => { - impl AtomicLoad for $int_type { - #[inline] - unsafe fn atomic_load( - src: *const MaybeUninit, - _order: Ordering, - ) -> MaybeUninit { - debug_assert!(src as usize % mem::size_of::<$int_type>() == 0); - let (prev_hi, prev_lo); - - // SAFETY: the caller must uphold the safety contract. - unsafe { - // atomic load is always SeqCst. - asm!( - // (atomic) load from src to out pair - "lpq %r0, 0({src})", - src = in(reg) ptr_reg!(src), - // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - out("r0") prev_hi, - out("r1") prev_lo, - options(nostack, preserves_flags), - ); - MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type - } - } - } - impl AtomicStore for $int_type { - #[inline] - unsafe fn atomic_store( - dst: *mut MaybeUninit, - val: MaybeUninit, - order: Ordering, - ) { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let val = MaybeUninit128 { $int_type: val }; - - // SAFETY: the caller must uphold the safety contract. - unsafe { - macro_rules! atomic_store { - ($fence:tt) => { - asm!( - // (atomic) store val pair to dst - "stpq %r0, 0({dst})", - $fence, - dst = in(reg) ptr_reg!(dst), - // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. 
- in("r0") val.pair.hi, - in("r1") val.pair.lo, - options(nostack, preserves_flags), - ) - }; - } - match order { - // Relaxed and Release stores are equivalent. - Ordering::Relaxed | Ordering::Release => atomic_store!(""), - // bcr 14,0 (fast-BCR-serialization) requires z196 or later. - #[cfg(any( - target_feature = "fast-serialization", - atomic_maybe_uninit_target_feature = "fast-serialization", - ))] - Ordering::SeqCst => atomic_store!("bcr 14, 0"), - #[cfg(not(any( - target_feature = "fast-serialization", - atomic_maybe_uninit_target_feature = "fast-serialization", - )))] - Ordering::SeqCst => atomic_store!("bcr 15, 0"), - _ => unreachable!(), - } - } - } - } - impl AtomicSwap for $int_type { - #[inline] - unsafe fn atomic_swap( - dst: *mut MaybeUninit, - val: MaybeUninit, - _order: Ordering, - ) -> MaybeUninit { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let val = MaybeUninit128 { $int_type: val }; - let (mut prev_hi, mut prev_lo); - - // SAFETY: the caller must uphold the safety contract. - unsafe { - // atomic swap is always SeqCst. - asm!( - // (atomic) swap (CAS loop) - "lpq %r0, 0({dst})", - "2:", - "cdsg %r0, %r12, 0({dst})", - "jl 2b", - dst = inout(reg) ptr_reg!(dst) => _, - // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - out("r0") prev_hi, - out("r1") prev_lo, - in("r12") val.pair.hi, - in("r13") val.pair.lo, - // Do not use `preserves_flags` because CDSG modifies the condition code. 
- options(nostack), - ); - MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type - } - } - } - impl AtomicCompareExchange for $int_type { - #[inline] - unsafe fn atomic_compare_exchange( - dst: *mut MaybeUninit, - old: MaybeUninit, - new: MaybeUninit, - _success: Ordering, - _failure: Ordering, - ) -> (MaybeUninit, bool) { - debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); - let old = MaybeUninit128 { $int_type: old }; - let new = MaybeUninit128 { $int_type: new }; - let (prev_hi, prev_lo); - - // SAFETY: the caller must uphold the safety contract. - unsafe { - let r; - // compare_exchange is always SeqCst. - asm!( - // (atomic) CAS - "cdsg %r0, %r12, 0({dst})", - // store condition code - "ipm {r}", - dst = in(reg) ptr_reg!(dst), - r = lateout(reg) r, - // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. - inout("r0") old.pair.hi => prev_hi, - inout("r1") old.pair.lo => prev_lo, - in("r12") new.pair.hi, - in("r13") new.pair.lo, - // Do not use `preserves_flags` because CDSG modifies the condition code. 
- options(nostack), - ); - ( - MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$int_type, - extract_cc(r) - ) - } - } - } - }; -} - -atomic128!(i128); -atomic128!(u128); diff --git a/src/arch_legacy/mod.rs b/src/arch_legacy/mod.rs index aea5b174..7713b0e5 100644 --- a/src/arch_legacy/mod.rs +++ b/src/arch_legacy/mod.rs @@ -19,7 +19,7 @@ target_arch = "aarch64", target_arch = "riscv32", target_arch = "riscv64", - all(target_arch = "loongarch64", not(atomic_maybe_uninit_no_loongarch64_asm)), + all(target_arch = "loongarch64", not(atomic_maybe_uninit_no_asm)), )))] #[path = "../arch/cfgs/unsupported.rs"] mod unsupported; @@ -52,7 +52,7 @@ mod arm_linux; ))] mod armv8; #[cfg(target_arch = "loongarch64")] -#[cfg(not(atomic_maybe_uninit_no_loongarch64_asm))] +#[cfg(not(atomic_maybe_uninit_no_asm))] mod loongarch; #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] mod riscv; diff --git a/src/lib.rs b/src/lib.rs index d404c36f..8443a276 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,41 +12,42 @@ This crate provides a way to soundly perform such operations. ## Platform Support -Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, MIPS32, MIPS64, PowerPC, s390x, MSP430, Arm64EC, AVR, SPARC, Hexagon, M68k, and Xtensa are supported. 
- -| target_arch | primitives | load/store | swap/CAS | -| -------------------------------- | --------------------------------------------------- |:----------:|:--------:| -| x86 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| x86_64 (+cmpxchg16b) \[5] | i128,u128 | ✓ | ✓ | -| arm (v6+ or Linux/Android) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| arm (except for M-profile) \[2] | i64,u64 | ✓ | ✓ | -| aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | -| loongarch64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| mips / mips32r6 \[4] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| mips64 / mips64r6 \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc \[4] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| powerpc64 \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc64 (pwr8+) \[4] \[6] | i128,u128 | ✓ | ✓ | -| s390x \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| arm64ec \[4] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| msp430 \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| avr \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| sparc \[4] \[7] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| sparc64 \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| hexagon \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| m68k \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | - -\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). 
RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires M68020+ (Linux is M68020+ by default). Xtensa's atomic RMW operations are not available on esp32s2.
-\[2] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
-\[3] Requires Rust 1.72+.
-\[4] Requires nightly due to `#![feature(asm_experimental_arch)]`.
-\[5] Requires cmpxchg16b target feature (enabled by default on Apple and Windows (except Windows 7) targets).
-\[6] Requires target-cpu pwr8+ (powerpc64le is pwr8 by default).
-\[7] Requires CAS instruction support.
+Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, s390x, Arm64EC, MIPS, PowerPC, MSP430, AVR, SPARC, Hexagon, M68k, and Xtensa are supported. + +| target_arch | primitives | load/store | swap/CAS | +| ------------------------------- | --------------------------------------------------- |:----------:|:--------:| +| x86 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| x86_64 (+cmpxchg16b) \[2] | i128,u128 | ✓ | ✓ | +| arm (v6+ or Linux/Android) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| arm (except for M-profile) \[3] | i64,u64 | ✓ | ✓ | +| aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | +| loongarch64 \[6] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| s390x \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| arm64ec \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| mips / mips32r6 \[8] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| mips64 / mips64r6 \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc \[8] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| powerpc64 \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc64 (pwr8+) \[4] \[8] | i128,u128 | ✓ | ✓ | +| msp430 \[8] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| avr \[8] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| sparc \[5] \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| sparc64 \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| hexagon \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| m68k \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| xtensa \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | + +\[1] Arm's atomic RMW operations are not 
available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.
+\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple and Windows (except Windows 7) targets).
+\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
+\[4] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).
+\[5] Requires `v9` or `leoncasa` target feature (enabled by default on Linux).
+\[6] Requires Rust 1.72+.
+\[7] Requires Rust 1.84+.
+\[8] Requires nightly due to `#![feature(asm_experimental_arch)]`.
Feel free to submit an issue if your target is not supported yet.