From bdf99212eb66b63642c4a66a1adb4125efcd8f90 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 3 Sep 2024 00:52:09 +0900 Subject: [PATCH] aarch64: Enable run-time detection of FEAT_LRCPC3/FEAT_LSE128 in load/store --- .github/.cspell/project-dictionary.txt | 2 + src/imp/atomic128/README.md | 2 +- src/imp/atomic128/aarch64.rs | 664 ++++++++++++++++---- src/imp/atomic128/detect/aarch64_aa64reg.rs | 66 +- src/imp/atomic128/detect/aarch64_macos.rs | 20 +- src/imp/atomic128/detect/auxv.rs | 9 +- src/imp/atomic128/detect/common.rs | 15 +- 7 files changed, 571 insertions(+), 207 deletions(-) diff --git a/.github/.cspell/project-dictionary.txt b/.github/.cspell/project-dictionary.txt index 250304193..d1afb58eb 100644 --- a/.github/.cspell/project-dictionary.txt +++ b/.github/.cspell/project-dictionary.txt @@ -67,6 +67,8 @@ ldiapp ldsetp ldxp lghi +libatomic +libatomic's libcalls libelf libfdio diff --git a/src/imp/atomic128/README.md b/src/imp/atomic128/README.md index 76364c311..c421af5c7 100644 --- a/src/imp/atomic128/README.md +++ b/src/imp/atomic128/README.md @@ -7,7 +7,7 @@ Here is the table of targets that support 128-bit atomics and the instructions u | target_arch | load | store | CAS | RMW | note | | ----------- | ---- | ----- | --- | --- | ---- | | x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel, AMD, or Zhaoxin CPU with AVX.
Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only.
Requires rustc 1.59+ | -| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature.
Both compile-time and run-time detection are supported for lse and lse2. Others are currently compile-time detection only.
Requires rustc 1.59+ | +| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature.
Both compile-time and run-time detection are supported.
Requires rustc 1.59+ | | powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default).
Requires nightly | | s390x | lpq | stpq | cdsg | cdsg | Requires nightly | diff --git a/src/imp/atomic128/aarch64.rs b/src/imp/atomic128/aarch64.rs index d7f7f145e..1b00d4282 100644 --- a/src/imp/atomic128/aarch64.rs +++ b/src/imp/atomic128/aarch64.rs @@ -10,17 +10,29 @@ // - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available) // - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a) // -// If outline-atomics is not enabled and FEAT_LSE is not available at -// compile-time, we use LDXP/STXP loop. -// If outline-atomics is enabled and FEAT_LSE is not available at -// compile-time, we use CASP for CAS if FEAT_LSE is available -// at run-time, otherwise, use LDXP/STXP loop. -// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW. -// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP -// loop for RMW (by default, it is set on Apple hardware; see build script for details). -// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. -// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store. -// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store. +// This module supports all of these instructions and attempts to select the best +// one based on compile-time and run-time information about available CPU features +// and platforms. For example: +// +// - If outline-atomics is not enabled and FEAT_LSE is not available at +// compile-time, we use LDXP/STXP loop. +// - If outline-atomics is enabled and FEAT_LSE is not available at +// compile-time, we use CASP for CAS if FEAT_LSE is available +// at run-time, otherwise, use LDXP/STXP loop. +// - If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW. 
+// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP +// loop for RMW (by default, it is set on Apple hardware where CASP is slow; +// see build script for details). +// - If outline-atomics is enabled and FEAT_LSE2 is not available at compile-time, +// we use LDP/STP (and also LDIAPP/STILP/SWPP if FEAT_LRCPC3/FEAT_LSE128 is +// available) for load/store if FEAT_LSE2 is available at run-time, otherwise, +// use LDXP/STXP or CASP depending on whether FEAT_LSE is available. +// - If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. +// - If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store. +// - If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store. +// +// See each "Instruction selection flow for ..." comment in this file for the exact +// instruction selection per operation. // // Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. 
// @@ -71,8 +83,10 @@ include!("macros.rs"); #[cfg(any( test, not(all( - any(target_feature = "lse2", portable_atomic_target_feature = "lse2"), any(target_feature = "lse", portable_atomic_target_feature = "lse"), + any(target_feature = "lse2", portable_atomic_target_feature = "lse2"), + any(target_feature = "lse128", portable_atomic_target_feature = "lse128"), + any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"), )), ))] #[cfg(any( @@ -93,8 +107,10 @@ mod detect; #[cfg(any( test, not(all( - any(target_feature = "lse2", portable_atomic_target_feature = "lse2"), any(target_feature = "lse", portable_atomic_target_feature = "lse"), + any(target_feature = "lse2", portable_atomic_target_feature = "lse2"), + any(target_feature = "lse128", portable_atomic_target_feature = "lse128"), + any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"), )), ))] #[cfg(any(target_os = "netbsd", target_os = "openbsd"))] @@ -209,6 +225,82 @@ macro_rules! debug_assert_lse2 { } }; } +#[rustfmt::skip] +#[cfg(portable_atomic_llvm_16)] +#[cfg(any( + target_feature = "lse128", + portable_atomic_target_feature = "lse128", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! debug_assert_lse128 { + () => { + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + // These don't support detection of FEAT_LSE128. 
+ // target_os = "fuchsia", + // target_os = "windows", + ), + ))] + #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] + { + debug_assert!(detect::detect().has_lse128()); + } + }; +} +#[rustfmt::skip] +#[cfg(portable_atomic_llvm_16)] +#[cfg(any( + target_feature = "rcpc3", + portable_atomic_target_feature = "rcpc3", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! debug_assert_rcpc3 { + () => { + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + // These don't support detection of FEAT_LRCPC3. + // target_os = "fuchsia", + // target_os = "windows", + ), + ))] + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] + { + debug_assert!(detect::detect().has_rcpc3()); + } + }; +} // Refs: https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en // @@ -252,7 +344,7 @@ macro_rules! start_lse { #[cfg(any( target_feature = "lse128", portable_atomic_target_feature = "lse128", - // not(portable_atomic_no_outline_atomics), + not(portable_atomic_no_outline_atomics), ))] macro_rules! start_lse128 { () => { @@ -263,7 +355,7 @@ macro_rules! start_lse128 { #[cfg(any( target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3", - // not(portable_atomic_no_outline_atomics), + not(portable_atomic_no_outline_atomics), ))] macro_rules! start_rcpc3 { () => { @@ -305,6 +397,35 @@ macro_rules! 
atomic_rmw { }; } +// ----------------------------------------------------------------------------- +// load + +/* + +Instruction selection flow for load: +- if compile_time(FEAT_LSE2) => ldp: + - if compile_time(FEAT_LRCPC3) && order != relaxed => ldiapp + - else => ldp +- if platform_supports_detection_of(FEAT_LSE2): + - if detect(FEAT_LSE2) && detect(FEAT_LRCPC3) && order != relaxed => lse2_rcpc3 (ldiapp) + - if detect(FEAT_LSE2) => lse2 (ldp) +- else => no_lse2: + - if compile_time(FEAT_LSE) => casp + - else => ldxp_stxp + +Note: +- If FEAT_LSE2 is available at compile-time, we don't do run-time detection of + FEAT_LRCPC3 at this time, since FEAT_LRCPC3 is not yet available for most CPUs. + (macOS, which doesn't have any FEAT_LRCPC3-enabled CPUs as of M4, is the only platform + that currently enables FEAT_LSE2 at compile-time by default.) +- If FEAT_LSE2 is not available at compile-time, we want to do run-time detection + of FEAT_LSE2, so we do run-time detection of FEAT_LRCPC3 at the same time. +- We don't do run-time detection of FEAT_LSE for load at this time, but since + load by CAS is wait-free, it would probably make sense to do run-time detection. (TODO) + +*/ + +// if compile_time(FEAT_LSE2) => ldp: // cfg guarantee that the CPU supports FEAT_LSE2. #[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] use _atomic_load_ldp as atomic_load; @@ -313,45 +434,21 @@ use _atomic_load_ldp as atomic_load; unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { #[inline] unsafe fn atomic_load_no_lse2(src: *mut u128, order: Ordering) -> u128 { + // if compile_time(FEAT_LSE) => casp #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. 
unsafe { _atomic_load_casp(src, order) } + // else => ldxp_stxp #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] // SAFETY: the caller must uphold the safety contract. unsafe { _atomic_load_ldxp_stxp(src, order) } } - #[cfg(not(all( - not(portable_atomic_no_outline_atomics), - any( - all( - target_os = "linux", - any( - target_env = "gnu", - all( - any(target_env = "musl", target_env = "ohos"), - not(target_feature = "crt-static"), - ), - portable_atomic_outline_atomics, - ), - ), - target_os = "android", - target_os = "freebsd", - target_os = "netbsd", - target_os = "openbsd", - // These don't support detection of FEAT_LSE2. - // target_os = "fuchsia", - // target_os = "windows", - ), - )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_load_no_lse2(src, order) - } + // if platform_supports_detection_of(FEAT_LSE2): #[cfg(all( not(portable_atomic_no_outline_atomics), any( @@ -384,6 +481,10 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { atomic_load_lse2_relaxed = _atomic_load_ldp(Ordering::Relaxed); atomic_load_lse2_acquire = _atomic_load_ldp(Ordering::Acquire); atomic_load_lse2_seqcst = _atomic_load_ldp(Ordering::SeqCst); + #[cfg(portable_atomic_llvm_16)] + atomic_load_lse2_rcpc3_acquire = _atomic_load_ldiapp(Ordering::Acquire); + #[cfg(portable_atomic_llvm_16)] + atomic_load_lse2_rcpc3_seqcst = _atomic_load_ldiapp(Ordering::SeqCst); } fn_alias! 
{ unsafe fn(src: *mut u128) -> u128; @@ -399,8 +500,10 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { ifunc!(unsafe fn(src: *mut u128) -> u128 { let cpuinfo = detect::detect(); if cpuinfo.has_lse2() { + // if detect(FEAT_LSE2) => lse2 (ldp) atomic_load_lse2_relaxed } else { + // else => no_lse2: atomic_load_no_lse2_relaxed } }) @@ -409,8 +512,21 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { ifunc!(unsafe fn(src: *mut u128) -> u128 { let cpuinfo = detect::detect(); if cpuinfo.has_lse2() { - atomic_load_lse2_acquire + #[cfg(portable_atomic_llvm_16)] + if cpuinfo.has_rcpc3() { + // if detect(FEAT_LSE2) && detect(FEAT_LRCPC3) && order != relaxed => lse2_rcpc3 (ldiapp) + atomic_load_lse2_rcpc3_acquire + } else { + // if detect(FEAT_LSE2) => lse2 (ldp) + atomic_load_lse2_acquire + } + #[cfg(not(portable_atomic_llvm_16))] + { + // if detect(FEAT_LSE2) => lse2 (ldp) + atomic_load_lse2_acquire + } } else { + // else => no_lse2: atomic_load_no_lse2_acquire } }) @@ -419,8 +535,21 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { ifunc!(unsafe fn(src: *mut u128) -> u128 { let cpuinfo = detect::detect(); if cpuinfo.has_lse2() { - atomic_load_lse2_seqcst + #[cfg(portable_atomic_llvm_16)] + if cpuinfo.has_rcpc3() { + // if detect(FEAT_LSE2) && detect(FEAT_LRCPC3) && order != relaxed => lse2_rcpc3 (ldiapp) + atomic_load_lse2_rcpc3_seqcst + } else { + // if detect(FEAT_LSE2) => lse2 (ldp) + atomic_load_lse2_seqcst + } + #[cfg(not(portable_atomic_llvm_16))] + { + // if detect(FEAT_LSE2) => lse2 (ldp) + atomic_load_lse2_seqcst + } } else { + // else => no_lse2: atomic_load_no_lse2_seqcst } }) @@ -429,6 +558,34 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { } } } + // else => no_lse2: + #[cfg(not(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + 
), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "fuchsia", + // target_os = "windows", + ), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_load_no_lse2(src, order) + } } // If CPU supports FEAT_LSE2, LDP/LDIAPP is single-copy atomic reads, // otherwise it is two single-copy atomic reads. @@ -446,12 +603,11 @@ unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must guarantee that `dst` is valid for reads, // 16-byte aligned, that there are no concurrent non-atomic operations. // - // Refs: - // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64- + // Refs: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64- unsafe { let (out_lo, out_hi); macro_rules! atomic_load_relaxed { - ($acquire:tt) => { + ($acquire:tt) => {{ asm!( "ldp {out_lo}, {out_hi}, [{src}]", $acquire, @@ -459,15 +615,65 @@ unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { out_hi = lateout(reg) out_hi, out_lo = lateout(reg) out_lo, options(nostack, preserves_flags), - ) - }; + ); + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole + }}; } match order { - Ordering::Relaxed => atomic_load_relaxed!(""), + // if compile_time(FEAT_LRCPC3) && order != relaxed => ldiapp + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. 
#[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))] + Ordering::Acquire | Ordering::SeqCst => _atomic_load_ldiapp(src, order), + + // else => ldp + Ordering::Relaxed => atomic_load_relaxed!(""), + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] + Ordering::Acquire => atomic_load_relaxed!("dmb ishld"), + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] + Ordering::SeqCst => { + asm!( + // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details. + "ldar {tmp}, [{src}]", + "ldp {out_lo}, {out_hi}, [{src}]", + "dmb ishld", + src = in(reg) ptr_reg!(src), + out_hi = lateout(reg) out_hi, + out_lo = lateout(reg) out_lo, + tmp = out(reg) _, + options(nostack, preserves_flags), + ); + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole + } + _ => unreachable!(), + } + } +} +#[cfg(portable_atomic_llvm_16)] +#[cfg(any( + target_feature = "lse2", + portable_atomic_target_feature = "lse2", + not(portable_atomic_no_outline_atomics), +))] +#[cfg(any( + target_feature = "rcpc3", + portable_atomic_target_feature = "rcpc3", + not(portable_atomic_no_outline_atomics), +))] +#[inline] +unsafe fn _atomic_load_ldiapp(src: *mut u128, order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_lse2!(); + debug_assert_rcpc3!(); + + // SAFETY: the caller must guarantee that `src` is valid for reads, + // 16-byte aligned, that there are no concurrent non-atomic operations. + // + // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers- + unsafe { + let (out_lo, out_hi); + match order { Ordering::Acquire => { - // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. 
- // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers- asm!( start_rcpc3!(), "ldiapp {out_lo}, {out_hi}, [{src}]", @@ -477,15 +683,13 @@ unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { options(nostack, preserves_flags), ); } - #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] - Ordering::Acquire => atomic_load_relaxed!("dmb ishld"), Ordering::SeqCst => { asm!( + start_rcpc3!(), // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp. // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details. "ldar {tmp}, [{src}]", - "ldp {out_lo}, {out_hi}, [{src}]", - "dmb ishld", + "ldiapp {out_lo}, {out_hi}, [{src}]", src = in(reg) ptr_reg!(src), out_hi = lateout(reg) out_hi, out_lo = lateout(reg) out_lo, @@ -572,6 +776,38 @@ unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 { } } +// ----------------------------------------------------------------------------- +// store + +/* + +Instruction selection flow for store: +- if compile_time(FEAT_LSE2) => stp: + - if compile_time(FEAT_LSE128) && order == seqcst => swpp + - if compile_time(FEAT_LRCPC3) && order != relaxed => stilp + - if compile_time(FEAT_LSE128) && order != relaxed => swpp + - else => stp +- if platform_supports_detection_of(FEAT_LSE2): + - if detect(FEAT_LSE2) && detect(FEAT_LSE128) && order == seqcst => lse128 (swpp) + - if detect(FEAT_LSE2) && detect(FEAT_LRCPC3) && order != relaxed => lse2_rcpc3 (stilp) + - if detect(FEAT_LSE2) && detect(FEAT_LSE128) && order != relaxed => lse128 (swpp) + - if detect(FEAT_LSE2) => lse2 (stp) +- else => no_lse2: + - if compile_time(FEAT_LSE) && not(ll_sc_rmw) => casp + - else => ldxp_stxp + +Note: +- If FEAT_LSE2 is available at compile-time, we don't do run-time detection of + FEAT_LRCPC3/FEAT_LSE128 at this time, since FEAT_LRCPC3/FEAT_LSE128 is not yet available for most CPUs. 
+ (macOS, which doesn't have any FEAT_LRCPC3/FEAT_LSE128-enabled CPUs as of M4, is the only platform + that currently enables FEAT_LSE2 at compile-time by default.) +- If FEAT_LSE2 is not available at compile-time, we want to do run-time detection + of FEAT_LSE2, so we do run-time detection of FEAT_LRCPC3/FEAT_LSE128 at the same time. +- We don't do run-time detection of FEAT_LSE for store at this time. + +*/ + +// if compile_time(FEAT_LSE2) => stp: // cfg guarantee that the CPU supports FEAT_LSE2. #[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] use _atomic_store_stp as atomic_store; @@ -580,6 +816,7 @@ use _atomic_store_stp as atomic_store; unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { #[inline] unsafe fn atomic_store_no_lse2(dst: *mut u128, val: u128, order: Ordering) { + // if compile_time(FEAT_LSE) && not(ll_sc_rmw) => casp // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, // we use CAS-based atomic RMW. #[cfg(all( @@ -591,6 +828,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { unsafe { _atomic_swap_casp(dst, val, order); } + // else => ldxp_stxp #[cfg(not(all( any(target_feature = "lse", portable_atomic_target_feature = "lse"), not(portable_atomic_ll_sc_rmw), @@ -600,33 +838,20 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { _atomic_store_ldxp_stxp(dst, val, order); } } - #[cfg(not(all( + #[cfg(portable_atomic_llvm_16)] + #[cfg(any( + target_feature = "lse128", + portable_atomic_target_feature = "lse128", not(portable_atomic_no_outline_atomics), - any( - all( - target_os = "linux", - any( - target_env = "gnu", - all( - any(target_env = "musl", target_env = "ohos"), - not(target_feature = "crt-static"), - ), - portable_atomic_outline_atomics, - ), - ), - target_os = "android", - target_os = "freebsd", - target_os = "netbsd", - target_os = "openbsd", - // These don't support detection of FEAT_LSE2. 
- // target_os = "fuchsia", - // target_os = "windows", - ), - )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_store_no_lse2(dst, val, order); + ))] + #[inline] + unsafe fn _atomic_store_swpp(dst: *mut u128, val: u128, order: Ordering) { + // SAFETY: the caller must uphold the safety contract. + unsafe { + _atomic_swap_swpp(dst, val, order); + } } + // if platform_supports_detection_of(FEAT_LSE2): #[cfg(all( not(portable_atomic_no_outline_atomics), any( @@ -659,6 +884,14 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { atomic_store_lse2_relaxed = _atomic_store_stp(Ordering::Relaxed); atomic_store_lse2_release = _atomic_store_stp(Ordering::Release); atomic_store_lse2_seqcst = _atomic_store_stp(Ordering::SeqCst); + #[cfg(portable_atomic_llvm_16)] + atomic_store_lse2_rcpc3_release = _atomic_store_stilp(Ordering::Release); + #[cfg(portable_atomic_llvm_16)] + atomic_store_lse2_rcpc3_seqcst = _atomic_store_stilp(Ordering::SeqCst); + #[cfg(portable_atomic_llvm_16)] + atomic_store_lse128_release = _atomic_store_swpp(Ordering::Release); + #[cfg(portable_atomic_llvm_16)] + atomic_store_lse128_seqcst = _atomic_store_swpp(Ordering::SeqCst); } fn_alias! 
{ unsafe fn(dst: *mut u128, val: u128); @@ -674,8 +907,10 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { ifunc!(unsafe fn(dst: *mut u128, val: u128) { let cpuinfo = detect::detect(); if cpuinfo.has_lse2() { + // if detect(FEAT_LSE2) => lse2 (stp) atomic_store_lse2_relaxed } else { + // else => no_lse2: atomic_store_no_lse2_relaxed } }); @@ -684,8 +919,24 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { ifunc!(unsafe fn(dst: *mut u128, val: u128) { let cpuinfo = detect::detect(); if cpuinfo.has_lse2() { - atomic_store_lse2_release + #[cfg(portable_atomic_llvm_16)] + if cpuinfo.has_rcpc3() { + // if detect(FEAT_LSE2) && detect(FEAT_LRCPC3) && order != relaxed => lse2_rcpc3 (stilp) + atomic_store_lse2_rcpc3_release + } else if cpuinfo.has_lse128() { + // if detect(FEAT_LSE2) && detect(FEAT_LSE128) && order != relaxed => lse128 (swpp) + atomic_store_lse128_release + } else { + // if detect(FEAT_LSE2) => lse2 (stp) + atomic_store_lse2_release + } + #[cfg(not(portable_atomic_llvm_16))] + { + // if detect(FEAT_LSE2) => lse2 (stp) + atomic_store_lse2_release + } } else { + // else => no_lse2: atomic_store_no_lse2_release } }); @@ -694,8 +945,24 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { ifunc!(unsafe fn(dst: *mut u128, val: u128) { let cpuinfo = detect::detect(); if cpuinfo.has_lse2() { - atomic_store_lse2_seqcst + #[cfg(portable_atomic_llvm_16)] + if cpuinfo.has_lse128() { + // if detect(FEAT_LSE2) && detect(FEAT_LSE128) && order == seqcst => lse128 (swpp) + atomic_store_lse128_seqcst + } else if cpuinfo.has_rcpc3() { + // if detect(FEAT_LSE2) && detect(FEAT_LRCPC3) && order != relaxed => lse2_rcpc3 (stilp) + atomic_store_lse2_rcpc3_seqcst + } else { + // if detect(FEAT_LSE2) => lse2 (stp) + atomic_store_lse2_seqcst + } + #[cfg(not(portable_atomic_llvm_16))] + { + // if detect(FEAT_LSE2) => lse2 (stp) + atomic_store_lse2_seqcst + } } else { + // else => no_lse2: atomic_store_no_lse2_seqcst } }); 
@@ -704,6 +971,34 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { } } } + // else => no_lse2: + #[cfg(not(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "fuchsia", + // target_os = "windows", + ), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_store_no_lse2(dst, val, order); + } } // If CPU supports FEAT_LSE2, STP/STILP is single-copy atomic writes, // otherwise it is two single-copy atomic writes. @@ -740,45 +1035,91 @@ unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { }}; } match order { - Ordering::Relaxed => atomic_store!("", ""), - #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))] - Ordering::Release => { - let val = U128 { whole: val }; - // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. - // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers- - asm!( - start_rcpc3!(), - "stilp {val_lo}, {val_hi}, [{dst}]", - dst = in(reg) ptr_reg!(dst), - val_lo = in(reg) val.pair.lo, - val_hi = in(reg) val.pair.hi, - options(nostack, preserves_flags), - ); + // if compile_time(FEAT_LSE128) && order == seqcst => swpp + // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506 + // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128. 
+ #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] + Ordering::SeqCst => { + _atomic_swap_swpp(dst, val, order); } + + // if compile_time(FEAT_LRCPC3) && order != relaxed: + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. + #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))] + Ordering::Release => _atomic_store_stilp(dst, val, order), + #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))] + #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] + Ordering::SeqCst => _atomic_store_stilp(dst, val, order), + + // if compile_time(FEAT_LSE128) && order != relaxed => swpp + // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506 + // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128. #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] Ordering::Release => { - // Use swpp if stp requires fences. - // https://reviews.llvm.org/D143506 - // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128. _atomic_swap_swpp(dst, val, order); } + + // else => stp + Ordering::Relaxed => atomic_store!("", ""), #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] Ordering::Release => atomic_store!("", "dmb ish"), - #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] - Ordering::SeqCst => { - // Use swpp if stp requires fences. - // https://reviews.llvm.org/D143506 - // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128. 
- _atomic_swap_swpp(dst, val, order); - } + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"), _ => unreachable!(), } } } +#[cfg(portable_atomic_llvm_16)] +#[cfg(any( + target_feature = "lse2", + portable_atomic_target_feature = "lse2", + not(portable_atomic_no_outline_atomics), +))] +#[cfg(any( + target_feature = "rcpc3", + portable_atomic_target_feature = "rcpc3", + not(portable_atomic_no_outline_atomics), +))] +#[inline] +unsafe fn _atomic_store_stilp(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_lse2!(); + debug_assert_rcpc3!(); + + // SAFETY: the caller must guarantee that `dst` is valid for writes, + // 16-byte aligned, that there are no concurrent non-atomic operations. + // + // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers- + unsafe { + macro_rules! atomic_store { + ($acquire:tt) => {{ + let val = U128 { whole: val }; + asm!( + start_rcpc3!(), + "stilp {val_lo}, {val_hi}, [{dst}]", + $acquire, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + options(nostack, preserves_flags), + ); + }}; + } + match order { + Ordering::Release => atomic_store!(""), + // LLVM uses store-release (dmb ish; stp); dmb ish, GCC (libatomic) + // uses store-release (stilp) without fence for SeqCst store + // (https://github.com/gcc-mirror/gcc/commit/7107574958e2bed11d916a1480ef1319f15e5ffe). + // Considering https://reviews.llvm.org/D141748, LLVM's lowering seems + // to be the safer option here (I'm not convinced that the libatomic's implementation is wrong). 
+ Ordering::SeqCst => atomic_store!("dmb ish"), + _ => unreachable!(), + } + } +} // Do not use _atomic_swap_ldxp_stxp because it needs extra registers to implement store. #[cfg(any( test, @@ -815,6 +1156,19 @@ unsafe fn _atomic_store_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) { } } +// ----------------------------------------------------------------------------- +// compare_exchange + +/* + +Instruction selection flow for compare_exchange: +- if compile_time(FEAT_LSE) => casp +- if platform_supports_detection_of(FEAT_LSE): + - if detect(FEAT_LSE) => casp +- else => ldxp_stxp + +*/ + #[inline] unsafe fn atomic_compare_exchange( dst: *mut u128, @@ -823,35 +1177,12 @@ unsafe fn atomic_compare_exchange( success: Ordering, failure: Ordering, ) -> Result<u128, u128> { + // if compile_time(FEAT_LSE) => casp #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. let prev = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) }; - #[cfg(not(all( - not(portable_atomic_no_outline_atomics), - any( - all( - target_os = "linux", - any( - target_env = "gnu", - all( - any(target_env = "musl", target_env = "ohos"), - not(target_feature = "crt-static"), - ), - portable_atomic_outline_atomics, - ), - ), - target_os = "android", - target_os = "freebsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "fuchsia", - target_os = "windows", - ), - )))] - #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] - // SAFETY: the caller must uphold the safety contract. 
- let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) }; + // if platform_supports_detection_of(FEAT_LSE): #[cfg(all( not(portable_atomic_no_outline_atomics), any( @@ -918,8 +1249,10 @@ unsafe fn atomic_compare_exchange( Ordering::Relaxed => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { + // if detect(FEAT_LSE) => casp atomic_compare_exchange_casp_relaxed } else { + // else => ldxp_stxp atomic_compare_exchange_ldxp_stxp_relaxed } }) @@ -927,8 +1260,10 @@ unsafe fn atomic_compare_exchange( Ordering::Acquire => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { + // if detect(FEAT_LSE) => casp atomic_compare_exchange_casp_acquire } else { + // else => ldxp_stxp atomic_compare_exchange_ldxp_stxp_acquire } }) @@ -936,8 +1271,10 @@ unsafe fn atomic_compare_exchange( Ordering::Release => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { + // if detect(FEAT_LSE) => casp atomic_compare_exchange_casp_release } else { + // else => ldxp_stxp atomic_compare_exchange_ldxp_stxp_release } }) @@ -947,8 +1284,10 @@ unsafe fn atomic_compare_exchange( Ordering::AcqRel | Ordering::SeqCst => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { + // if detect(FEAT_LSE) => casp atomic_compare_exchange_casp_acqrel } else { + // else => ldxp_stxp atomic_compare_exchange_ldxp_stxp_acqrel } }) @@ -957,8 +1296,10 @@ unsafe fn atomic_compare_exchange( Ordering::AcqRel => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { + // if detect(FEAT_LSE) => casp atomic_compare_exchange_casp_acqrel } else { + // else => ldxp_stxp atomic_compare_exchange_ldxp_stxp_acqrel } }) @@ -967,8 +1308,10 @@ unsafe fn atomic_compare_exchange( Ordering::SeqCst => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if 
detect::detect().has_lse() { + // if detect(FEAT_LSE) => casp atomic_compare_exchange_casp_seqcst } else { + // else => ldxp_stxp atomic_compare_exchange_ldxp_stxp_seqcst } }) @@ -977,6 +1320,32 @@ unsafe fn atomic_compare_exchange( } } }; + // else => ldxp_stxp + #[cfg(not(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "fuchsia", + target_os = "windows", + ), + )))] + #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + // SAFETY: the caller must uphold the safety contract. + let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) }; if prev == old { Ok(prev) } else { @@ -1105,6 +1474,29 @@ unsafe fn _atomic_compare_exchange_ldxp_stxp( // (i.e., aarch64 doesn't have 128-bit weak CAS) use self::atomic_compare_exchange as atomic_compare_exchange_weak; +// ----------------------------------------------------------------------------- +// RMW + +/* + +Instruction selection flow for swap/fetch_and/fetch_or: +- if compile_time(FEAT_LSE128) => swpp/ldclrp/ldsetp +- if compile_time(FEAT_LSE) && not(ll_sc_rmw) => casp +- else => ldxp_stxp + +Instruction selection flow for other RMWs: +- if compile_time(FEAT_LSE) && not(ll_sc_rmw) => casp +- else => ldxp_stxp + +Note: +- We don't do run-time detection of FEAT_LSE128 at this time, because + FEAT_LSE128 is not yet available for most CPUs, but since + swpp/ldclrp/ldsetp is wait-free, it would make sense to do run-time + detection in the future. (TODO) +- We don't do run-time detection of FEAT_LSE for RMWs at this time. + +*/ + // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, // we use CAS-based atomic RMW. 
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] @@ -1121,10 +1513,16 @@ use _atomic_swap_casp as atomic_swap; use _atomic_swap_ldxp_stxp as atomic_swap; #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] use _atomic_swap_swpp as atomic_swap; -#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] +#[cfg(portable_atomic_llvm_16)] +#[cfg(any( + target_feature = "lse128", + portable_atomic_target_feature = "lse128", + not(portable_atomic_no_outline_atomics), +))] #[inline] unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_lse128!(); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, that there are no concurrent non-atomic operations, diff --git a/src/imp/atomic128/detect/aarch64_aa64reg.rs b/src/imp/atomic128/detect/aarch64_aa64reg.rs index 30720174d..901bf9c81 100644 --- a/src/imp/atomic128/detect/aarch64_aa64reg.rs +++ b/src/imp/atomic128/detect/aarch64_aa64reg.rs @@ -36,41 +36,27 @@ include!("common.rs"); #[cfg_attr(test, derive(Debug, PartialEq))] struct AA64Reg { aa64isar0: u64, - #[cfg(test)] aa64isar1: u64, aa64mmfr2: u64, } #[cold] fn _detect(info: &mut CpuInfo) { - let AA64Reg { - aa64isar0, - #[cfg(test)] - aa64isar1, - aa64mmfr2, - } = imp::aa64reg(); + let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg(); // ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0 // https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0 let atomic = extract(aa64isar0, 23, 20); if atomic >= 0b0010 { info.set(CpuInfo::HAS_LSE); - // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics. 
- #[cfg(test)] - { - if atomic >= 0b0011 { - info.set(CpuInfo::HAS_LSE128); - } + if atomic >= 0b0011 { + info.set(CpuInfo::HAS_LSE128); } } - // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics. - #[cfg(test)] - { - // ID_AA64ISAR1_EL1, AArch64 Instruction Set Attribute Register 1 - // https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/ID-AA64ISAR1-EL1--AArch64-Instruction-Set-Attribute-Register-1 - if extract(aa64isar1, 23, 20) >= 0b0011 { - info.set(CpuInfo::HAS_RCPC3); - } + // ID_AA64ISAR1_EL1, AArch64 Instruction Set Attribute Register 1 + // https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/ID-AA64ISAR1-EL1--AArch64-Instruction-Set-Attribute-Register-1 + if extract(aa64isar1, 23, 20) >= 0b0011 { + info.set(CpuInfo::HAS_RCPC3); } // ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2 // https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2 @@ -103,28 +89,19 @@ mod imp { out(reg) aa64isar0, options(pure, nomem, nostack, preserves_flags), ); - #[cfg(test)] let aa64isar1: u64; - #[cfg(test)] - { - asm!( - "mrs {0}, ID_AA64ISAR1_EL1", - out(reg) aa64isar1, - options(pure, nomem, nostack, preserves_flags), - ); - } + asm!( + "mrs {0}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, nostack, preserves_flags), + ); let aa64mmfr2: u64; asm!( "mrs {0}, ID_AA64MMFR2_EL1", out(reg) aa64mmfr2, options(pure, nomem, nostack, preserves_flags), ); - AA64Reg { - aa64isar0, - #[cfg(test)] - aa64isar1, - aa64mmfr2, - } + AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } } } } @@ -216,7 +193,6 @@ mod imp { } Some(AA64Reg { aa64isar0: buf.aa64isar0, - #[cfg(test)] aa64isar1: buf.aa64isar1, aa64mmfr2: buf.aa64mmfr2, }) @@ -232,12 +208,7 @@ mod imp { // /~https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f match unsafe { sysctl_cpu_id(b"machdep.cpu0.cpu_id\0") } { Some(cpu_id) => cpu_id, - None => 
AA64Reg { - aa64isar0: 0, - #[cfg(test)] - aa64isar1: 0, - aa64mmfr2: 0, - }, + None => AA64Reg { aa64isar0: 0, aa64isar1: 0, aa64mmfr2: 0 }, } } } @@ -262,7 +233,6 @@ mod imp { // Defined in machine/cpu.h. // /~https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/arch/arm64/include/cpu.h#L25-L40 pub(crate) const CPU_ID_AA64ISAR0: c_int = 2; - #[cfg(test)] pub(crate) const CPU_ID_AA64ISAR1: c_int = 3; pub(crate) const CPU_ID_AA64MMFR2: c_int = 7; @@ -291,15 +261,9 @@ mod imp { // so we can safely use this function on older versions of OpenBSD. pub(super) fn aa64reg() -> AA64Reg { let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0); - #[cfg(test)] let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0); let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0); - AA64Reg { - aa64isar0, - #[cfg(test)] - aa64isar1, - aa64mmfr2, - } + AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } } fn sysctl64(mib: &[ffi::c_int]) -> Option { diff --git a/src/imp/atomic128/detect/aarch64_macos.rs b/src/imp/atomic128/detect/aarch64_macos.rs index a70296cf4..fc3e23a25 100644 --- a/src/imp/atomic128/detect/aarch64_macos.rs +++ b/src/imp/atomic128/detect/aarch64_macos.rs @@ -7,7 +7,7 @@ // // If macOS supporting FEAT_LSE128/FEAT_LRCPC3 becomes popular in the future, this module will // be used to support outline-atomics for FEAT_LSE128/FEAT_LRCPC3. -// M4 is armv9.4-a but I don't know if it supports FEAT_LSE128/FEAT_LRCPC3. +// M4 is armv9.2-a and it doesn't support FEAT_LSE128/FEAT_LRCPC3. // // Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics // @@ -83,17 +83,13 @@ fn _detect(info: &mut CpuInfo) { if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0").unwrap_or(0) != 0 } { info.set(CpuInfo::HAS_LSE2); } - // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics. 
- #[cfg(test)] - { - // SAFETY: we passed a valid C string. - if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0").unwrap_or(0) != 0 } { - info.set(CpuInfo::HAS_LSE128); - } - // SAFETY: we passed a valid C string. - if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0").unwrap_or(0) != 0 } { - info.set(CpuInfo::HAS_RCPC3); - } + // SAFETY: we passed a valid C string. + if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0").unwrap_or(0) != 0 } { + info.set(CpuInfo::HAS_LSE128); + } + // SAFETY: we passed a valid C string. + if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0").unwrap_or(0) != 0 } { + info.set(CpuInfo::HAS_RCPC3); } } diff --git a/src/imp/atomic128/detect/auxv.rs b/src/imp/atomic128/detect/auxv.rs index 76454f108..9f69b8d23 100644 --- a/src/imp/atomic128/detect/auxv.rs +++ b/src/imp/atomic128/detect/auxv.rs @@ -102,7 +102,11 @@ mod os { // /~https://github.com/torvalds/linux/blob/v6.10/include/uapi/linux/auxvec.h #[cfg(any(test, target_arch = "aarch64"))] pub(crate) const AT_HWCAP: c_ulong = 16; - #[cfg(any(test, target_arch = "powerpc64"))] + #[cfg(any( + test, + all(target_arch = "aarch64", target_pointer_width = "64"), + target_arch = "powerpc64", + ))] pub(crate) const AT_HWCAP2: c_ulong = 26; // Defined in sys/system_properties.h. 
@@ -227,11 +231,9 @@ mod arch { pub(super) const HWCAP_USCAT: ffi::c_ulong = 1 << 25; #[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(target_pointer_width = "64")] - #[cfg(test)] pub(super) const HWCAP2_LRCPC3: ffi::c_ulong = 1 << 46; #[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(target_pointer_width = "64")] - #[cfg(test)] pub(super) const HWCAP2_LSE128: ffi::c_ulong = 1 << 47; #[cold] @@ -246,7 +248,6 @@ mod arch { } #[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(target_pointer_width = "64")] - #[cfg(test)] { let hwcap2 = os::getauxval(ffi::AT_HWCAP2); if hwcap2 & HWCAP2_LRCPC3 != 0 { diff --git a/src/imp/atomic128/detect/common.rs b/src/imp/atomic128/detect/common.rs index 9f9244053..81bbe9a9d 100644 --- a/src/imp/atomic128/detect/common.rs +++ b/src/imp/atomic128/detect/common.rs @@ -51,11 +51,9 @@ impl CpuInfo { const HAS_LSE: u32 = 1; // FEAT_LSE #[cfg_attr(not(test), allow(dead_code))] const HAS_LSE2: u32 = 2; // FEAT_LSE2 - #[cfg(test)] - // This is currently only used in tests. + #[cfg_attr(not(test), allow(dead_code))] const HAS_LSE128: u32 = 3; // FEAT_LSE128 - #[cfg(test)] - // This is currently only used in tests. + #[cfg_attr(not(test), allow(dead_code))] const HAS_RCPC3: u32 = 4; // FEAT_LRCPC3 #[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] @@ -69,12 +67,17 @@ impl CpuInfo { pub(crate) fn has_lse2(self) -> bool { self.test(CpuInfo::HAS_LSE2) } - #[cfg(test)] + #[cfg_attr(not(test), allow(dead_code))] + #[cfg(any( + test, + not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")), + ))] #[inline] pub(crate) fn has_lse128(self) -> bool { self.test(CpuInfo::HAS_LSE128) } - #[cfg(test)] + #[cfg_attr(not(test), allow(dead_code))] + #[cfg(any(test, not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))))] #[inline] pub(crate) fn has_rcpc3(self) -> bool { self.test(CpuInfo::HAS_RCPC3)