diff --git a/.cirrus.yml b/.cirrus.yml index 6571ec3f4..81f0b21f5 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -30,8 +30,8 @@ aarch64_linux_test_task: # outline-atomics is enabled by default on linux-gnu and linux-musl with dynamic linking. - RUSTFLAGS="${RUSTFLAGS} --cfg portable_atomic_no_outline_atomics" RUSTDOCFLAGS="${RUSTDOCFLAGS} --cfg portable_atomic_no_outline_atomics" ./tools/test.sh -vv --tests # +lse - # Neoverse N1 is ARMv8.2-a and doesn't support FEAT_LSE2. - # FEAT_LSE2 is tested on aarch64 macOS VM. + # Neoverse N1 is Armv8.2 and doesn't support FEAT_LSE2. + # FEAT_LSE2 is tested on AArch64 macOS VM. - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+lse" RUSTDOCFLAGS="${RUSTDOCFLAGS} -C target-feature=+lse" ./tools/test.sh -vv --tests aarch64_linux_valgrind_task: @@ -50,5 +50,5 @@ aarch64_linux_valgrind_task: # outline-atomics is enabled by default on linux-gnu and linux-musl with dynamic linking. - RUSTFLAGS="${RUSTFLAGS} --cfg portable_atomic_no_outline_atomics" RUSTDOCFLAGS="${RUSTDOCFLAGS} --cfg portable_atomic_no_outline_atomics" ./tools/test.sh valgrind -vv # +lse - # As of Valgrind 3.19, Valgrind supports atomic instructions of ARMv8.0 and ARMv8.1 (FEAT_LSE). + # As of Valgrind 3.19, Valgrind supports atomic instructions of Armv8.0 and Armv8.1 (FEAT_LSE). 
- RUSTFLAGS="${RUSTFLAGS} -C target-feature=+lse" RUSTDOCFLAGS="${RUSTDOCFLAGS} -C target-feature=+lse" ./tools/test.sh valgrind -vv diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4c15da25..95576f37d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -126,7 +126,7 @@ jobs: os: windows-latest - rust: nightly target: aarch64-apple-darwin - os: macos-14 # aarch64 + os: macos-14 # AArch64 - rust: '1.59' # LLVM 13 target: aarch64-unknown-linux-gnu # - rust: '1.64' # LLVM 14 @@ -292,7 +292,7 @@ jobs: env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg portable_atomic_no_outline_atomics RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg portable_atomic_no_outline_atomics - # outline-atomics is disabled by default on aarch64 musl with static linking + # outline-atomics is disabled by default on AArch64 musl with static linking if: (matrix.target == '' && !contains(matrix.rust, 'i686') || startsWith(matrix.target, 'x86_64')) || startsWith(matrix.target, 'aarch64') && !(contains(matrix.target, '-musl') && matrix.flags == '') || startsWith(matrix.target, 'armv5te') || matrix.target == 'arm-linux-androideabi' - run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-} env: @@ -301,9 +301,9 @@ jobs: # __kuser_helper_version < 5, etc., and is not a public API. 
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg portable_atomic_test_outline_atomics_detect_false RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg portable_atomic_test_outline_atomics_detect_false - # outline-atomics is disabled by default on aarch64 musl with static linking + # outline-atomics is disabled by default on AArch64 musl with static linking if: (matrix.target == '' && !contains(matrix.rust, 'i686') || startsWith(matrix.target, 'x86_64')) || startsWith(matrix.target, 'aarch64') && !(contains(matrix.target, '-musl') && matrix.flags == '') || startsWith(matrix.target, 'armv5te') || matrix.target == 'arm-linux-androideabi' - # outline-atomics is disabled by default on aarch64 musl with static linking and powerpc64 + # outline-atomics is disabled by default on AArch64 musl with static linking and powerpc64 # powerpc64le- (little-endian) is skipped because it is pwr8 by default - run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-} --exclude api-test env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 76333378a..94bbba53b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,7 +63,7 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com - Optimize 128-bit weak CAS on powerpc64. -- Optimize interrupt disable on no-std pre-v6 ARM where `unsafe-assume-single-core` and `disable-fiq` are enabled. ([771c45d](/~https://github.com/taiki-e/portable-atomic/commit/771c45da2d2afc4f83df033dd4bdf3f976d14a74)) +- Optimize interrupt disable on no-std pre-v6 Arm where `unsafe-assume-single-core` and `disable-fiq` are enabled. ([771c45d](/~https://github.com/taiki-e/portable-atomic/commit/771c45da2d2afc4f83df033dd4bdf3f976d14a74)) - Improve detection of Apple hardware. 
([5c3a43b](/~https://github.com/taiki-e/portable-atomic/commit/5c3a43b53f1c4188f9dd597599633bc1a315bf44)) @@ -75,7 +75,7 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com - Optimize AArch64 128-bit atomic load/store when the `rcpc3` target feature is enabled at compile-time. ([#68](/~https://github.com/taiki-e/portable-atomic/pull/68)) -- Optimize inline assemblies on ARM, AArch64, and MSP430. +- Optimize inline assemblies on Arm, AArch64, and MSP430. ## [1.4.2] - 2023-07-27 @@ -99,11 +99,11 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com See also [rust-lang/rust#112150](/~https://github.com/rust-lang/rust/pull/112150). -- Optimize 128-bit atomics on aarch64/s390x. +- Optimize 128-bit atomics on AArch64/s390x. ## [1.3.3] - 2023-05-31 -- Fix build error on aarch64 ILP32 ABI targets (tier 3). +- Fix build error on AArch64 ILP32 ABI targets (tier 3). - Optimize 128-bit atomics on s390x. @@ -139,18 +139,18 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com - Various optimizations - Optimize x86_64 128-bit outline-atomics. This improves performance by up to 15% in concurrent RMW/store for cases where the `cmpxchg16b` target feature is not available at compile-time. ([40c4cd4](/~https://github.com/taiki-e/portable-atomic/commit/40c4cd4f682f1cb153f18d4d6a88795bafaf5667)) - Optimize x86_64 128-bit load that uses cmpxchg16b. ([40c4cd4](/~https://github.com/taiki-e/portable-atomic/commit/40c4cd4f682f1cb153f18d4d6a88795bafaf5667)) - - Optimize aarch64 128-bit load that uses FEAT_LSE. ([40c4cd4](/~https://github.com/taiki-e/portable-atomic/commit/40c4cd4f682f1cb153f18d4d6a88795bafaf5667)) - - Optimize pre-ARMv6 Linux/Android 64-bit atomics. ([efacc89](/~https://github.com/taiki-e/portable-atomic/commit/efacc89c210d7a34ef5e879821112189da5d1901)) + - Optimize AArch64 128-bit load that uses FEAT_LSE. 
([40c4cd4](/~https://github.com/taiki-e/portable-atomic/commit/40c4cd4f682f1cb153f18d4d6a88795bafaf5667)) + - Optimize pre-Armv6 Linux/Android 64-bit atomics. ([efacc89](/~https://github.com/taiki-e/portable-atomic/commit/efacc89c210d7a34ef5e879821112189da5d1901)) - Support outline-atomics for powerpc64 128-bit atomics. This is currently disabled by default, and can be enabled by `--cfg portable_atomic_outline_atomics`. ([#90](/~https://github.com/taiki-e/portable-atomic/pull/90)) - - Optimize aarch64 outline-atomics on linux-musl. On linux-musl, outline-atomics is enabled by default only when dynamic linking is enabled. When static linking is enabled, this can be enabled by `--cfg portable_atomic_outline_atomics`. See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md#run-time-feature-detection) for more. ([8418235](/~https://github.com/taiki-e/portable-atomic/commit/84182354e4a149074e28bda4683d538e5fb617ce), [31d0862](/~https://github.com/taiki-e/portable-atomic/commit/31d08623d4e21af207ff2343f5553b9b5a030452)) + - Optimize AArch64 outline-atomics on linux-musl. On linux-musl, outline-atomics is enabled by default only when dynamic linking is enabled. When static linking is enabled, this can be enabled by `--cfg portable_atomic_outline_atomics`. See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md#run-time-feature-detection) for more. ([8418235](/~https://github.com/taiki-e/portable-atomic/commit/84182354e4a149074e28bda4683d538e5fb617ce), [31d0862](/~https://github.com/taiki-e/portable-atomic/commit/31d08623d4e21af207ff2343f5553b9b5a030452)) ## [1.2.0] - 2023-03-25 -- Make 64-bit atomics lock-free on ARM Linux/Android targets that do not have 64-bit atomics (e.g., armv5te-unknown-linux-gnueabi, arm-linux-androideabi, etc.) when the kernel version is 3.1 or later. 
([#82](/~https://github.com/taiki-e/portable-atomic/pull/82)) +- Make 64-bit atomics lock-free on Arm Linux/Android targets that do not have 64-bit atomics (e.g., armv5te-unknown-linux-gnueabi, arm-linux-androideabi, etc.) when the kernel version is 3.1 or later. ([#82](/~https://github.com/taiki-e/portable-atomic/pull/82)) -- Fix aarch64 128-bit atomics performance regression on Apple hardware. ([#89](/~https://github.com/taiki-e/portable-atomic/pull/89)) +- Fix AArch64 128-bit atomics performance regression on Apple hardware. ([#89](/~https://github.com/taiki-e/portable-atomic/pull/89)) -- Optimize 128-bit atomics on aarch64, x86_64, powerpc64, and s390x. +- Optimize 128-bit atomics on AArch64, x86_64, powerpc64, and s390x. ## [1.1.0] - 2023-03-24 @@ -170,7 +170,7 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com - Support outline-atomics for cmpxchg16b on Rust 1.69+ (i.e., on Rust 1.69+, x86_64 128-bit atomics is lock-free on all Intel chips and almost all AMD chips, even if cmpxchg16b is not available at compile-time.). Previously it was only nightly. ([#80](/~https://github.com/taiki-e/portable-atomic/pull/80)) - portable-atomic no longer enables outline-atomics on target where run-time CPU feature detection is not available. ([#80](/~https://github.com/taiki-e/portable-atomic/pull/80)) -- Optimize aarch64 outline-atomics for 128-bit atomics. +- Optimize AArch64 outline-atomics for 128-bit atomics. - Support more targets and improve performance. 
([#63](/~https://github.com/taiki-e/portable-atomic/pull/63), [#64](/~https://github.com/taiki-e/portable-atomic/pull/64), [#67](/~https://github.com/taiki-e/portable-atomic/pull/67), [#69](/~https://github.com/taiki-e/portable-atomic/pull/69), [#75](/~https://github.com/taiki-e/portable-atomic/pull/75), [#76](/~https://github.com/taiki-e/portable-atomic/pull/76), [#77](/~https://github.com/taiki-e/portable-atomic/pull/77)) See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md#run-time-feature-detection) for a list of platforms that support outline-atomics. Most of these improvements have already been [submitted and accepted in rust-lang/stdarch](/~https://github.com/rust-lang/stdarch/pulls?q=is%3Apr+author%3Ataiki-e+std_detect) and will soon be available in `std::arch::is_aarch64_feature_detected`. @@ -244,7 +244,7 @@ The latest version of portable-atomic is 1.x. This release makes portable-atomic - Various improvements to `portable_atomic_unsafe_assume_single_core` cfg. ([#44](/~https://github.com/taiki-e/portable-atomic/pull/44), [#40](/~https://github.com/taiki-e/portable-atomic/pull/40)) - - Support disabling FIQs on pre-v6 ARM under `portable_atomic_disable_fiq` cfg. + - Support disabling FIQs on pre-v6 Arm under `portable_atomic_disable_fiq` cfg. - Support RISC-V supervisor mode under `portable_atomic_s_mode` cfg. - Optimize interrupt restore on AVR and MSP430. ([#40](/~https://github.com/taiki-e/portable-atomic/pull/40)) - Documentation improvements. @@ -260,7 +260,7 @@ The latest version of portable-atomic is 1.x. This release makes portable-atomic ## [0.3.14] - 2022-09-04 -- Optimize atomic load/store on no-std pre-v6 ARM when `portable_atomic_unsafe_assume_single_core` cfg is used. ([#36](/~https://github.com/taiki-e/portable-atomic/pull/36)) +- Optimize atomic load/store on no-std pre-v6 Arm when `portable_atomic_unsafe_assume_single_core` cfg is used. 
([#36](/~https://github.com/taiki-e/portable-atomic/pull/36)) - Support pre-power8 powerpc64le. powerpc64le's default cpu version is power8, but you can technically compile it for the old cpu using the unsafe `-C target-cpu` rustc flag. @@ -274,7 +274,7 @@ The latest version of portable-atomic is 1.x. This release makes portable-atomic ## [0.3.12] - 2022-08-13 -- Support atomic CAS on no-std pre-v6 ARM targets (e.g., thumbv4t-none-eabi) under unsafe cfg `portable_atomic_unsafe_assume_single_core`. ([#28](/~https://github.com/taiki-e/portable-atomic/pull/28)) +- Support atomic CAS on no-std pre-v6 Arm targets (e.g., thumbv4t-none-eabi) under unsafe cfg `portable_atomic_unsafe_assume_single_core`. ([#28](/~https://github.com/taiki-e/portable-atomic/pull/28)) ## [0.3.11] - 2022-08-12 @@ -288,7 +288,7 @@ The latest version of portable-atomic is 1.x. This release makes portable-atomic - Atomic operations based on disabling interrupts on single-core systems are now considered lock-free. - The previous behavior was inconsistent because we consider the pre-v6 ARM Linux's atomic operations provided in a similar way by the Linux kernel to be lock-free. + The previous behavior was inconsistent because we consider the pre-v6 Arm Linux's atomic operations provided in a similar way by the Linux kernel to be lock-free. - Respect `-Z allow-features`. @@ -325,7 +325,7 @@ The latest version of portable-atomic is 1.x. This release makes portable-atomic - Optimize x86_64 outline-atomics. -- Optimize inline assemblies on ARM and AArch64. +- Optimize inline assemblies on Arm and AArch64. - Revert [thumbv6m atomic load/store changes made in 0.3.5](/~https://github.com/taiki-e/portable-atomic/pull/18). This is because [rust-lang/rust#99595](/~https://github.com/rust-lang/rust/pull/99595) has been reverted, so this is no longer needed. 
diff --git a/Cargo.toml b/Cargo.toml index 1f29bfe8d..8f1d95ed2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ s-mode = [] # For RISC-V targets, use AMO instructions even if A-extension is disabled. # This feature requires Rust 1.72+. force-amo = [] -# For ARM targets, also disable FIQs when disabling interrupts. +# For Arm targets, also disable FIQs when disabling interrupts. disable-fiq = [] # Note: serde and critical-section are public dependencies. diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index d18436500..d5d05d566 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -17,7 +17,7 @@ portable-atomic/ │ ├── imp/ │ │ ├── atomic128/ -- 128-bit atomic implementations (mainly by asm) │ │ │ └── detect/ -- Run-time CPU feature detection implementations used for outline-atomics -│ │ ├── arm_linux.rs -- 64-bit atomic implementation for pre-v6 ARM Linux/Android +│ │ ├── arm_linux.rs -- 64-bit atomic implementation for pre-v6 Arm Linux/Android │ │ ├── core_atomic.rs -- wrapper for core::sync::atomic types │ │ ├── fallback/ -- fallback implementation based on global locks │ │ ├── float.rs -- atomic float implementation based on atomic integer @@ -38,7 +38,7 @@ portable-atomic/ ## Testing powerpc64le using POWER Functional Simulator -We mainly use QEMU to test for targets other than x86_64/aarch64, but some instructions do not work well in QEMU, so we sometimes use other tools. This section describes testing powerpc64le using IBM [POWER Functional Simulator](https://www.ibm.com/support/pages/node/6491145). +We mainly use QEMU to test for targets other than x86_64/AArch64, but some instructions do not work well in QEMU, so we sometimes use other tools. This section describes testing powerpc64le using IBM [POWER Functional Simulator](https://www.ibm.com/support/pages/node/6491145). Note: Since QEMU 8.1.1, QEMU now supports all the instructions we use. @@ -149,7 +149,7 @@ Start simulator (Ctrl-C to stop). 
./tools/fuchsia-test.sh emu aarch64 ``` -(The only fuchsia-specific code in our codebase is for aarch64, so here we use the aarch64 emulator, but if you pass `x86_64` instead of `aarch64` as the first argument of the script, it works for x86_64.) +(The only fuchsia-specific code in our codebase is for AArch64, so here we use the AArch64 emulator, but if you pass `x86_64` instead of `aarch64` as the first argument of the script, it works for x86_64.) ### Run tests diff --git a/README.md b/README.md index e21e4056e..b1e5b142f 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Portable atomic types including support for 128-bit atomics, atomic float, etc. - Provide `AtomicI128` and `AtomicU128`. - Provide `AtomicF32` and `AtomicF64`. ([optional, requires the `float` feature](#optional-features-float)) - Provide atomic load/store for targets where atomic is not available at all in the standard library. (RISC-V without A-extension, MSP430, AVR) -- Provide atomic CAS for targets where atomic CAS is not available in the standard library. (thumbv6m, pre-v6 ARM, RISC-V without A-extension, MSP430, AVR, Xtensa, etc.) (always enabled for MSP430 and AVR, [optional](#optional-features-critical-section) otherwise) +- Provide atomic CAS for targets where atomic CAS is not available in the standard library. (thumbv6m, pre-v6 Arm, RISC-V without A-extension, MSP430, AVR, Xtensa, etc.) (always enabled for MSP430 and AVR, [optional](#optional-features-critical-section) otherwise) - Provide stable equivalents of the standard library's atomic types' unstable APIs, such as [`AtomicPtr::fetch_*`](/~https://github.com/rust-lang/rust/issues/99108). 
- Make features that require newer compilers, such as [`fetch_{max,min}`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.fetch_max), [`fetch_update`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.fetch_update), [`as_ptr`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.as_ptr), [`from_ptr`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.from_ptr), [`AtomicBool::fetch_not`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicBool.html#method.fetch_not) and [stronger CAS failure ordering](/~https://github.com/rust-lang/rust/pull/98383) available on Rust 1.34+. - Provide workaround for bugs in the standard library's atomic-related APIs, such as [rust-lang/rust#100650], `fence`/`compiler_fence` on MSP430 that cause LLVM error, etc. @@ -52,7 +52,7 @@ portable-atomic = { version = "1.3", default-features = false, features = ["requ ## 128-bit atomics support -Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), aarch64 (Rust 1.59+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used. +Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used. On x86_64, even if `cmpxchg16b` is not available at compile-time (note: `cmpxchg16b` target feature is enabled by default only on Apple and Windows (except Windows 7) targets), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg. 
@@ -126,7 +126,7 @@ See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic Enabling this feature in an environment where privileged instructions are not available, or if the instructions used are not sufficient to disable interrupts in the system, it is also usually considered **unsound**, although the details are system-dependent. The following are known cases: - - On pre-v6 ARM, this disables only IRQs by default. For many systems (e.g., GBA) this is enough. If the system need to disable both IRQs and FIQs, you need to enable the `disable-fiq` feature together. + - On pre-v6 Arm, this disables only IRQs by default. For many systems (e.g., GBA) this is enough. If the system needs to disable both IRQs and FIQs, you need to enable the `disable-fiq` feature together. - On RISC-V without A-extension, this generates code for machine-mode (M-mode) by default. If you enable the `s-mode` together, this generates code for supervisor-mode (S-mode). In particular, `qemu-system-riscv*` uses [OpenSBI](/~https://github.com/riscv-software-src/opensbi) as the default firmware. See also [the `interrupt` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/interrupt/README.md). @@ -135,7 +135,7 @@ See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic It is **very strongly discouraged** to enable this feature in libraries that depend on `portable-atomic`. The recommended approach for libraries is to leave it up to the end user whether or not to enable this feature. (However, it may make sense to enable this feature by default for libraries specific to a platform where it is guaranteed to always be sound, for example in a hardware abstraction layer targeting a single-core chip.) - ARMv6-M (thumbv6m), pre-v6 ARM (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa are currently supported.
+ Armv6-M (thumbv6m), pre-v6 Arm (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa are currently supported. Since all MSP430 and AVR are single-core, we always provide atomic CAS for them without this feature. @@ -167,14 +167,14 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ... - **`--cfg portable_atomic_no_outline_atomics`**
Disable dynamic dispatching by run-time CPU feature detection. - If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (aarch64). + If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (AArch64). Note: - - Dynamic detection is currently only enabled in Rust 1.59+ for aarch64 and x86_64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set. + - Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set. - If the required target features are enabled at compile-time, the atomic operations are inlined. - This is compatible with no-std (as with all features except `std`). - On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.) - - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. (portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.) + - Some AArch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. 
(portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.) See also the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md). diff --git a/build.rs b/build.rs index e17a562f7..e91503684 100644 --- a/build.rs +++ b/build.rs @@ -231,7 +231,7 @@ fn main() { // target_feature "lse2"/"lse128"/"rcpc3" is unstable and available on rustc side since nightly-2024-08-30: /~https://github.com/rust-lang/rust/pull/128192 if !version.probe(82, 2024, 8, 29) || needs_target_feature_fallback(&version, None) { // FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. - // aarch64 macOS always supports FEAT_LSE and FEAT_LSE2 because it is armv8.5-a: + // AArch64 macOS always supports FEAT_LSE and FEAT_LSE2 because it is Armv8.5: // /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L728 // Script to get builtin targets that support FEAT_LSE/FEAT_LSE2 by default: // $ (for target in $(rustc --print target-list | grep -E '^aarch64|^arm64'); do rustc --print cfg --target "${target}" | grep -Fq '"lse"' && printf '%s\n' "${target}"; done) @@ -258,7 +258,7 @@ fn main() { } } "arm" => { - // For non-Linux/Android pre-v6 ARM (tier 3) with unsafe_assume_single_core enabled. + // For non-Linux/Android pre-v6 Arm (tier 3) with unsafe_assume_single_core enabled. 
// feature(isa_attribute) stabilized in Rust 1.67 (nightly-2022-11-06): /~https://github.com/rust-lang/rust/pull/102458 if version.nightly && !version.probe(67, 2022, 11, 5) { println!("cargo:rustc-cfg=portable_atomic_unstable_isa_attribute"); @@ -286,16 +286,16 @@ fn main() { // armeb-unknown-linux-gnueabi is v8 & aclass // /~https://github.com/rust-lang/rust/blob/1.80.0/compiler/rustc_target/src/spec/targets/armeb_unknown_linux_gnueabi.rs#L18 _ if target == "armeb-unknown-linux-gnueabi" => subarch = "v8", - // Legacy arm architectures (pre-v7 except v6m) don't have *class target feature. + // Legacy Arm architectures (pre-v7 except v6m) don't have *class target feature. "" => subarch = "v6", "v4t" | "v5te" | "v6" | "v6k" => {} _ => { known = false; if env::var_os("PORTABLE_ATOMIC_DENY_WARNINGS").is_some() { - panic!("unrecognized arm subarch: {}", target) + panic!("unrecognized Arm subarch: {}", target) } println!( - "cargo:warning={}: unrecognized arm subarch: {}", + "cargo:warning={}: unrecognized Arm subarch: {}", env!("CARGO_PKG_NAME"), target ); diff --git a/src/gen/utils.rs b/src/gen/utils.rs index 58f4c50df..f7243c921 100644 --- a/src/gen/utils.rs +++ b/src/gen/utils.rs @@ -11,7 +11,7 @@ // handle this is to pass it as a pointer and clear the upper bits inside asm, // but it is easier to overlook than cast, which can catch overlooks by // asm_sub_register lint. -// See also /~https://github.com/ARM-software/abi-aa/blob/2023Q3/aapcs64/aapcs64.rst#pointers +// See also /~https://github.com/ARM-software/abi-aa/blob/2024Q3/aapcs64/aapcs64.rst#pointers // // Except for x86_64, which can use 32-bit registers in the destination operand // (on x86_64, we use the ptr_modifier macro to handle this), we need to do the @@ -21,7 +21,7 @@ // recently submitted to the kernel, but in any case, this should be a safe // default for such ABIs). 
// -// Known architectures that have such ABI are x86_64 (X32), aarch64 (ILP32), +// Known architectures that have such ABI are x86_64 (X32), AArch64 (ILP32), // mips64 (N32), and riscv64 (s64ilp32, not merged yet though). (As of // 2023-06-05, only the former two are supported by rustc.) However, we list all // known 64-bit architectures because similar ABIs may exist or future added for diff --git a/src/imp/arm_linux.rs b/src/imp/arm_linux.rs index 592aa591d..49d5dde7c 100644 --- a/src/imp/arm_linux.rs +++ b/src/imp/arm_linux.rs @@ -1,13 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// 64-bit atomic implementation using kuser_cmpxchg64 on pre-v6 ARM Linux/Android. -// -// Refs: -// - /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/arm/kernel_user_helpers.rst -// - /~https://github.com/rust-lang/compiler-builtins/blob/compiler_builtins-v0.1.124/src/arm_linux.rs -// -// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use -// this module and use fallback implementation instead. +/* +64-bit atomic implementation using kuser_cmpxchg64 on pre-v6 Arm Linux/Android. + +Refs: +- /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/arm/kernel_user_helpers.rst +- /~https://github.com/rust-lang/compiler-builtins/blob/compiler_builtins-v0.1.124/src/arm_linux.rs + +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use fallback implementation instead. +*/ // TODO: Since Rust 1.64, the Linux kernel requirement for Rust when using std is 3.2+, so it should // be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+. @@ -90,7 +92,7 @@ where // This is not single-copy atomic reads, but this is ok because subsequent // CAS will check for consistency. // - // ARM's memory model allow mixed-sized atomic access. + // Arm's memory model allows mixed-sized atomic access.
// /~https://github.com/rust-lang/unsafe-code-guidelines/issues/345#issuecomment-1172891466 // // Note that the C++20 memory model does not allow mixed-sized atomic access, diff --git a/src/imp/atomic128/README.md b/src/imp/atomic128/README.md index c421af5c7..1d53e8ee4 100644 --- a/src/imp/atomic128/README.md +++ b/src/imp/atomic128/README.md @@ -13,7 +13,7 @@ Here is the table of targets that support 128-bit atomics and the instructions u On compiler versions or platforms where these are not supported, the fallback implementation is used. -See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on aarch64. +See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on AArch64. ## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128) @@ -21,8 +21,8 @@ This directory has target-specific implementations with inline assembly ([aarch6 Implementations with inline assembly generate assemblies almost equivalent to the `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but some operations may or may not generate more efficient code. For example: -- On x86_64 and aarch64, implementation with inline assembly contains additional optimizations (e.g., [#16](/~https://github.com/taiki-e/portable-atomic/pull/16), [#126](/~https://github.com/taiki-e/portable-atomic/pull/126)) and is much faster for some operations. -- On aarch64, implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance. +- On x86_64 and AArch64, implementation with inline assembly contains additional optimizations (e.g., [#16](/~https://github.com/taiki-e/portable-atomic/pull/16), [#126](/~https://github.com/taiki-e/portable-atomic/pull/126)) and is much faster for some operations. 
+- On AArch64, implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance. - On powerpc64, LLVM does not support generating some 128-bit atomic operations (see [intrinsics.rs](intrinsics.rs) module-level comments), and we use CAS loop to implement them, so implementation with inline assembly may be faster for those operations. - In implementations without inline assembly, the compiler may reuse condition flags that have changed as a result of the operation, or use immediate values instead of registers, depending on the situation. diff --git a/src/imp/atomic128/aarch64.rs b/src/imp/atomic128/aarch64.rs index e71682662..06db9c7f1 100644 --- a/src/imp/atomic128/aarch64.rs +++ b/src/imp/atomic128/aarch64.rs @@ -1,79 +1,81 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic{I,U}128 implementation on AArch64. -// -// There are a few ways to implement 128-bit atomic operations in AArch64. -// -// - LDXP/STXP loop (DW LL/SC) -// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a) -// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available -// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available) -// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a) -// -// This module supports all of these instructions and attempts to select the best -// one based on compile-time and run-time information about available CPU features -// and platforms. For example: -// -// - If outline-atomics is not enabled and FEAT_LSE is not available at -// compile-time, we use LDXP/STXP loop. -// - If outline-atomics is enabled and FEAT_LSE is not available at -// compile-time, we use CASP for CAS if FEAT_LSE is available -// at run-time, otherwise, use LDXP/STXP loop. 
-// - If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW. -// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP -// loop for RMW (by default, it is set on Apple hardware where CASP is slow; -// see build script for details). -// - If outline-atomics is enabled and FEAT_LSE2 is not available at compile-time, -// we use LDP/STP (and also LDIAPP/STILP/SWPP if FEAT_LRCPC3/FEAT_LSE128 is -// available) for load/store if FEAT_LSE2 is available at run-time, otherwise, -// use LDXP/STXP or CASP depending on whether FEAT_LSE is available. -// - If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. -// - If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store. -// - If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store. -// -// See each "Instruction selection flow for ..." comment in this file for the exact -// instruction selection per operation. -// -// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. -// -// Note that we do not separate LL and SC into separate functions, but handle -// them within a single asm block. This is because it is theoretically possible -// for the compiler to insert operations that might clear the reservation between -// LL and SC. Considering the type of operations we are providing and the fact -// that [progress64](/~https://github.com/ARM-software/progress64) uses such code, -// this is probably not a problem for aarch64, but it seems that aarch64 doesn't -// guarantee it and hexagon is the only architecture with hardware guarantees -// that such code works. 
See also: -// -// - https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html -// - https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html -// - https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html -// -// Also, even when using a CAS loop to implement atomic RMW, include the loop itself -// in the asm block because it is more efficient for some codegen backends. -// /~https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474 -// -// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use -// this module and use intrinsics.rs instead. -// -// Refs: -// - ARM Compiler armasm User Guide -// https://developer.arm.com/documentation/dui0801/latest -// - Arm A-profile A64 Instruction Set Architecture -// https://developer.arm.com/documentation/ddi0602/latest -// - Arm Architecture Reference Manual for A-profile architecture -// https://developer.arm.com/documentation/ddi0487/latest -// - atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit -// -// Generated asm: -// - aarch64 https://godbolt.org/z/9Kq15oGs4 -// - aarch64 msvc https://godbolt.org/z/hsWo8eYh4 -// - aarch64 (+lse) https://godbolt.org/z/81TanrTGn -// - aarch64 msvc (+lse) https://godbolt.org/z/KsannGvTY -// - aarch64 (+lse,+lse2) https://godbolt.org/z/EzvodM6ca -// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/3rEEs6KE6 -// - aarch64 (+lse2,+lse128) https://godbolt.org/z/PWhsPjGa7 -// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/K8MMhfPT1 +/* +Atomic{I,U}128 implementation on AArch64. + +There are a few ways to implement 128-bit atomic operations in AArch64. 
+ +- LDXP/STXP loop (DW LL/SC) +- CASP (DWCAS) added as Armv8.1 FEAT_LSE (optional from Armv8.0, mandatory from Armv8.1) +- LDP/STP (DW load/store) if Armv8.4 FEAT_LSE2 (optional from Armv8.2, mandatory from Armv8.4) is available +- LDIAPP/STILP (DW acquire-load/release-store) added as Armv8.9 FEAT_LRCPC3 (optional from Armv8.2) (if FEAT_LSE2 is also available) +- LDCLRP/LDSETP/SWPP (DW RMW) added as Armv9.4 FEAT_LSE128 (optional from Armv9.3) + +This module supports all of these instructions and attempts to select the best +one based on compile-time and run-time information about available CPU features +and platforms. For example: + +- If outline-atomics is not enabled and FEAT_LSE is not available at + compile-time, we use LDXP/STXP loop. +- If outline-atomics is enabled and FEAT_LSE is not available at + compile-time, we use CASP for CAS if FEAT_LSE is available + at run-time, otherwise, use LDXP/STXP loop. +- If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW. + However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP + loop for RMW (by default, it is set on Apple hardware where CASP is slow; + see build script for details). +- If outline-atomics is enabled and FEAT_LSE2 is not available at compile-time, + we use LDP/STP (and also LDIAPP/STILP/SWPP if FEAT_LRCPC3/FEAT_LSE128 is + available) for load/store if FEAT_LSE2 is available at run-time, otherwise, + use LDXP/STXP or CASP depending on whether FEAT_LSE is available. +- If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. +- If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store. +- If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store. + +See each "Instruction selection flow for ..." comment in this file for the exact +instruction selection per operation. + +Note: FEAT_LSE2 doesn't imply FEAT_LSE. 
FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. + +Note that we do not separate LL and SC into separate functions, but handle +them within a single asm block. This is because it is theoretically possible +for the compiler to insert operations that might clear the reservation between +LL and SC. Considering the type of operations we are providing and the fact +that [progress64](/~https://github.com/ARM-software/progress64) uses such code, +this is probably not a problem for AArch64, but it seems that AArch64 doesn't +guarantee it and hexagon is the only architecture with hardware guarantees +that such code works. See also: + +- https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html +- https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html +- https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html + +Also, even when using a CAS loop to implement atomic RMW, include the loop itself +in the asm block because it is more efficient for some codegen backends. +/~https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474 + +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use intrinsics.rs instead. 
+ +Refs: +- Arm A-profile A64 Instruction Set Architecture + https://developer.arm.com/documentation/ddi0602/2024-06 +- Arm Compiler armasm User Guide + https://developer.arm.com/documentation/dui0801/latest +- Arm Architecture Reference Manual for A-profile architecture + https://developer.arm.com/documentation/ddi0487/latest (PDF) +- atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit + +Generated asm: +- aarch64 https://godbolt.org/z/9Kq15oGs4 +- aarch64 msvc https://godbolt.org/z/hsWo8eYh4 +- aarch64 (+lse) https://godbolt.org/z/81TanrTGn +- aarch64 msvc (+lse) https://godbolt.org/z/KsannGvTY +- aarch64 (+lse,+lse2) https://godbolt.org/z/EzvodM6ca +- aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/3rEEs6KE6 +- aarch64 (+lse2,+lse128) https://godbolt.org/z/PWhsPjGa7 +- aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/K8MMhfPT1 +*/ include!("macros.rs"); @@ -304,7 +306,7 @@ macro_rules! debug_assert_rcpc3 { }; } -// Refs: https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en +// Refs: https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives // // This is similar to #[target_feature(enable = "lse")], except that there are // no compiler guarantees regarding (un)inlining, and the scope is within an asm @@ -611,7 +613,7 @@ unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must guarantee that `dst` is valid for reads, // 16-byte aligned, that there are no concurrent non-atomic operations. // - // Refs: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64- + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDP--Load-pair-of-registers- unsafe { let (out_lo, out_hi); macro_rules! 
atomic_load_relaxed { @@ -680,7 +682,7 @@ unsafe fn _atomic_load_ldiapp(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must guarantee that `dst` is valid for reads, // 16-byte aligned, that there are no concurrent non-atomic operations. // - // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers- + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-pair-of-registers- unsafe { let (out_lo, out_hi); match order { @@ -1030,8 +1032,7 @@ unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { // SAFETY: the caller must guarantee that `dst` is valid for writes, // 16-byte aligned, that there are no concurrent non-atomic operations. // - // Refs: - // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64- + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/STP--Store-pair-of-registers- unsafe { #[rustfmt::skip] macro_rules! atomic_store { @@ -1109,7 +1110,7 @@ unsafe fn _atomic_store_stilp(dst: *mut u128, val: u128, order: Ordering) { // SAFETY: the caller must guarantee that `dst` is valid for writes, // 16-byte aligned, that there are no concurrent non-atomic operations. // - // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers- + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/STILP--Store-release-ordered-pair-of-registers- unsafe { macro_rules! atomic_store { ($acquire:tt) => {{ @@ -1390,9 +1391,7 @@ unsafe fn _atomic_compare_exchange_casp( // reads, 16-byte aligned, that there are no concurrent non-atomic operations, // and the CPU supports FEAT_LSE. 
// - // Refs: - // - https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL--A64- - // - https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-Swap-Pair-of-words-or-doublewords-in-memory- + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory- unsafe { let old = U128 { whole: old }; let new = U128 { whole: new }; @@ -1434,10 +1433,10 @@ unsafe fn _atomic_compare_exchange_ldxp_stxp( // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. // // Refs: - // - LDXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDXP--A64- - // - LDAXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDAXP--A64- - // - STXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STXP--A64- - // - STLXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STLXP--A64- + // - LDXP: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDXP--Load-exclusive-pair-of-registers- + // - LDAXP: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDAXP--Load-acquire-exclusive-pair-of-registers- + // - STXP: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/STXP--Store-exclusive-pair-of-registers- + // - STLXP: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/STLXP--Store-release-exclusive-pair-of-registers- // // Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic // operation (even load/store), a corresponding Store-Exclusive pair must succeed. 
@@ -1488,7 +1487,7 @@ unsafe fn _atomic_compare_exchange_ldxp_stxp( // casp is always strong, and ldxp requires a corresponding (succeed) stxp for // its atomicity (see code comment in _atomic_compare_exchange_ldxp_stxp). -// (i.e., aarch64 doesn't have 128-bit weak CAS) +// (i.e., AArch64 doesn't have 128-bit weak CAS) use self::atomic_compare_exchange as atomic_compare_exchange_weak; // ----------------------------------------------------------------------------- @@ -1548,8 +1547,7 @@ unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 // reads, 16-byte aligned, that there are no concurrent non-atomic operations, // and the CPU supports FEAT_LSE128. // - // Refs: - // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory- unsafe { let val = U128 { whole: val }; let (prev_lo, prev_hi); @@ -1950,8 +1948,7 @@ unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { // reads, 16-byte aligned, that there are no concurrent non-atomic operations, // and the CPU supports FEAT_LSE128. // - // Refs: - // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory- unsafe { let val = U128 { whole: !val }; let (prev_lo, prev_hi); @@ -2009,8 +2006,7 @@ unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { // reads, 16-byte aligned, that there are no concurrent non-atomic operations, // and the CPU supports FEAT_LSE128. 
// - // Refs: - // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en + // Refs: https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory- unsafe { let val = U128 { whole: val }; let (prev_lo, prev_hi); diff --git a/src/imp/atomic128/detect/aarch64_aa64reg.rs b/src/imp/atomic128/detect/aarch64_aa64reg.rs index 62145c51d..059cba51d 100644 --- a/src/imp/atomic128/detect/aarch64_aa64reg.rs +++ b/src/imp/atomic128/detect/aarch64_aa64reg.rs @@ -1,35 +1,38 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Run-time CPU feature detection on AArch64 Linux/Android/FreeBSD/NetBSD/OpenBSD by parsing system registers. -// -// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on NetBSD/OpenBSD. -// /~https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs -// /~https://github.com/rust-lang/stdarch/pull/1374 -// -// Refs: -// - https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers -// - /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/arm64/cpu-feature-registers.rst -// - /~https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/aarch64.rs -// -// Supported platforms: -// - Linux 4.11+ (emulate mrs instruction) -// /~https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2 -// - FreeBSD 12.0+ (emulate mrs instruction) -// /~https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b -// - NetBSD 9.0+ (through sysctl) -// /~https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb -// - OpenBSD 7.1+ (through sysctl) -// /~https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 -// -// For now, this 
module is only used on NetBSD/OpenBSD. -// -// On Linux/Android/FreeBSD, we use auxv.rs and this module is test-only because: -// - On Linux/Android, this approach requires a higher kernel version than Rust supports, -// and also does not work with qemu-user (as of 7.2) and Valgrind (as of 3.19). -// (Looking into HWCAP_CPUID in auxvec, it appears that Valgrind is setting it -// to false correctly, but qemu-user is setting it to true.) -// - On FreeBSD, this approach does not work on FreeBSD 12 on QEMU (confirmed on -// FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14). +/* +Run-time CPU feature detection on AArch64 Linux/Android/FreeBSD/NetBSD/OpenBSD by parsing system registers. + +As of nightly-2024-09-07, is_aarch64_feature_detected doesn't support run-time detection on NetBSD. +/~https://github.com/rust-lang/stdarch/blob/d9466edb4c53cece8686ee6e17b028436ddf4151/crates/std_detect/src/detect/mod.rs +Run-time detection on OpenBSD by is_aarch64_feature_detected is supported on Rust 1.70+. +/~https://github.com/rust-lang/stdarch/pull/1374 + +Refs: +- https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers +- /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/arm64/cpu-feature-registers.rst +- /~https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/aarch64.rs + +Supported platforms: +- Linux 4.11+ (emulate mrs instruction) + /~https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2 +- FreeBSD 12.0+ (emulate mrs instruction) + /~https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b +- NetBSD 9.0+ (through sysctl) + /~https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb +- OpenBSD 7.1+ (through sysctl) + /~https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + +For now, this module is only used on NetBSD/OpenBSD. 
+ +On Linux/Android/FreeBSD, we use auxv.rs and this module is test-only because: +- On Linux/Android, this approach requires a higher kernel version than Rust supports, + and also does not work with qemu-user (as of 7.2) and Valgrind (as of 3.19). + (Looking into HWCAP_CPUID in auxvec, it appears that Valgrind is setting it + to false correctly, but qemu-user is setting it to true.) +- On FreeBSD, this approach does not work on FreeBSD 12 on QEMU (confirmed on + FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14). +*/ include!("common.rs"); @@ -201,7 +204,7 @@ mod imp { pub(super) fn aa64reg() -> AA64Reg { // Get system registers for cpu0. // If failed, returns default because machdep.cpuN.cpu_id sysctl is not available. - // machdep.cpuN.cpu_id sysctl was added on NetBSD 9.0 so it is not available on older versions. + // machdep.cpuN.cpu_id sysctl was added in NetBSD 9.0 so it is not available on older versions. // SAFETY: we passed a valid name in a C string. // It is ok to check only cpu0, even if there are more CPUs. // /~https://github.com/NetBSD/src/commit/bd9707e06ea7d21b5c24df6dfc14cb37c2819416 @@ -230,10 +233,17 @@ mod imp { // Defined in sys/sysctl.h. // /~https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/sys/sysctl.h#L82 pub(crate) const CTL_MACHDEP: c_int = 7; + // Defined in machine/cpu.h. // /~https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/arch/arm64/include/cpu.h#L25-L40 + // OpenBSD 7.1+ + // /~https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 pub(crate) const CPU_ID_AA64ISAR0: c_int = 2; pub(crate) const CPU_ID_AA64ISAR1: c_int = 3; + // OpenBSD 7.3+ + // /~https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c + // However, on OpenBSD 7.3-7.5, querying CPU_ID_AA64MMFR2 always returns 0. 
+ // /~https://github.com/openbsd/src/commit/e8331b74e5c20302d4bd948c9db722af688ccfc1 pub(crate) const CPU_ID_AA64MMFR2: c_int = 7; extern "C" { @@ -251,12 +261,6 @@ mod imp { } } - // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+. - // /~https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 - // Others are supported on OpenBSD 7.3+. - // /~https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c - // However, on 7.3-7.5, querying AA64MMFR2 returned 0. - // /~https://github.com/openbsd/src/commit/e8331b74e5c20302d4bd948c9db722af688ccfc1 // sysctl returns an unsupported error if operation is not supported, // so we can safely use this function on older versions of OpenBSD. pub(super) fn aa64reg() -> AA64Reg { diff --git a/src/imp/atomic128/detect/aarch64_fuchsia.rs b/src/imp/atomic128/detect/aarch64_fuchsia.rs index 2e0aa8963..e3c0adf8b 100644 --- a/src/imp/atomic128/detect/aarch64_fuchsia.rs +++ b/src/imp/atomic128/detect/aarch64_fuchsia.rs @@ -1,13 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Run-time CPU feature detection on aarch64 Fuchsia by using zx_system_get_features. -// -// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia. -// /~https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs -// -// Refs: -// - https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features -// - /~https://github.com/llvm/llvm-project/commit/4e731abc55681751b5d736b613f7720e50eb1ad4 +/* +Run-time CPU feature detection on AArch64 Fuchsia by using zx_system_get_features. + +As of nightly-2024-09-07, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia. 
+/~https://github.com/rust-lang/stdarch/blob/d9466edb4c53cece8686ee6e17b028436ddf4151/crates/std_detect/src/detect/mod.rs + +Refs: +- https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features +- /~https://github.com/llvm/llvm-project/commit/4e731abc55681751b5d736b613f7720e50eb1ad4 +*/ include!("common.rs"); @@ -18,6 +20,7 @@ mod ffi { // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/errors.h pub(crate) const ZX_OK: zx_status_t = 0; + // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/features.h pub(crate) const ZX_FEATURE_KIND_CPU: u32 = 0; pub(crate) const ZX_ARM64_FEATURE_ISA_ATOMICS: u32 = 1 << 8; diff --git a/src/imp/atomic128/detect/aarch64_macos.rs b/src/imp/atomic128/detect/aarch64_macos.rs index fc3e23a25..fb334c9b1 100644 --- a/src/imp/atomic128/detect/aarch64_macos.rs +++ b/src/imp/atomic128/detect/aarch64_macos.rs @@ -1,19 +1,21 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Run-time CPU feature detection on aarch64 macOS by using sysctl. -// -// This module is currently only enabled on tests because aarch64 macOS always supports FEAT_LSE and FEAT_LSE2. -// /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L728 -// -// If macOS supporting FEAT_LSE128/FEAT_LRCPC3 becomes popular in the future, this module will -// be used to support outline-atomics for FEAT_LSE128/FEAT_LRCPC3. -// M4 is armv9.2-a and it doesn't support FEAT_LSE128/FEAT_LRCPC3. -// -// Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics -// -// Note that iOS doesn't support sysctl: -// - https://developer.apple.com/forums/thread/9440 -// - https://nabla-c0d3.github.io/blog/2015/06/16/ios9-security-privacy +/* +Run-time CPU feature detection on AArch64 macOS by using sysctl. 
+ +This module is currently only enabled on tests because AArch64 macOS always supports FEAT_LSE and FEAT_LSE2. +/~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L728 + +If macOS supporting FEAT_LSE128/FEAT_LRCPC3 becomes popular in the future, this module will +be used to support outline-atomics for FEAT_LSE128/FEAT_LRCPC3. +M4 is Armv9.2 and it doesn't support FEAT_LSE128/FEAT_LRCPC3. + +Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + +Note that iOS doesn't support sysctl: +- https://developer.apple.com/forums/thread/9440 +- https://nabla-c0d3.github.io/blog/2015/06/16/ios9-security-privacy +*/ include!("common.rs"); @@ -67,7 +69,7 @@ unsafe fn sysctlbyname32(name: &[u8]) -> Option { #[cold] fn _detect(info: &mut CpuInfo) { - // hw.optional.armv8_1_atomics is available on macOS 11+ (note: aarch64 support was added on macOS 11), + // hw.optional.armv8_1_atomics is available on macOS 11+ (note: AArch64 support was added in macOS 11), // hw.optional.arm.FEAT_* are only available on macOS 12+. // Query both names in case future versions of macOS remove the old name. // /~https://github.com/golang/go/commit/c15593197453b8bf90fc3a9080ba2afeaf7934ea diff --git a/src/imp/atomic128/detect/aarch64_windows.rs b/src/imp/atomic128/detect/aarch64_windows.rs index 4ce0896c3..2fbcf6b15 100644 --- a/src/imp/atomic128/detect/aarch64_windows.rs +++ b/src/imp/atomic128/detect/aarch64_windows.rs @@ -1,12 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Run-time CPU feature detection on aarch64 Windows by using IsProcessorFeaturePresent. -// -// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection of FEAT_LSE on Windows. 
-// /~https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/windows/aarch64.rs -// /~https://github.com/rust-lang/stdarch/pull/1373 -// -// Refs: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent +/* +Run-time CPU feature detection on AArch64 Windows by using IsProcessorFeaturePresent. + +Run-time detection of FEAT_LSE on Windows by is_aarch64_feature_detected is supported on Rust 1.70+. +/~https://github.com/rust-lang/stdarch/pull/1373 + +Refs: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent +*/ include!("common.rs"); @@ -17,6 +18,7 @@ mod ffi { pub(crate) type BOOL = i32; pub(crate) const FALSE: BOOL = 0; + // Defined in winnt.h of Windows SDK. pub(crate) const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: DWORD = 34; diff --git a/src/imp/atomic128/detect/auxv.rs b/src/imp/atomic128/detect/auxv.rs index e7b60fc75..cd505110e 100644 --- a/src/imp/atomic128/detect/auxv.rs +++ b/src/imp/atomic128/detect/auxv.rs @@ -1,91 +1,93 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Run-time CPU feature detection on AArch64/PowerPC64 Linux/Android/FreeBSD/OpenBSD by parsing ELF auxiliary vectors. 
-// -// Supported platforms: -// - Linux 6.4+ (through prctl) -// /~https://github.com/torvalds/linux/commit/ddc65971bb677aa9f6a4c21f76d3133e106f88eb -// - glibc 2.16+ (through getauxval) -// /~https://github.com/bminor/glibc/commit/c7683a6d02f3ed59f5cd119b3e8547f45a15912f -// - musl 1.1.0+ (through getauxval) -// /~https://github.com/bminor/musl/commit/21ada94c4b8c01589367cea300916d7db8461ae7 -// - uClibc-ng 1.0.43+ (through getauxval) -// /~https://github.com/wbx-github/uclibc-ng/commit/d869bb1600942c01a77539128f9ba5b5b55ad647 -// - Picolibc 1.4.6+ (through getauxval) -// /~https://github.com/picolibc/picolibc/commit/19bfe51d62ad7e32533c7f664b5bca8e26286e31 -// - Android 4.3+ (API level 18+) (through getauxval) -// /~https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49 -// - FreeBSD 12.0+ and 11.4+ (through elf_aux_info) -// /~https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 -// /~https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h -// - OpenBSD 7.6+ (through elf_aux_info) -// /~https://github.com/openbsd/src/commit/ef873df06dac50249b2dd380dc6100eee3b0d23d -// -// # Linux/Android -// -// As of nightly-2023-01-23, is_aarch64_feature_detected always uses dlsym by default -// on aarch64 Linux/Android, but on the following platforms, we can safely assume -// getauxval is linked to the binary. 
-// -// - On glibc (*-linux-gnu*), [aarch64 support is available on glibc 2.17+](/~https://github.com/bminor/glibc/blob/glibc-2.17/NEWS#L35) -// - On musl (*-linux-musl*, *-linux-ohos*), [aarch64 support is available on musl 1.1.7+](/~https://github.com/bminor/musl/blob/v1.1.7/WHATSNEW#L1422) -// - On bionic (*-android*), [64-bit architecture support is available on Android 5.0+ (API level 21+)](https://android-developers.googleblog.com/2014/10/whats-new-in-android-50-lollipop.html) -// -// However, on musl with static linking, it seems that getauxval is not always available, independent of version requirements: /~https://github.com/rust-lang/rust/issues/89626 -// (That problem may have been fixed in /~https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c, -// but even in the version containing that patch, [there is report](/~https://github.com/rust-lang/rust/issues/89626#issuecomment-1242636038) -// of the same error.) -// -// On other Linux targets, we cannot assume that getauxval is always available, so we don't enable -// run-time detection by default (can be enabled by `--cfg portable_atomic_outline_atomics`). -// -// - musl with static linking. See the above for more. -// Also, dlsym(getauxval) always returns null when statically linked. -// - uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*). getauxval was recently added (See the above list). -// - Picolibc. getauxval was recently added (See the above list). -// -// See also /~https://github.com/rust-lang/stdarch/pull/1375 -// -// See tests::test_linux_like and aarch64_aa64reg.rs for (test-only) alternative implementations. -// -// # FreeBSD -// -// As of nightly-2023-01-23, is_aarch64_feature_detected always uses mrs on -// aarch64 FreeBSD. However, they do not work on FreeBSD 12 on QEMU (confirmed -// on FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14). -// -// So use elf_aux_info instead of mrs like compiler-rt does. 
-// https://reviews.llvm.org/D109330 -// -// elf_aux_info is available on FreeBSD 12.0+ and 11.4+: -// /~https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 -// /~https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h -// On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/announce), -// but FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30: -// https://www.freebsd.org/security/unsupported -// See also /~https://github.com/rust-lang/stdarch/pull/611#issuecomment-445464613 -// -// See tests::test_freebsd and aarch64_aa64reg.rs for (test-only) alternative implementations. -// -// # OpenBSD -// -// elf_aux_info is available on OpenBSD 7.6+: -// /~https://github.com/openbsd/src/commit/ef873df06dac50249b2dd380dc6100eee3b0d23d -// -// On aarch64, there is an alternative that available on older version, -// so we use it (see aarch64_aa64reg.rs). -// -// # PowerPC64 -// -// On PowerPC64, run-time detection is currently disabled by default mainly for -// compatibility with older versions of operating systems -// (can be enabled by `--cfg portable_atomic_outline_atomics`). -// -// - On glibc, [powerpc64 support is available on glibc 2.3+](/~https://github.com/bminor/glibc/blob/glibc-2.3/NEWS#L55) -// - On musl, [powerpc64 support is available on musl 1.1.15+](/~https://github.com/bminor/musl/blob/v1.1.15/WHATSNEW#L1702) -// - On FreeBSD, [powerpc64 support is available on FreeBSD 9.0+](https://www.freebsd.org/releases/9.0R/announce) -// -// (On uClibc-ng, [powerpc64 is not supported](/~https://github.com/wbx-github/uclibc-ng/commit/d4d4f37fda7fa57e57132ff2f0d735ce7cc2178e)) +/* +Run-time CPU feature detection on AArch64/PowerPC64 Linux/Android/FreeBSD/OpenBSD by parsing ELF auxiliary vectors. 
+ +Supported platforms: +- Linux 6.4+ (through prctl) + /~https://github.com/torvalds/linux/commit/ddc65971bb677aa9f6a4c21f76d3133e106f88eb +- glibc 2.16+ (through getauxval) + /~https://github.com/bminor/glibc/commit/c7683a6d02f3ed59f5cd119b3e8547f45a15912f +- musl 1.1.0+ (through getauxval) + /~https://github.com/bminor/musl/commit/21ada94c4b8c01589367cea300916d7db8461ae7 +- uClibc-ng 1.0.43+ (through getauxval) + /~https://github.com/wbx-github/uclibc-ng/commit/d869bb1600942c01a77539128f9ba5b5b55ad647 +- Picolibc 1.4.6+ (through getauxval) + /~https://github.com/picolibc/picolibc/commit/19bfe51d62ad7e32533c7f664b5bca8e26286e31 +- Android 4.3+ (API level 18+) (through getauxval) + /~https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49 +- FreeBSD 12.0+ and 11.4+ (through elf_aux_info) + /~https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 + /~https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h +- OpenBSD 7.6+ (through elf_aux_info) + /~https://github.com/openbsd/src/commit/ef873df06dac50249b2dd380dc6100eee3b0d23d + +# Linux/Android + +As of nightly-2023-01-23, is_aarch64_feature_detected always uses dlsym by default +on AArch64 Linux/Android, but on the following platforms, we can safely assume +getauxval is linked to the binary. 
+ +- On glibc (*-linux-gnu*), [AArch64 support is available on glibc 2.17+](/~https://github.com/bminor/glibc/blob/glibc-2.17/NEWS#L35) +- On musl (*-linux-musl*, *-linux-ohos*), [AArch64 support is available on musl 1.1.7+](/~https://github.com/bminor/musl/blob/v1.1.7/WHATSNEW#L1422) +- On bionic (*-android*), [64-bit architecture support is available on Android 5.0+ (API level 21+)](https://android-developers.googleblog.com/2014/10/whats-new-in-android-50-lollipop.html) + +However, on musl with static linking, it seems that getauxval is not always available, independent of version requirements: /~https://github.com/rust-lang/rust/issues/89626 +(That problem may have been fixed in /~https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c, +but even in the version containing that patch, [there is a report](/~https://github.com/rust-lang/rust/issues/89626#issuecomment-1242636038) +of the same error.) + +On other Linux targets, we cannot assume that getauxval is always available, so we don't enable +run-time detection by default (can be enabled by `--cfg portable_atomic_outline_atomics`). + +- musl with static linking. See the above for more. + Also, dlsym(getauxval) always returns null when statically linked. +- uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*). getauxval was recently added (See the above list). +- Picolibc. getauxval was recently added (See the above list). + +See also /~https://github.com/rust-lang/stdarch/pull/1375 + +See tests::test_linux_like and aarch64_aa64reg.rs for (test-only) alternative implementations. + +# FreeBSD + +As of nightly-2023-01-23, is_aarch64_feature_detected always uses mrs on +AArch64 FreeBSD. However, they do not work on FreeBSD 12 on QEMU (confirmed +on FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14). + +So use elf_aux_info instead of mrs like compiler-rt does.
+https://reviews.llvm.org/D109330 + +elf_aux_info is available on FreeBSD 12.0+ and 11.4+: +/~https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 +/~https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h +On FreeBSD, [AArch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/announce), +but FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30: +https://www.freebsd.org/security/unsupported +See also /~https://github.com/rust-lang/stdarch/pull/611#issuecomment-445464613 + +See tests::test_freebsd and aarch64_aa64reg.rs for (test-only) alternative implementations. + +# OpenBSD + +elf_aux_info is available on OpenBSD 7.6+: +/~https://github.com/openbsd/src/commit/ef873df06dac50249b2dd380dc6100eee3b0d23d + +On AArch64, there is an alternative that is available on older versions, +so we use it (see aarch64_aa64reg.rs). + +# PowerPC64 + +On PowerPC64, run-time detection is currently disabled by default mainly for +compatibility with older versions of operating systems +(can be enabled by `--cfg portable_atomic_outline_atomics`). + +- On glibc, [powerpc64 support is available on glibc 2.3+](/~https://github.com/bminor/glibc/blob/glibc-2.3/NEWS#L55) +- On musl, [powerpc64 support is available on musl 1.1.15+](/~https://github.com/bminor/musl/blob/v1.1.15/WHATSNEW#L1702) +- On FreeBSD, [powerpc64 support is available on FreeBSD 9.0+](https://www.freebsd.org/releases/9.0R/announce) + +(On uClibc-ng, [powerpc64 is not supported](/~https://github.com/wbx-github/uclibc-ng/commit/d4d4f37fda7fa57e57132ff2f0d735ce7cc2178e)) +*/ include!("common.rs"); @@ -209,8 +211,8 @@ mod os { } } -// Basically, Linux and FreeBSD use the same hwcap values. -// FreeBSD supports a subset of the hwcap values supported by Linux. +// Basically, Linux/FreeBSD/OpenBSD use the same hwcap values. +// FreeBSD/OpenBSD support a subset of the hwcap values supported by Linux.
use arch::_detect; #[cfg(target_arch = "aarch64")] mod arch { @@ -218,20 +220,36 @@ mod arch { // Linux // /~https://github.com/torvalds/linux/blob/v6.10/arch/arm64/include/uapi/asm/hwcap.h + // /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/arm64/elf_hwcaps.rst // FreeBSD // Defined in machine/elf.h. // /~https://github.com/freebsd/freebsd-src/blob/release/14.1.0/sys/arm64/include/elf.h - // available on FreeBSD 13.0+ and 12.2+ - // /~https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h - // /~https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h // OpenBSD // Defined in machine/elf.h. // /~https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/arch/arm64/include/elf.h + // Linux 4.3+ + // /~https://github.com/torvalds/linux/commit/40a1db2434a1b62332b1af25cfa14d7b8c0301fe + // FreeBSD 13.0+/12.2+ + // /~https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h + // /~https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h + // OpenBSD 7.6+ + // /~https://github.com/openbsd/src/commit/ef873df06dac50249b2dd380dc6100eee3b0d23d pub(super) const HWCAP_ATOMICS: ffi::c_ulong = 1 << 8; + // Linux 4.17+ + // /~https://github.com/torvalds/linux/commit/7206dc93a58fb76421c4411eefa3c003337bcb2d + // FreeBSD 13.0+/12.2+ + // /~https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h + // /~https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h + // OpenBSD 7.6+ + // /~https://github.com/openbsd/src/commit/ef873df06dac50249b2dd380dc6100eee3b0d23d pub(super) const HWCAP_USCAT: ffi::c_ulong = 1 << 25; + // Linux 6.7+ + // /~https://github.com/torvalds/linux/commit/338a835f40a849cd89b993e342bd9fbd5684825c #[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(target_pointer_width = "64")] pub(super) const HWCAP2_LRCPC3: ffi::c_ulong = 1 << 46; + // Linux 6.7+ + // 
/~https://github.com/torvalds/linux/commit/94d0657f9f0d311489606589133ebf49e28104d8 #[cfg(any(target_os = "linux", target_os = "android"))] #[cfg(target_pointer_width = "64")] pub(super) const HWCAP2_LSE128: ffi::c_ulong = 1 << 47; @@ -265,14 +283,19 @@ mod arch { // Linux // /~https://github.com/torvalds/linux/blob/v6.10/arch/powerpc/include/uapi/asm/cputable.h + // /~https://github.com/torvalds/linux/blob/v6.10/Documentation/arch/powerpc/elf_hwcaps.rst // FreeBSD // Defined in machine/cpu.h. // /~https://github.com/freebsd/freebsd-src/blob/release/14.1.0/sys/powerpc/include/cpu.h - // available on FreeBSD 11.0+ - // /~https://github.com/freebsd/freebsd-src/commit/b0bf7fcd298133457991b27625bbed766e612730 // OpenBSD // Defined in machine/elf.h. // /~https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/arch/powerpc64/include/elf.h + // Linux 3.10+ + // /~https://github.com/torvalds/linux/commit/cbbc6f1b1433ef553d57826eee87a84ca49645ce + // FreeBSD 11.0+ + // /~https://github.com/freebsd/freebsd-src/commit/b0bf7fcd298133457991b27625bbed766e612730 + // OpenBSD 7.6+ + // /~https://github.com/openbsd/src/commit/0b0568a19fc4c197871ceafbabc91fabf17ca152 pub(super) const PPC_FEATURE2_ARCH_2_07: ffi::c_ulong = 0x80000000; // Linux 4.5+ // /~https://github.com/torvalds/linux/commit/e708c24cd01ce80b1609d8baccee40ccc3608a01 @@ -451,7 +474,7 @@ mod tests { // This is almost equivalent to what elf_aux_info does. 
// https://man.freebsd.org/elf_aux_info(3) - // On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/announce), + // On FreeBSD, [AArch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/announce), // but elf_aux_info is available on FreeBSD 12.0+ and 11.4+: // /~https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 // /~https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h @@ -692,7 +715,7 @@ mod tests { ); assert_eq!( os::getauxval(ffi::AT_HWCAP2), - // AT_HWCAP2 is only available on FreeBSD 13+, at least for aarch64. + // AT_HWCAP2 is only available on FreeBSD 13+, at least for AArch64. getauxval_sysctl_asm_syscall(ffi::AT_HWCAP2).unwrap_or(0) ); } diff --git a/src/imp/atomic128/detect/common.rs b/src/imp/atomic128/detect/common.rs index c4568f5a8..6f4a6e472 100644 --- a/src/imp/atomic128/detect/common.rs +++ b/src/imp/atomic128/detect/common.rs @@ -140,7 +140,7 @@ mod c_types { // c_size_t is currently always usize // /~https://github.com/rust-lang/rust/blob/1.80.0/library/core/src/ffi/mod.rs#L67 pub(crate) type c_size_t = usize; - // c_char is u8 by default on most non-Apple/non-Windows ARM/PowerPC/RISC-V/s390x/Hexagon targets + // c_char is u8 by default on most non-Apple/non-Windows Arm/PowerPC/RISC-V/s390x/Hexagon targets // (Linux/Android/FreeBSD/NetBSD/OpenBSD/VxWorks/Fuchsia/QNX Neutrino/Horizon/AIX/z/OS) // /~https://github.com/rust-lang/rust/blob/1.80.0/library/core/src/ffi/mod.rs#L83 // /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/lldb/source/Utility/ArchSpec.cpp#L712 diff --git a/src/imp/atomic128/detect/x86_64.rs b/src/imp/atomic128/detect/x86_64.rs index 0bb55b31e..c824baf49 100644 --- a/src/imp/atomic128/detect/x86_64.rs +++ b/src/imp/atomic128/detect/x86_64.rs @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Run-time CPU feature detection on x86_64 by using CPUID. 
-// -// Adapted from /~https://github.com/rust-lang/stdarch. +/* +Run-time CPU feature detection on x86_64 by using CPUID. + +Adapted from /~https://github.com/rust-lang/stdarch. +*/ #![cfg_attr(portable_atomic_sanitize_thread, allow(dead_code))] diff --git a/src/imp/atomic128/intrinsics.rs b/src/imp/atomic128/intrinsics.rs index 563523854..9ad48eeb9 100644 --- a/src/imp/atomic128/intrinsics.rs +++ b/src/imp/atomic128/intrinsics.rs @@ -1,29 +1,31 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic{I,U}128 implementation without inline assembly. -// -// Note: This module is currently only enabled on Miri and ThreadSanitizer which -// do not support inline assembly. -// -// This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and -// `core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x. -// -// See README.md of this directory for performance comparison with the -// implementation with inline assembly. -// -// Note: -// - This currently needs Rust 1.70 on x86_64, otherwise nightly compilers. -// - On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature): -// /~https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 -// - On s390x, old LLVM (pre-18) generates libcalls for operations other than load/store/cmpxchg: -// /~https://github.com/llvm/llvm-project/commit/c568927f3e2e7d9804ea74ecbf11c16c014ddcbc -// - On aarch64 big-endian, LLVM (as of 17) generates broken code. (wrong result in stress test) -// (on cfg(miri)/cfg(sanitize) it may be fine though) -// - On powerpc64, LLVM (as of 17) doesn't support 128-bit atomic min/max: -// /~https://github.com/llvm/llvm-project/issues/68390 -// - On powerpc64le, LLVM (as of 17) generates broken code. (wrong result from fetch_add) -// -// Refs: /~https://github.com/rust-lang/rust/blob/1.80.0/library/core/src/sync/atomic.rs +/* +Atomic{I,U}128 implementation without inline assembly. 
+ +Adapted from /~https://github.com/rust-lang/rust/blob/1.80.0/library/core/src/sync/atomic.rs. + +Note: This module is currently only enabled on Miri and ThreadSanitizer which +do not support inline assembly. + +This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and +`core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x. + +See README.md of this directory for performance comparison with the +implementation with inline assembly. + +Note: +- This currently needs Rust 1.70 on x86_64, otherwise nightly compilers. +- On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature): + /~https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 +- On s390x, old LLVM (pre-18) generates libcalls for operations other than load/store/cmpxchg: + /~https://github.com/llvm/llvm-project/commit/c568927f3e2e7d9804ea74ecbf11c16c014ddcbc +- On aarch64 big-endian, LLVM (as of 17) generates broken code. (wrong result in stress test) + (on cfg(miri)/cfg(sanitize) it may be fine though) +- On powerpc64, LLVM (as of 17) doesn't support 128-bit atomic min/max: + /~https://github.com/llvm/llvm-project/issues/68390 +- On powerpc64le, LLVM (as of 17) generates broken code. (wrong result from fetch_add) +*/ include!("macros.rs"); diff --git a/src/imp/atomic128/powerpc64.rs b/src/imp/atomic128/powerpc64.rs index 0c9926300..5111b0f08 100644 --- a/src/imp/atomic128/powerpc64.rs +++ b/src/imp/atomic128/powerpc64.rs @@ -1,31 +1,33 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic{I,U}128 implementation on PowerPC64. 
-// -// powerpc64 on pwr8+ support 128-bit atomics (load/store/LL/SC): -// /~https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 -// /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll -// /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/test/CodeGen/PowerPC/atomics-i128.ll -// -// powerpc64le is pwr8+ by default /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/lib/Target/PowerPC/PPC.td#L674 -// See also /~https://github.com/rust-lang/rust/issues/59932 -// -// Note that we do not separate LL and SC into separate functions, but handle -// them within a single asm block. This is because it is theoretically possible -// for the compiler to insert operations that might clear the reservation between -// LL and SC. See aarch64.rs for details. -// -// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use -// this module and use intrinsics.rs instead. -// -// Refs: -// - Power ISA https://openpowerfoundation.org/specifications/isa -// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference -// - atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit -// -// Generated asm: -// - powerpc64 (pwr8) https://godbolt.org/z/71xGhY9qf -// - powerpc64le https://godbolt.org/z/4TexcjGEz +/* +Atomic{I,U}128 implementation on PowerPC64. 
+ +powerpc64 on pwr8+ supports 128-bit atomics (load/store/LL/SC): +/~https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 +/~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +/~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/test/CodeGen/PowerPC/atomics-i128.ll + +powerpc64le is pwr8+ by default /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/lib/Target/PowerPC/PPC.td#L674 +See also /~https://github.com/rust-lang/rust/issues/59932 + +Note that we do not separate LL and SC into separate functions, but handle +them within a single asm block. This is because it is theoretically possible +for the compiler to insert operations that might clear the reservation between +LL and SC. See aarch64.rs for details. + +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use intrinsics.rs instead. + +Refs: +- Power ISA https://openpowerfoundation.org/specifications/isa +- AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference +- atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit + +Generated asm: +- powerpc64 (pwr8) https://godbolt.org/z/71xGhY9qf +- powerpc64le https://godbolt.org/z/4TexcjGEz +*/ include!("macros.rs"); diff --git a/src/imp/atomic128/s390x.rs b/src/imp/atomic128/s390x.rs index e56d864f4..cb831d089 100644 --- a/src/imp/atomic128/s390x.rs +++ b/src/imp/atomic128/s390x.rs @@ -1,28 +1,30 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic{I,U}128 implementation on s390x. -// -// s390x has 128-bit atomic load/store/CAS instructions and other operations are emulated by CAS loop.
-// /~https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc -// /~https://github.com/llvm/llvm-project/commit/c568927f3e2e7d9804ea74ecbf11c16c014ddcbc -// -// LLVM's minimal supported architecture level is z10: -// /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/lib/Target/SystemZ/SystemZProcessors.td#L16-L17 -// This does not appear to have changed since the current s390x backend was added in LLVM 3.3: -// /~https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db -// -// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use -// this module and use intrinsics.rs instead. -// -// Refs: -// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf -// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary -// - atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit -// -// Generated asm: -// - s390x https://godbolt.org/z/sbvhjKrMT -// - s390x (z196) https://godbolt.org/z/Erbqazhv7 -// - s390x (z15) https://godbolt.org/z/GEaePbbsT +/* +Atomic{I,U}128 implementation on s390x. + +s390x has 128-bit atomic load/store/CAS instructions and other operations are emulated by CAS loop. 
+/~https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc +/~https://github.com/llvm/llvm-project/commit/c568927f3e2e7d9804ea74ecbf11c16c014ddcbc + +LLVM's minimal supported architecture level is z10: +/~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/lib/Target/SystemZ/SystemZProcessors.td#L16-L17 +This does not appear to have changed since the current s390x backend was added in LLVM 3.3: +/~https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db + +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use intrinsics.rs instead. + +Refs: +- z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf +- z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary +- atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit + +Generated asm: +- s390x https://godbolt.org/z/sbvhjKrMT +- s390x (z196) https://godbolt.org/z/Erbqazhv7 +- s390x (z15) https://godbolt.org/z/GEaePbbsT +*/ include!("macros.rs"); diff --git a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index 8e0ced6ee..f88c7107f 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -1,16 +1,18 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS). -// -// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use -// this module and use intrinsics.rs instead. -// -// Refs: -// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 -// - atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit -// -// Generated asm: -// - x86_64 (+cmpxchg16b) https://godbolt.org/z/r5x9M8PdK +/* +Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS). 
+ +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use intrinsics.rs instead. + +Refs: +- x86 and amd64 instruction reference https://www.felixcloutier.com/x86 +- atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit + +Generated asm: +- x86_64 (+cmpxchg16b) https://godbolt.org/z/r5x9M8PdK +*/ // TODO: use core::arch::x86_64::cmpxchg16b where available and efficient than asm @@ -75,7 +77,7 @@ macro_rules! ptr_modifier { // Unlike AArch64 and RISC-V, x86's assembler doesn't check instruction // requirements for the currently enabled target features. In the first place, -// there is no option in the x86 assembly for such case, like ARM .arch_extension, +// there is no option in the x86 assembly for such case, like Arm .arch_extension, // RISC-V .option arch, PowerPC .machine, etc. // However, we set target_feature(enable) when available (Rust 1.69+) in case a // new codegen backend is added that checks for it in the future, or an option diff --git a/src/imp/core_atomic.rs b/src/imp/core_atomic.rs index c4861a987..f204d1694 100644 --- a/src/imp/core_atomic.rs +++ b/src/imp/core_atomic.rs @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Wrap the standard library's atomic types in newtype. -// -// This is not a reexport, because we want to backport changes like -// /~https://github.com/rust-lang/rust/pull/98383 to old compilers. +/* +Wrap the standard library's atomic types in newtype. + +This is not a reexport, because we want to backport changes like +/~https://github.com/rust-lang/rust/pull/98383 to old compilers. 
+*/ use core::{cell::UnsafeCell, marker::PhantomData, sync::atomic::Ordering}; @@ -13,6 +15,7 @@ use core::{cell::UnsafeCell, marker::PhantomData, sync::atomic::Ordering}; // RefUnwindSafe when "linked to std", and that's behavior that our other atomic // implementations can't emulate, so use PhantomData to match // conditions where our other atomic implementations implement RefUnwindSafe. +// // If we do not do this, for example, downstream that is only tested on x86_64 // may incorrectly assume that AtomicU64 always implements RefUnwindSafe even on // older rustc, and may be broken on platforms where std AtomicU64 is not available. @@ -377,7 +380,7 @@ macro_rules! atomic_int { pub(crate) fn not(&self, order: Ordering) { self.fetch_not(order); } - // TODO: provide asm-based implementation on AArch64, ARMv7, RISC-V, etc. + // TODO: provide asm-based implementation on AArch64, Armv7, RISC-V, etc. #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type { diff --git a/src/imp/fallback/mod.rs b/src/imp/fallback/mod.rs index 616c720d1..49bcb7f28 100644 --- a/src/imp/fallback/mod.rs +++ b/src/imp/fallback/mod.rs @@ -1,15 +1,17 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Fallback implementation using global locks. -// -// This implementation uses seqlock for global locks. -// -// This is basically based on global locks in crossbeam-utils's `AtomicCell`, -// but seqlock is implemented in a way that does not depend on UB -// (see comments in optimistic_read method in atomic! macro for details). -// -// Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic -// type and the value type must be the same. +/* +Fallback implementation using global locks. + +This implementation uses seqlock for global locks. 
+ +This is basically based on global locks in crossbeam-utils's `AtomicCell`, +but seqlock is implemented in a way that does not depend on UB +(see comments in optimistic_read method in atomic! macro for details). + +Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic +type and the value type must be the same. +*/ #![cfg_attr( any( @@ -62,7 +64,7 @@ pub(crate) mod utils; // counter will not be increased that fast. // // Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, -// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast, +// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast, // so use it to implement normal sequence lock. cfg_has_fast_atomic_64! { mod seq_lock; @@ -78,7 +80,7 @@ use seq_lock::{SeqLock, SeqLockWriteGuard}; use utils::CachePadded; // Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, -// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast, +// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast, // so use it to reduce chunks of byte-wise atomic memcpy. use seq_lock::{AtomicChunk, Chunk}; diff --git a/src/imp/fallback/outline_atomics.rs b/src/imp/fallback/outline_atomics.rs index b40288bdd..1b7f3c7e1 100644 --- a/src/imp/fallback/outline_atomics.rs +++ b/src/imp/fallback/outline_atomics.rs @@ -1,12 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Helper for outline-atomics. -// -// On architectures where DW atomics are not supported on older CPUs, we use -// fallback implementation when DW atomic instructions are not supported and -// outline-atomics is enabled. -// -// This module provides helpers to implement them. +/* +Helper for outline-atomics. 
+ +On architectures where DW atomics are not supported on older CPUs, we use +fallback implementation when DW atomic instructions are not supported and +outline-atomics is enabled. + +This module provides helpers to implement them. +*/ use core::sync::atomic::Ordering; diff --git a/src/imp/float.rs b/src/imp/float.rs index 3fd3f5906..decf4f994 100644 --- a/src/imp/float.rs +++ b/src/imp/float.rs @@ -1,16 +1,18 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// AtomicF{32,64} implementation based on AtomicU{32,64}. -// -// This module provides atomic float implementations using atomic integer. -// -// Note that most of `fetch_*` operations of atomic floats are implemented using -// CAS loops, which can be slower than equivalent operations of atomic integers. -// -// GPU targets have atomic instructions for float, so GPU targets will use -// architecture-specific implementations instead of this implementation in the -// future: /~https://github.com/taiki-e/portable-atomic/issues/34 -// +/* +AtomicF{32,64} implementation based on AtomicU{32,64}. + +This module provides atomic float implementations using atomic integer. + +Note that most of `fetch_*` operations of atomic floats are implemented using +CAS loops, which can be slower than equivalent operations of atomic integers. + +GPU targets have atomic instructions for float, so GPU targets will use +architecture-specific implementations instead of this implementation in the +future: /~https://github.com/taiki-e/portable-atomic/issues/34 / /~https://github.com/taiki-e/portable-atomic/pull/45 +*/ + // TODO: fetch_{minimum,maximum}* https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3008r2.html #![cfg_attr( diff --git a/src/imp/interrupt/README.md b/src/imp/interrupt/README.md index edc5fbf2e..0480ae1da 100644 --- a/src/imp/interrupt/README.md +++ b/src/imp/interrupt/README.md @@ -3,7 +3,7 @@ This module is used to provide atomic CAS for targets where atomic CAS is not available in the standard library. 
- On MSP430 and AVR, they are always single-core, so this module is always used. -- On ARMv6-M (thumbv6m), pre-v6 ARM (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa, they could be multi-core, so this module is used when the `unsafe-assume-single-core` feature is enabled. +- On Armv6-M (thumbv6m), pre-v6 Arm (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa, they could be multi-core, so this module is used when the `unsafe-assume-single-core` feature is enabled. The implementation uses privileged instructions to disable interrupts, so it usually doesn't work on unprivileged mode. Enabling this feature in an environment where privileged instructions are not available, or if the instructions used are not sufficient to disable interrupts in the system, it is also usually considered **unsound**, although the details are system-dependent. @@ -12,9 +12,9 @@ Consider using the [`critical-section` feature](../../../README.md#optional-feat For some targets, the implementation can be changed by explicitly enabling features. -- On ARMv6-M, this disables interrupts by modifying the PRIMASK register. -- On pre-v6 ARM, this disables interrupts by modifying the I (IRQ mask) bit of the CPSR. -- On pre-v6 ARM with the `disable-fiq` feature, this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR. +- On Armv6-M, this disables interrupts by modifying the PRIMASK register. +- On pre-v6 Arm, this disables interrupts by modifying the I (IRQ mask) bit of the CPSR. +- On pre-v6 Arm with the `disable-fiq` feature, this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR. - On RISC-V (without A-extension), this disables interrupts by modifying the MIE (Machine Interrupt Enable) bit of the `mstatus` register. - On RISC-V (without A-extension) with the `s-mode` feature, this disables interrupts by modifying the SIE (Supervisor Interrupt Enable) bit of the `sstatus` register. 
- On RISC-V (without A-extension) with the `force-amo` feature, this uses AMO instructions for RMWs that have corresponding AMO instructions even if A-extension is disabled. For other RMWs, this disables interrupts as usual. diff --git a/src/imp/interrupt/armv4t.rs b/src/imp/interrupt/armv4t.rs index e7134c2aa..afd118864 100644 --- a/src/imp/interrupt/armv4t.rs +++ b/src/imp/interrupt/armv4t.rs @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Refs: https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-System-Level-Programmers--Model/ARM-processor-modes-and-ARM-core-registers/Program-Status-Registers--PSRs-?lang=en -// -// Generated asm: -// - armv5te https://godbolt.org/z/fhaW3d9Kv +/* +Refs: https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-System-Level-Programmers--Model/ARM-processor-modes-and-ARM-core-registers/Program-Status-Registers--PSRs- + +Generated asm: +- armv5te https://godbolt.org/z/fhaW3d9Kv +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; @@ -67,8 +69,8 @@ pub(super) unsafe fn restore(cpsr: State) { } } -// On pre-v6 ARM, we cannot use core::sync::atomic here because they call the -// `__sync_*` builtins for non-relaxed load/store (because pre-v6 ARM doesn't +// On pre-v6 Arm, we cannot use core::sync::atomic here because they call the +// `__sync_*` builtins for non-relaxed load/store (because pre-v6 Arm doesn't // have Data Memory Barrier). // // Generated asm: diff --git a/src/imp/interrupt/armv6m.rs b/src/imp/interrupt/armv6m.rs index f0ba21a8a..cac62d1fd 100644 --- a/src/imp/interrupt/armv6m.rs +++ b/src/imp/interrupt/armv6m.rs @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Adapted from /~https://github.com/rust-embedded/cortex-m. -// -// Generated asm: -// - armv6-m https://godbolt.org/z/1sqKnsY6n +/* +Adapted from /~https://github.com/rust-embedded/cortex-m. 
+ +Generated asm: +- armv6-m https://godbolt.org/z/1sqKnsY6n +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/interrupt/avr.rs b/src/imp/interrupt/avr.rs index 76d99c142..36344f4e0 100644 --- a/src/imp/interrupt/avr.rs +++ b/src/imp/interrupt/avr.rs @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Adapted from https://github.com/Rahix/avr-device. -// -// Refs: -// - AVR Instruction Set Manual https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf +/* +Adapted from https://github.com/Rahix/avr-device. + +Refs: +- AVR Instruction Set Manual https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/interrupt/mod.rs b/src/imp/interrupt/mod.rs index 1fa778eb8..f604e6e8f 100644 --- a/src/imp/interrupt/mod.rs +++ b/src/imp/interrupt/mod.rs @@ -1,35 +1,37 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Critical section based fallback implementations -// -// This module supports two different critical section implementations: -// - Built-in "disable all interrupts". -// - Call into the `critical-section` crate (which allows the user to plug any implementation). -// -// The `critical-section`-based fallback is enabled when the user asks for it with the `critical-section` -// Cargo feature. -// -// The "disable interrupts" fallback is not sound on multi-core systems. -// Also, this uses privileged instructions to disable interrupts, so it usually -// doesn't work on unprivileged mode. Using this fallback in an environment where privileged -// instructions are not available is also usually considered **unsound**, -// although the details are system-dependent. 
-// -// Therefore, this implementation will only be enabled in one of the following cases: -// -// - When the user explicitly declares that the system is single-core and that -// privileged instructions are available using an unsafe cfg. -// - When we can safely assume that the system is single-core and that -// privileged instructions are available on the system. -// -// AVR, which is single core[^avr1] and LLVM also generates code that disables -// interrupts [^avr2] in atomic ops by default, is considered the latter. -// MSP430 as well. -// -// See also README.md of this directory. -// -// [^avr1]: /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074 -// [^avr2]: /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 +/* +Critical section based fallback implementations + +This module supports two different critical section implementations: +- Built-in "disable all interrupts". +- Call into the `critical-section` crate (which allows the user to plug any implementation). + +The `critical-section`-based fallback is enabled when the user asks for it with the `critical-section` +Cargo feature. + +The "disable interrupts" fallback is not sound on multi-core systems. +Also, this uses privileged instructions to disable interrupts, so it usually +doesn't work on unprivileged mode. Using this fallback in an environment where privileged +instructions are not available is also usually considered **unsound**, +although the details are system-dependent. + +Therefore, this implementation will only be enabled in one of the following cases: + +- When the user explicitly declares that the system is single-core and that + privileged instructions are available using an unsafe cfg. +- When we can safely assume that the system is single-core and that + privileged instructions are available on the system. 
+ +AVR, which is single core[^avr1] and LLVM also generates code that disables +interrupts [^avr2] in atomic ops by default, is considered the latter. +MSP430 as well. + +See also README.md of this directory. + +[^avr1]: /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074 +[^avr2]: /~https://github.com/llvm/llvm-project/blob/llvmorg-18.1.2/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 +*/ // On some platforms, atomic load/store can be implemented in a more efficient // way than disabling interrupts. On MSP430, some RMWs that do not return the @@ -70,7 +72,7 @@ use core::{cell::UnsafeCell, sync::atomic::Ordering}; const IS_ALWAYS_LOCK_FREE: bool = false; // Consider atomic operations based on disabling interrupts on single-core -// systems are lock-free. (We consider the pre-v6 ARM Linux's atomic operations +// systems are lock-free. (We consider the pre-v6 Arm Linux's atomic operations // provided in a similar way by the Linux kernel to be lock-free.) #[cfg(not(feature = "critical-section"))] const IS_ALWAYS_LOCK_FREE: bool = true; diff --git a/src/imp/interrupt/msp430.rs b/src/imp/interrupt/msp430.rs index 8c1ca80ee..6b610d91b 100644 --- a/src/imp/interrupt/msp430.rs +++ b/src/imp/interrupt/msp430.rs @@ -1,10 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Adapted from /~https://github.com/rust-embedded/msp430. -// -// See also src/imp/msp430.rs. -// -// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf +/* +Adapted from /~https://github.com/rust-embedded/msp430. + +See also src/imp/msp430.rs. 
+ +Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/interrupt/riscv.rs b/src/imp/interrupt/riscv.rs index 8437ffb21..e6f349181 100644 --- a/src/imp/interrupt/riscv.rs +++ b/src/imp/interrupt/riscv.rs @@ -1,13 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Refs: -// - https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/machine.adoc#machine-status-mstatus-and-mstatush-registers -// - https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/supervisor.adoc#supervisor-status-sstatus-register -// -// See also src/imp/riscv.rs. -// -// Generated asm: -// - riscv64gc https://godbolt.org/z/zTrzT1Ee7 +/* +Refs: +- https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/machine.adoc#machine-status-mstatus-and-mstatush-registers +- https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/supervisor.adoc#supervisor-status-sstatus-register + +See also src/imp/riscv.rs. 
+ +Generated asm: +- riscv64gc https://godbolt.org/z/zTrzT1Ee7 +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/interrupt/xtensa.rs b/src/imp/interrupt/xtensa.rs index f9864c74f..da65dda48 100644 --- a/src/imp/interrupt/xtensa.rs +++ b/src/imp/interrupt/xtensa.rs @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Refs: -// - Xtensa Instruction Set Architecture (ISA) Reference Manual https://0x04.net/~mwk/doc/xtensa.pdf -// - Linux kernel's Xtensa atomic implementation /~https://github.com/torvalds/linux/blob/v6.10/arch/xtensa/include/asm/atomic.h +/* +Refs: +- Xtensa Instruction Set Architecture (ISA) Reference Manual https://0x04.net/~mwk/doc/xtensa.pdf +- Linux kernel's Xtensa atomic implementation /~https://github.com/torvalds/linux/blob/v6.10/arch/xtensa/include/asm/atomic.h +*/ use core::arch::asm; diff --git a/src/imp/mod.rs b/src/imp/mod.rs index 862d1a064..593ea978c 100644 --- a/src/imp/mod.rs +++ b/src/imp/mod.rs @@ -28,7 +28,7 @@ )] mod core_atomic; -// aarch64 128-bit atomics +// AArch64 128-bit atomics #[cfg(all( target_arch = "aarch64", any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), @@ -113,7 +113,7 @@ mod powerpc64; #[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/s390x.rs")] mod s390x; -// pre-v6 ARM Linux 64-bit atomics +// pre-v6 Arm Linux 64-bit atomics #[cfg(feature = "fallback")] // Miri and Sanitizer do not support inline assembly. #[cfg(all( @@ -390,7 +390,7 @@ items! { } // 64-bit atomics (platform-specific) -// pre-v6 ARM Linux +// pre-v6 Arm Linux #[cfg(feature = "fallback")] #[cfg(all( target_arch = "arm", @@ -405,7 +405,7 @@ items! 
{ pub(crate) use self::arm_linux::{AtomicI64, AtomicU64}; // 128-bit atomics (platform-specific) -// aarch64 +// AArch64 #[cfg(all( target_arch = "aarch64", any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), diff --git a/src/imp/msp430.rs b/src/imp/msp430.rs index caf95ba14..9206b2700 100644 --- a/src/imp/msp430.rs +++ b/src/imp/msp430.rs @@ -1,17 +1,17 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic load/store implementation on MSP430. -// -// Adapted from /~https://github.com/pftbest/msp430-atomic. -// Including /~https://github.com/pftbest/msp430-atomic/pull/4 for a compile error fix. -// Including /~https://github.com/pftbest/msp430-atomic/pull/5 for a soundness bug fix. -// -// Operations not supported here are provided by disabling interrupts. -// See also src/imp/interrupt/msp430.rs. -// -// Note: Ordering is always SeqCst. -// -// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf +/* +Atomic load/store implementation on MSP430. + +Adapted from /~https://github.com/pftbest/msp430-atomic. + +Operations not supported here are provided by disabling interrupts. +See also src/imp/interrupt/msp430.rs. + +Note: Ordering is always SeqCst. + +Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/riscv.rs b/src/imp/riscv.rs index 370fda739..c8b55f85f 100644 --- a/src/imp/riscv.rs +++ b/src/imp/riscv.rs @@ -1,28 +1,30 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic load/store implementation on RISC-V. -// -// This is for RISC-V targets without atomic CAS. (rustc doesn't provide atomics -// at all on such targets. /~https://github.com/rust-lang/rust/pull/114499) -// -// Also, optionally provides RMW implementation when force-amo or Zaamo target feature is enabled. 
-// -// Refs: -// - RISC-V Instruction Set Manual Volume I: Unprivileged ISA -// https://riscv.org/wp-content/uploads/2019/12/riscv-spec-20191213.pdf -// - RISC-V Atomics ABI Specification -// /~https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-atomic.adoc -// - "Mappings from C/C++ primitives to RISC-V primitives." table in RISC-V Instruction Set Manual -// /~https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/mm-eplan.adoc#code-porting-and-mapping-guidelines -// - ""Zaamo" Extension for Atomic Memory Operations" in RISC-V Instruction Set Manual -// /~https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/a-st-ext.adoc#zaamo-extension-for-atomic-memory-operations -// - ""Zabha" Extension for Byte and Halfword Atomic Memory Operations" in RISC-V Instruction Set Manual -// /~https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/zabha.adoc -// - atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit -// -// Generated asm: -// - riscv64gc https://godbolt.org/z/x8bhEn39e -// - riscv32imac https://godbolt.org/z/aG9157dhW +/* +Atomic load/store implementation on RISC-V. + +This is for RISC-V targets without atomic CAS. (rustc doesn't provide atomics +at all on such targets. /~https://github.com/rust-lang/rust/pull/114499) + +Also, optionally provides RMW implementation when force-amo or Zaamo target feature is enabled. + +Refs: +- RISC-V Instruction Set Manual + /~https://github.com/riscv/riscv-isa-manual/tree/riscv-isa-release-8b9dc50-2024-08-30 + "Mappings from C/C++ primitives to RISC-V primitives." 
table in Code Porting and Mapping Guidelines + /~https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/mm-eplan.adoc#code-porting-and-mapping-guidelines + "Zaamo" Extension for Atomic Memory Operations + /~https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/a-st-ext.adoc#zaamo-extension-for-atomic-memory-operations + "Zabha" Extension for Byte and Halfword Atomic Memory Operations + /~https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/zabha.adoc +- RISC-V Atomics ABI Specification + /~https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-atomic.adoc +- atomic-maybe-uninit /~https://github.com/taiki-e/atomic-maybe-uninit + +Generated asm: +- riscv64gc https://godbolt.org/z/x8bhEn39e +- riscv32imac https://godbolt.org/z/aG9157dhW +*/ #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/x86.rs b/src/imp/x86.rs index b8748cc4c..79c2dcc9a 100644 --- a/src/imp/x86.rs +++ b/src/imp/x86.rs @@ -1,18 +1,20 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// Atomic operations implementation on x86/x86_64. -// -// This module provides atomic operations not supported by LLVM or optimizes -// cases where LLVM code generation is not optimal. -// -// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use -// this module and use CAS loop instead. -// -// Refs: -// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 -// -// Generated asm: -// - x86_64 https://godbolt.org/z/Kcsj1jd9c +/* +Atomic operations implementation on x86/x86_64. + +This module provides atomic operations not supported by LLVM or optimizes +cases where LLVM code generation is not optimal. + +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use CAS loop instead. 
+ +Refs: +- x86 and amd64 instruction reference https://www.felixcloutier.com/x86 + +Generated asm: +- x86_64 https://godbolt.org/z/Kcsj1jd9c +*/ use core::{arch::asm, sync::atomic::Ordering}; diff --git a/src/lib.rs b/src/lib.rs index 00032ebc6..64b000782 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ Portable atomic types including support for 128-bit atomics, atomic float, etc. - Provide `AtomicI128` and `AtomicU128`. - Provide `AtomicF32` and `AtomicF64`. ([optional, requires the `float` feature](#optional-features-float)) - Provide atomic load/store for targets where atomic is not available at all in the standard library. (RISC-V without A-extension, MSP430, AVR) -- Provide atomic CAS for targets where atomic CAS is not available in the standard library. (thumbv6m, pre-v6 ARM, RISC-V without A-extension, MSP430, AVR, Xtensa, etc.) (always enabled for MSP430 and AVR, [optional](#optional-features-critical-section) otherwise) +- Provide atomic CAS for targets where atomic CAS is not available in the standard library. (thumbv6m, pre-v6 Arm, RISC-V without A-extension, MSP430, AVR, Xtensa, etc.) (always enabled for MSP430 and AVR, [optional](#optional-features-critical-section) otherwise) - Provide stable equivalents of the standard library's atomic types' unstable APIs, such as [`AtomicPtr::fetch_*`](/~https://github.com/rust-lang/rust/issues/99108). 
- Make features that require newer compilers, such as [`fetch_{max,min}`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.fetch_max), [`fetch_update`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.fetch_update), [`as_ptr`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.as_ptr), [`from_ptr`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.from_ptr), [`AtomicBool::fetch_not`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicBool.html#method.fetch_not) and [stronger CAS failure ordering](/~https://github.com/rust-lang/rust/pull/98383) available on Rust 1.34+. - Provide workaround for bugs in the standard library's atomic-related APIs, such as [rust-lang/rust#100650], `fence`/`compiler_fence` on MSP430 that cause LLVM error, etc. @@ -46,7 +46,7 @@ portable-atomic = { version = "1.3", default-features = false, features = ["requ ## 128-bit atomics support -Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), aarch64 (Rust 1.59+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used. +Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used. On x86_64, even if `cmpxchg16b` is not available at compile-time (note: `cmpxchg16b` target feature is enabled by default only on Apple and Windows (except Windows 7) targets), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg. 
@@ -120,7 +120,7 @@ See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic Enabling this feature in an environment where privileged instructions are not available, or if the instructions used are not sufficient to disable interrupts in the system, it is also usually considered **unsound**, although the details are system-dependent. The following are known cases: - - On pre-v6 ARM, this disables only IRQs by default. For many systems (e.g., GBA) this is enough. If the system need to disable both IRQs and FIQs, you need to enable the `disable-fiq` feature together. + - On pre-v6 Arm, this disables only IRQs by default. For many systems (e.g., GBA) this is enough. If the system need to disable both IRQs and FIQs, you need to enable the `disable-fiq` feature together. - On RISC-V without A-extension, this generates code for machine-mode (M-mode) by default. If you enable the `s-mode` together, this generates code for supervisor-mode (S-mode). In particular, `qemu-system-riscv*` uses [OpenSBI](/~https://github.com/riscv-software-src/opensbi) as the default firmware. See also [the `interrupt` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/interrupt/README.md). @@ -129,7 +129,7 @@ See the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic It is **very strongly discouraged** to enable this feature in libraries that depend on `portable-atomic`. The recommended approach for libraries is to leave it up to the end user whether or not to enable this feature. (However, it may make sense to enable this feature by default for libraries specific to a platform where it is guaranteed to always be sound, for example in a hardware abstraction layer targeting a single-core chip.) - ARMv6-M (thumbv6m), pre-v6 ARM (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa are currently supported. 
+ Armv6-M (thumbv6m), pre-v6 Arm (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa are currently supported. Since all MSP430 and AVR are single-core, we always provide atomic CAS for them without this feature. @@ -161,14 +161,14 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ... - **`--cfg portable_atomic_no_outline_atomics`**
Disable dynamic dispatching by run-time CPU feature detection. - If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (aarch64). + If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (AArch64). Note: - - Dynamic detection is currently only enabled in Rust 1.59+ for aarch64 and x86_64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set. + - Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set. - If the required target features are enabled at compile-time, the atomic operations are inlined. - This is compatible with no-std (as with all features except `std`). - On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.) - - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. (portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.) + - Some AArch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. 
(portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.) See also the [`atomic128` module's readme](/~https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md). @@ -232,9 +232,9 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ... // These features are already stabilized or have already been removed from compilers, // and can safely be enabled for old nightly as long as version detection works. // - cfg(target_has_atomic) -// - asm! on ARM, AArch64, RISC-V, x86_64 +// - asm! on Arm, AArch64, RISC-V, x86_64 // - llvm_asm! on AVR (tier 3) and MSP430 (tier 3) -// - #[instruction_set] on non-Linux/Android pre-v6 ARM (tier 3) +// - #[instruction_set] on non-Linux/Android pre-v6 Arm (tier 3) #![cfg_attr(portable_atomic_unstable_cfg_target_has_atomic, feature(cfg_target_has_atomic))] #![cfg_attr( all( diff --git a/tests/no-std-qemu/.cargo/config.toml b/tests/no-std-qemu/.cargo/config.toml index 12ff8330b..dd6c92ce3 100644 --- a/tests/no-std-qemu/.cargo/config.toml +++ b/tests/no-std-qemu/.cargo/config.toml @@ -17,7 +17,7 @@ runner = "qemu-system-arm -M lm3s6965evb -cpu cortex-m33 -display none -semihost [target.thumbv8m.main-none-eabihf] runner = "qemu-system-arm -M lm3s6965evb -cpu cortex-m33 -display none -semihosting -kernel" -# ARMv5TE +# Armv5TE [target.armv5te-none-eabi] runner = "qemu-system-arm -M versatilepb -cpu arm926 -display none -semihosting -kernel" [target.thumbv5te-none-eabi] diff --git a/tools/build.sh b/tools/build.sh index afd1c68a8..1c44dad39 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -47,11 +47,11 @@ default_targets=( # x86_64 # rustc --print target-list | grep -E '^x86_64' x86_64-unknown-linux-gnu - # x86_64 with CMPXCHG16B + # with CMPXCHG16B x86_64-apple-darwin - # x86_64 X32 ABI + # X32 ABI x86_64-unknown-linux-gnux32 - # x86_64 without CPUID + # no CPUID 
x86_64-fortanix-unknown-sgx # x86 @@ -70,17 +70,17 @@ default_targets=( aarch64-unknown-linux-uclibc # custom target aarch64-unknown-netbsd aarch64-unknown-openbsd - # aarch64 with FEAT_LSE & FEAT_LSE2 + # FEAT_LSE & FEAT_LSE2 aarch64-apple-darwin - # aarch64 big endian + # big endian aarch64_be-unknown-linux-gnu aarch64_be-unknown-netbsd - # aarch64 ILP32 ABI + # ILP32 ABI aarch64-unknown-linux-gnu_ilp32 - # aarch64 ILP32 ABI big endian + # ILP32 ABI big endian aarch64_be-unknown-linux-gnu_ilp32 - # pre-v6 arm linux-like + # arm pre-v6 linux-like armv4t-unknown-linux-gnueabi armv5te-unknown-linux-gnueabi arm-linux-androideabi diff --git a/tools/no-std.sh b/tools/no-std.sh index f443d6a2a..ae1498dbb 100755 --- a/tools/no-std.sh +++ b/tools/no-std.sh @@ -10,19 +10,20 @@ cd -- "$(dirname -- "$0")"/.. # ./tools/no-std.sh [+toolchain] [target]... default_targets=( - # armv4t + # arm + # v4T armv4t-none-eabi thumbv4t-none-eabi - # armv5te + # v5TE armv5te-none-eabi thumbv5te-none-eabi - # armv6-m + # v6-M thumbv6m-none-eabi - # armv7-m + # v7-M thumbv7m-none-eabi thumbv7em-none-eabi thumbv7em-none-eabihf - # armv8-m + # v8-M thumbv8m.base-none-eabi thumbv8m.main-none-eabi thumbv8m.main-none-eabihf diff --git a/tools/target_spec.sh b/tools/target_spec.sh index 724615565..88c32c029 100755 --- a/tools/target_spec.sh +++ b/tools/target_spec.sh @@ -32,7 +32,7 @@ cat >|"${utils_file}" <|"${utils_file}" <