From 7fd038bb2e8ba92762a8d1ffd86a87c7cbd637da Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 3 Apr 2018 09:44:32 -0700 Subject: [PATCH] Stabilize x86/x86_64 intrinsics This commit stabilizes all intrinsics in the `x86` and `x86_64` modules, namely allowing stabilization of the `arch::x86` and `arch::x86_64` module in libstd. Stabilizations here were applied in an automated fashion using [this script][scr], and notably everything related to `__m64` was omitted from this round of stabilization [scr]: https://gist.github.com/alexcrichton/5b456d495d6fe1df46a158754565c7a5 --- coresimd/arm/mod.rs | 6 +- coresimd/mod.rs | 12 +- coresimd/x86/abm.rs | 6 + coresimd/x86/aes.rs | 18 + coresimd/x86/avx.rs | 584 ++++++++++++++++++++++++++++ coresimd/x86/avx2.rs | 582 ++++++++++++++++++++++++++++ coresimd/x86/{bmi.rs => bmi1.rs} | 24 ++ coresimd/x86/bmi2.rs | 12 + coresimd/x86/bswap.rs | 3 + coresimd/x86/cpuid.rs | 8 + coresimd/x86/eflags.rs | 12 + coresimd/x86/fxsr.rs | 6 + coresimd/x86/mod.rs | 12 +- coresimd/x86/pclmulqdq.rs | 3 + coresimd/x86/rdrand.rs | 12 + coresimd/x86/rdtsc.rs | 6 + coresimd/x86/sha.rs | 21 ++ coresimd/x86/sse.rs | 341 +++++++++++++++++ coresimd/x86/sse2.rs | 630 +++++++++++++++++++++++++++++++ coresimd/x86/sse3.rs | 33 ++ coresimd/x86/sse41.rs | 187 +++++++++ coresimd/x86/sse42.rs | 70 ++++ coresimd/x86/sse4a.rs | 4 + coresimd/x86/ssse3.rs | 48 +++ coresimd/x86/tbm.rs | 18 + coresimd/x86/xsave.rs | 25 ++ coresimd/x86_64/abm.rs | 6 + coresimd/x86_64/avx.rs | 3 + coresimd/x86_64/avx2.rs | 3 + coresimd/x86_64/bmi.rs | 24 ++ coresimd/x86_64/bmi2.rs | 12 + coresimd/x86_64/bswap.rs | 3 + coresimd/x86_64/fxsr.rs | 6 + coresimd/x86_64/rdrand.rs | 6 + coresimd/x86_64/sse.rs | 9 + coresimd/x86_64/sse2.rs | 33 ++ coresimd/x86_64/sse41.rs | 6 + coresimd/x86_64/sse42.rs | 3 + coresimd/x86_64/xsave.rs | 18 + crates/stdsimd/src/lib.rs | 2 +- stdsimd/arch/detect/arch/x86.rs | 64 +++- stdsimd/mod.rs | 16 +- 42 files changed, 2889 insertions(+), 8 deletions(-) rename coresimd/x86/{bmi.rs => bmi1.rs} (78%) diff --git a/coresimd/arm/mod.rs b/coresimd/arm/mod.rs index d778ed6f14..9798db59bc 100644 --- a/coresimd/arm/mod.rs +++ b/coresimd/arm/mod.rs @@ -20,8 +20,10 @@ pub use self::v7::*; // NEON is supported on AArch64, and on ARM when built with the v7 and neon // features. Building ARM without neon produces incorrect codegen. #[cfg(any(target_arch = "aarch64", - all(target_feature = "v7", target_feature = "neon")))] + all(target_feature = "v7", target_feature = "neon"), + dox))] mod neon; #[cfg(any(target_arch = "aarch64", - all(target_feature = "v7", target_feature = "neon")))] + all(target_feature = "v7", target_feature = "neon"), + dox))] pub use self::neon::*; diff --git a/coresimd/mod.rs b/coresimd/mod.rs index 5768ca9b34..9cf63d14af 100644 --- a/coresimd/mod.rs +++ b/coresimd/mod.rs @@ -41,14 +41,16 @@ pub mod simd { /// [`aarch64`]: https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/arch/index.html /// [`mips`]: https://rust-lang-nursery.github.io/stdsimd/mips/stdsimd/arch/index.html /// [`mips64`]: https://rust-lang-nursery.github.io/stdsimd/mips64/stdsimd/arch/index.html -#[unstable(feature = "stdsimd", issue = "0")] +#[stable(feature = "simd_arch", since = "1.27.0")] pub mod arch { /// Platform-specific intrinsics for the `x86` platform. /// /// See the [module documentation](../index.html) for more details. 
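// A minimal usage sketch of the intrinsics this patch stabilizes, assuming the
// re-exports are reachable downstream as `std::arch::x86_64` and that std's
// `is_x86_feature_detected!` macro is available; the function names here are
// illustrative, not part of the patch.
#[cfg(target_arch = "x86_64")]
fn add_f64x4(a: &[f64; 4], b: &[f64; 4]) -> [f64; 4] {
    #[target_feature(enable = "avx")]
    unsafe fn add_avx(a: &[f64; 4], b: &[f64; 4]) -> [f64; 4] {
        use std::arch::x86_64::*;
        // Unaligned loads/stores, so plain arrays are fine as inputs.
        let sum = _mm256_add_pd(_mm256_loadu_pd(a.as_ptr()), _mm256_loadu_pd(b.as_ptr()));
        let mut out = [0.0f64; 4];
        _mm256_storeu_pd(out.as_mut_ptr(), sum);
        out
    }
    if is_x86_feature_detected!("avx") {
        // Safe: the AVX requirement was just verified at runtime.
        unsafe { add_avx(a, b) }
    } else {
        let mut out = [0.0f64; 4];
        for i in 0..4 {
            out[i] = a[i] + b[i];
        }
        out
    }
}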
#[cfg(any(target_arch = "x86", dox))] #[doc(cfg(target_arch = "x86"))] + #[stable(feature = "simd_x86", since = "1.27.0")] pub mod x86 { + #[stable(feature = "simd_x86", since = "1.27.0")] pub use coresimd::x86::*; } @@ -57,8 +59,11 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "x86_64", dox))] #[doc(cfg(target_arch = "x86_64"))] + #[stable(feature = "simd_x86", since = "1.27.0")] pub mod x86_64 { + #[stable(feature = "simd_x86", since = "1.27.0")] pub use coresimd::x86::*; + #[stable(feature = "simd_x86", since = "1.27.0")] pub use coresimd::x86_64::*; } @@ -67,6 +72,7 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "arm", dox))] #[doc(cfg(target_arch = "arm"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod arm { pub use coresimd::arm::*; } @@ -76,6 +82,7 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "aarch64", dox))] #[doc(cfg(target_arch = "aarch64"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod aarch64 { pub use coresimd::aarch64::*; pub use coresimd::arm::*; @@ -85,6 +92,7 @@ pub mod arch { /// /// See the [module documentation](../index.html) for more details. #[cfg(target_arch = "wasm32")] + #[unstable(feature = "stdsimd", issue = "0")] pub mod wasm32 { pub use coresimd::wasm32::*; } @@ -94,6 +102,7 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "mips", dox))] #[doc(cfg(target_arch = "mips"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod mips { pub use coresimd::mips::*; } @@ -103,6 +112,7 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "mips64", dox))] #[doc(cfg(target_arch = "mips64"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod mips64 { pub use coresimd::mips::*; } diff --git a/coresimd/x86/abm.rs b/coresimd/x86/abm.rs index 1576c0a750..b8d336b03e 100644 --- a/coresimd/x86/abm.rs +++ b/coresimd/x86/abm.rs @@ -23,17 +23,23 @@ use stdsimd_test::assert_instr; /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32) #[inline] #[target_feature(enable = "lzcnt")] #[cfg_attr(test, assert_instr(lzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// Counts the bits that are set. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32) #[inline] #[target_feature(enable = "popcnt")] #[cfg_attr(test, assert_instr(popcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _popcnt32(x: i32) -> i32 { x.count_ones() as i32 } diff --git a/coresimd/x86/aes.rs b/coresimd/x86/aes.rs index 6fade3d453..7a44d4ae5c 100644 --- a/coresimd/x86/aes.rs +++ b/coresimd/x86/aes.rs @@ -29,41 +29,56 @@ extern "C" { } /// Perform one round of an AES decryption flow on data (state) in `a`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesdec))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdec(a, round_key) } /// Perform the last round of an AES decryption flow on data (state) in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesdeclast))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdeclast(a, round_key) } /// Perform one round of an AES encryption flow on data (state) in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesenc))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenc(a, round_key) } /// Perform the last round of an AES encryption flow on data (state) in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesenclast))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenclast(a, round_key) } /// Perform the `InvMixColumns` transformation on `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesimc))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i { aesimc(a) } @@ -73,10 +88,13 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i { /// Assist in expanding the AES cipher key by computing steps towards /// generating a round key for encryption cipher using data from `a` and an /// 8-bit round constant `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aeskeygenassist_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => { diff --git a/coresimd/x86/avx.rs b/coresimd/x86/avx.rs index 16fa5ce217..cd4e65e562 100644 --- a/coresimd/x86/avx.rs +++ b/coresimd/x86/avx.rs @@ -25,18 +25,24 @@ use stdsimd_test::assert_instr; /// Add packed double-precision (64-bit) floating-point elements /// in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { simd_add(a, b) } /// Add packed single-precision (32-bit) floating-point elements in `a` and /// `b`. 
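// A sketch of how the AES intrinsics above compose into an AES-128 block
// encryption, assuming `std::arch::x86_64` is in scope and that `round_keys`
// already holds the 11 expanded round keys (key expansion via
// `_mm_aeskeygenassist_si128` is omitted; the function name is illustrative).
#[target_feature(enable = "aes")]
unsafe fn aes128_encrypt_block(block: __m128i, round_keys: &[__m128i; 11]) -> __m128i {
    // Initial AddRoundKey, nine full rounds, then a final round without MixColumns.
    let mut state = _mm_xor_si128(block, round_keys[0]);
    for rk in &round_keys[1..10] {
        state = _mm_aesenc_si128(state, *rk);
    }
    _mm_aesenclast_si128(state, round_keys[10])
}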
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { simd_add(a, b) } @@ -44,11 +50,14 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { /// Compute the bitwise AND of a packed double-precision (64-bit) /// floating-point elements /// in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_pd) #[inline] #[target_feature(enable = "avx")] // FIXME: Should be 'vandpd' instuction. // See /~https://github.com/rust-lang-nursery/stdsimd/issues/71 #[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); @@ -57,9 +66,12 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { /// Compute the bitwise AND of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); @@ -68,11 +80,14 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { /// Compute the bitwise OR packed double-precision (64-bit) floating-point /// elements in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_pd) #[inline] #[target_feature(enable = "avx")] // FIXME: Should be 'vorpd' instuction. // See /~https://github.com/rust-lang-nursery/stdsimd/issues/71 #[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); @@ -81,9 +96,12 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { /// Compute the bitwise OR packed single-precision (32-bit) floating-point /// elements in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); @@ -92,10 +110,13 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { /// Shuffle double-precision (64-bit) floating-point elements within 128-bit /// lanes using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! 
shuffle4 { @@ -135,10 +156,13 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { /// Shuffle single-precision (32-bit) floating-point elements in `a` within /// 128-bit lanes using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { @@ -196,10 +220,13 @@ pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { /// Compute the bitwise NOT of packed double-precision (64-bit) floating-point /// elements in `a` /// and then AND with `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_pd) #[inline] #[target_feature(enable = "avx")] // FIXME: Should be 'vandnpd' instruction. #[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); @@ -209,9 +236,12 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { /// Compute the bitwise NOT of packed single-precision (32-bit) floating-point /// elements in `a` /// and then AND with `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); @@ -220,108 +250,144 @@ pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { /// Compare packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and return packed maximum values +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { maxpd256(a, b) } /// Compare packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and return packed maximum values +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { maxps256(a, b) } /// Compare packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and return packed minimum values +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { minpd256(a, b) } /// Compare packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and return packed minimum values +/// +/// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { minps256(a, b) } /// Add packed double-precision (64-bit) floating-point elements /// in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { simd_mul(a, b) } /// Add packed single-precision (32-bit) floating-point elements in `a` and /// `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { simd_mul(a, b) } /// Alternatively add and subtract packed double-precision (64-bit) /// floating-point elements in `a` to/from packed elements in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { addsubpd256(a, b) } /// Alternatively add and subtract packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { addsubps256(a, b) } /// Subtract packed double-precision (64-bit) floating-point elements in `b` /// from packed elements in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { simd_sub(a, b) } /// Subtract packed single-precision (32-bit) floating-point elements in `b` /// from packed elements in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { simd_sub(a, b) } /// Compute the division of each of the 8 packed 32-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { simd_div(a, b) } /// Compute the division of each of the 4 packed 64-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { simd_div(a, b) } @@ -337,10 +403,13 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { /// For a complete list of options, check [the LLVM docs][llvm_docs]. /// /// [llvm_docs]: /~https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_round_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd, b = 0x3))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d { macro_rules! call { ($imm8:expr) => { @@ -352,18 +421,24 @@ pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d { /// Round packed double-precision (64-bit) floating point elements in `a` /// toward positive infinity. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { roundpd256(a, 0x02) } /// Round packed double-precision (64-bit) floating point elements in `a` /// toward negative infinity. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { roundpd256(a, 0x01) } @@ -379,10 +454,13 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { /// For a complete list of options, check [the LLVM docs][llvm_docs]. /// /// [llvm_docs]: /~https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_round_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps, b = 0x00))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { @@ -394,46 +472,61 @@ pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 { /// Round packed single-precision (32-bit) floating point elements in `a` /// toward positive infinity. 
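// A sketch of the rounding family, assuming `std::arch::x86_64` is in scope;
// the rounding-control constants come from the SSE4.1 module and the wrapper
// name is illustrative.
use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn round_three_ways(a: __m256d) -> (__m256d, __m256d, __m256d) {
    let up = _mm256_ceil_pd(a);    // toward positive infinity
    let down = _mm256_floor_pd(a); // toward negative infinity
    let nearest = _mm256_round_pd(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    (up, down, nearest)
}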
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { roundps256(a, 0x02) } /// Round packed single-precision (32-bit) floating point elements in `a` /// toward negative infinity. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { roundps256(a, 0x01) } /// Return the square root of packed single-precision (32-bit) floating point /// elements in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { sqrtps256(a) } /// Return the square root of packed double-precision (64-bit) floating point /// elements in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { sqrtpd256(a) } /// Blend packed double-precision (64-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { @@ -473,10 +566,13 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { /// Blend packed single-precision (32-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { @@ -533,18 +629,24 @@ pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { /// Blend packed double-precision (64-bit) floating-point elements from /// `a` and `b` using `c` as a mask. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vblendvpd(a, b, c) } /// Blend packed single-precision (32-bit) floating-point elements from /// `a` and `b` using `c` as a mask. 
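// A sketch contrasting the immediate and variable blends, assuming
// `std::arch::x86_64` is in scope; the function name is illustrative.
use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn blend_examples(a: __m256d, b: __m256d, mask: __m256d) -> (__m256d, __m256d) {
    // Immediate form: bit i of `imm8` selects lane i from `b` (lanes 0 and 3 here).
    let fixed = _mm256_blend_pd(a, b, 0b1001);
    // Variable form: the sign bit of each `mask` lane selects from `b`.
    let chosen = _mm256_blendv_pd(a, b, mask);
    (fixed, chosen)
}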
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vblendvps(a, b, c) } @@ -553,10 +655,13 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { /// elements in `a` and `b` using the high 4 bits in `imm8`, /// sum the four products, and conditionally return the sum /// using the low 4 bits of `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dp_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { @@ -570,9 +675,12 @@ pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { /// of 4 64-bit floating points `a` and `b`. /// In the result, sums of elements from `a` are returned in even locations, /// while sums of elements from `b` are returned in odd locations. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { vhaddpd(a, b) } @@ -582,9 +690,12 @@ pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { /// In the result, sums of elements from `a` are returned in locations of /// indices 0, 1, 4, 5; while sums of elements from `b` are locations /// 2, 3, 6, 7. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { vhaddps(a, b) } @@ -593,9 +704,12 @@ pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { /// of 4 64-bit floating points `a` and `b`. /// In the result, sums of elements from `a` are returned in even locations, /// while sums of elements from `b` are returned in odd locations. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { vhsubpd(a, b) } @@ -605,19 +719,25 @@ pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { /// In the result, sums of elements from `a` are returned in locations of /// indices 0, 1, 4, 5; while sums of elements from `b` are locations /// 2, 3, 6, 7. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { vhsubps(a, b) } /// Compute the bitwise XOR of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_pd) #[inline] #[target_feature(enable = "avx")] // FIXME Should be 'vxorpd' instruction. #[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); @@ -626,9 +746,12 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { /// Compute the bitwise XOR of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); @@ -636,77 +759,112 @@ pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { } /// Equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_OQ: i32 = 0x00; /// Less-than (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LT_OS: i32 = 0x01; /// Less-than-or-equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LE_OS: i32 = 0x02; /// Unordered (non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_UNORD_Q: i32 = 0x03; /// Not-equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_UQ: i32 = 0x04; /// Not-less-than (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLT_US: i32 = 0x05; /// Not-less-than-or-equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLE_US: i32 = 0x06; /// Ordered (non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_ORD_Q: i32 = 0x07; /// Equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_UQ: i32 = 0x08; /// Not-greater-than-or-equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGE_US: i32 = 0x09; /// Not-greater-than (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGT_US: i32 = 0x0a; /// False (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_FALSE_OQ: i32 = 0x0b; /// Not-equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_OQ: i32 = 0x0c; /// Greater-than-or-equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GE_OS: i32 = 0x0d; /// Greater-than (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GT_OS: i32 = 0x0e; /// True (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_TRUE_UQ: i32 = 0x0f; /// Equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_OS: i32 = 0x10; /// Less-than (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LT_OQ: i32 = 0x11; /// Less-than-or-equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LE_OQ: i32 = 0x12; /// Unordered (signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const 
_CMP_UNORD_S: i32 = 0x13; /// Not-equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_US: i32 = 0x14; /// Not-less-than (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLT_UQ: i32 = 0x15; /// Not-less-than-or-equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLE_UQ: i32 = 0x16; /// Ordered (signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_ORD_S: i32 = 0x17; /// Equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_US: i32 = 0x18; /// Not-greater-than-or-equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGE_UQ: i32 = 0x19; /// Not-greater-than (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGT_UQ: i32 = 0x1a; /// False (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_FALSE_OS: i32 = 0x1b; /// Not-equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_OS: i32 = 0x1c; /// Greater-than-or-equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GE_OQ: i32 = 0x1d; /// Greater-than (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GT_OQ: i32 = 0x1e; /// True (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_TRUE_US: i32 = 0x1f; /// Compare packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_pd) #[inline] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { @@ -719,10 +877,13 @@ pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { /// Compare packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { macro_rules! call { ($imm8:expr) => { @@ -735,10 +896,13 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { /// Compare packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ps) #[inline] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! 
call { ($imm8:expr) => { @@ -751,10 +915,13 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { /// Compare packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { @@ -769,10 +936,13 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { /// store the result in the lower element of returned vector, /// and copy the upper element from `a` to the upper element of returned /// vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd) #[inline] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { @@ -787,10 +957,13 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { /// store the result in the lower element of returned vector, /// and copy the upper 3 packed elements from `a` to the upper elements of /// returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss) #[inline] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { @@ -802,82 +975,109 @@ pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { /// Convert packed 32-bit integers in `a` to packed double-precision (64-bit) /// floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { simd_cast(a.as_i32x4()) } /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { vcvtdq2ps(a.as_i32x8()) } /// Convert packed double-precision (64-bit) floating-point elements in `a` /// to packed single-precision (32-bit) floating-point elements. 
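// A sketch of the predicate-based compares, assuming `std::arch::x86_64` is in
// scope; result lanes are all-ones where the predicate holds and all-zeros
// otherwise, and the function name is illustrative.
use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn count_lanes_less_than(a: __m256d, b: __m256d) -> u32 {
    let lt = _mm256_cmp_pd(a, b, _CMP_LT_OQ);
    // _mm256_movemask_pd packs the four lane sign bits into the low bits of an i32.
    _mm256_movemask_pd(lt).count_ones()
}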
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { vcvtpd2ps(a) } /// Convert packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { mem::transmute(vcvtps2dq(a)) } /// Convert packed single-precision (32-bit) floating-point elements in `a` /// to packed double-precision (64-bit) floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2pd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { simd_cast(a) } /// Convert packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttpd_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { mem::transmute(vcvttpd2dq(a)) } /// Convert packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { mem::transmute(vcvtpd2dq(a)) } /// Convert packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttps_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { mem::transmute(vcvttps2dq(a)) } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) /// floating-point elements) from `a`, selected with `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]), @@ -887,10 +1087,13 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { /// Extract 128 bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a`, selected with `imm8`. 
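// A sketch of 128-bit lane extraction, assuming `std::arch::x86_64` is in
// scope; `imm8 & 1 == 0` selects the low half and `1` the high half, and the
// function name is illustrative.
use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn split_halves(v: __m256) -> (__m128, __m128) {
    let lo = _mm256_extractf128_ps(v, 0);
    let hi = _mm256_extractf128_ps(v, 1);
    (lo, hi)
}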
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { match imm8 & 1 { 0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]), @@ -899,10 +1102,13 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { } /// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { let b = _mm256_undefined_si256().as_i64x4(); let dst: i64x2 = match imm8 & 1 { @@ -913,46 +1119,61 @@ pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { } /// Zero the contents of all XMM or YMM registers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroall) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vzeroall))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zeroall() { vzeroall() } /// Zero the upper 128 bits of all YMM registers; /// the lower 128-bits of the registers are unmodified. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroupper) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vzeroupper))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zeroupper() { vzeroupper() } /// Shuffle single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { vpermilps256(a, b.as_i32x8()) } /// Shuffle single-precision (32-bit) floating-point elements in `a` /// using the control in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { vpermilps(a, b.as_i32x4()) } /// Shuffle single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! 
shuffle4 { @@ -1004,10 +1225,13 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { /// Shuffle single-precision (32-bit) floating-point elements in `a` /// using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps) #[inline] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { @@ -1055,28 +1279,37 @@ pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { /// Shuffle double-precision (64-bit) floating-point elements in `a` /// within 256-bit lanes using the control in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { vpermilpd256(a, b.as_i64x4()) } /// Shuffle double-precision (64-bit) floating-point elements in `a` /// using the control in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { vpermilpd(a, b.as_i64x2()) } /// Shuffle double-precision (64-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { @@ -1116,10 +1349,13 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d { /// Shuffle double-precision (64-bit) floating-point elements in `a` /// using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd) #[inline] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle2 { @@ -1143,10 +1379,13 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { /// Shuffle 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2f128_ps( a: __m256, b: __m256, imm8: i32 ) -> __m256 { @@ -1160,10 +1399,13 @@ pub unsafe fn _mm256_permute2f128_ps( /// Shuffle 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2f128_pd( a: __m256d, b: __m256d, imm8: i32 ) -> __m256d { @@ -1177,10 +1419,13 @@ pub unsafe fn _mm256_permute2f128_pd( /// Shuffle 258-bits (composed of integer data) selected by `imm8` /// from `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2f128_si256( a: __m256i, b: __m256i, imm8: i32 ) -> __m256i { @@ -1197,45 +1442,60 @@ pub unsafe fn _mm256_permute2f128_si256( /// Broadcast a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ss) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } /// Broadcast a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcast_ss) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } /// Broadcast a double-precision (64-bit) floating-point element from memory /// to all elements of the returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_sd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } /// Broadcast 128 bits from memory (composed of 4 packed single-precision /// (32-bit) floating-point elements) to all elements of the returned vector. 
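// A sketch of the broadcast loads, assuming `std::arch::x86_64` is in scope;
// note that these intrinsics take references rather than raw pointers, and the
// function name is illustrative.
use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn splat_examples(x: f32, v: __m128d) -> (__m256, __m256d) {
    let all_x = _mm256_broadcast_ss(&x); // `x` repeated across all 8 lanes
    let two_v = _mm256_broadcast_pd(&v); // `v` repeated in both 128-bit halves
    (all_x, two_v)
}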
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { vbroadcastf128ps256(a) } /// Broadcast 128 bits from memory (composed of 2 packed double-precision /// (64-bit) floating-point elements) to all elements of the returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { vbroadcastf128pd256(a) } @@ -1243,10 +1503,13 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { /// Copy `a` to result, then insert 128 bits (composed of 4 packed /// single-precision (32-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { let b = _mm256_castps128_ps256(b); match imm8 & 1 { @@ -1258,10 +1521,13 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { /// Copy `a` to result, then insert 128 bits (composed of 2 packed /// double-precision (64-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_pd( a: __m256d, b: __m128d, imm8: i32 ) -> __m256d { @@ -1273,10 +1539,13 @@ pub unsafe fn _mm256_insertf128_pd( /// Copy `a` to result, then insert 128 bits from `b` into result /// at the location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_si256( a: __m256i, b: __m128i, imm8: i32 ) -> __m256i { @@ -1290,10 +1559,13 @@ pub unsafe fn _mm256_insertf128_si256( /// Copy `a` to result, and insert the 8-bit integer `i` into result /// at the location specified by `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi8) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
#[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { mem::transmute(simd_insert( a.as_i8x32(), @@ -1304,10 +1576,13 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { /// Copy `a` to result, and insert the 16-bit integer `i` into result /// at the location specified by `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi16) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i { mem::transmute(simd_insert( a.as_i16x16(), @@ -1318,10 +1593,13 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i { /// Copy `a` to result, and insert the 32-bit integer `i` into result /// at the location specified by `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i)) } @@ -1330,9 +1608,12 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { *(mem_addr as *const __m256d) } @@ -1341,9 +1622,12 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { *(mem_addr as *mut __m256d) = a; } @@ -1352,9 +1636,12 @@ pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { *(mem_addr as *const __m256) } @@ -1363,9 +1650,12 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { /// floating-point elements) from `a` into memory. 
/// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { *(mem_addr as *mut __m256) = a; } @@ -1373,9 +1663,12 @@ pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { /// Load 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { let mut dst = _mm256_undefined_pd(); ptr::copy_nonoverlapping( @@ -1389,9 +1682,12 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { /// Store 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { storeupd256(mem_addr, a); } @@ -1399,9 +1695,12 @@ pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { /// Load 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { let mut dst = _mm256_undefined_ps(); ptr::copy_nonoverlapping( @@ -1415,9 +1714,12 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { /// Store 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { storeups256(mem_addr, a); } @@ -1425,9 +1727,12 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { /// Load 256-bits of integer data from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. 
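// Illustrative sketch, not part of the patch: `scale_f32x8` is a made-up
// helper. The unaligned `_mm256_loadu_*`/`_mm256_storeu_*` forms above accept
// any pointer, so they are the safe default for arbitrary slices; the aligned
// `load`/`store` forms fault unless the address is 32-byte aligned.
// `_mm256_set1_ps` and `_mm256_mul_ps` are other intrinsics from the same
// stabilized AVX set, defined elsewhere in `avx.rs`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn scale_f32x8(data: &mut [f32; 8], factor: f32) {
    use std::arch::x86_64::*;
    let v = _mm256_loadu_ps(data.as_ptr()); // no alignment requirement
    let scaled = _mm256_mul_ps(v, _mm256_set1_ps(factor));
    _mm256_storeu_ps(data.as_mut_ptr(), scaled);
}
// End of sketch.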
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i { *mem_addr } @@ -1435,18 +1740,24 @@ pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i { /// Store 256-bits of integer data from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) { *mem_addr = a; } /// Load 256-bits of integer data from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { let mut dst = _mm256_undefined_si256(); ptr::copy_nonoverlapping( @@ -1459,9 +1770,12 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { /// Store 256-bits of integer data from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { storeudq256(mem_addr as *mut i8, a.as_i8x32()); } @@ -1469,9 +1783,12 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { /// Load packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_pd( mem_addr: *const f64, mask: __m256i ) -> __m256d { @@ -1480,9 +1797,12 @@ pub unsafe fn _mm256_maskload_pd( /// Store packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_pd( mem_addr: *mut f64, mask: __m256i, a: __m256d ) { @@ -1492,18 +1812,24 @@ pub unsafe fn _mm256_maskstore_pd( /// Load packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d { maskloadpd(mem_addr as *const i8, mask.as_i64x2()) } /// Store packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a); } @@ -1511,9 +1837,12 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { /// Load packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_ps( mem_addr: *const f32, mask: __m256i ) -> __m256 { @@ -1522,9 +1851,12 @@ pub unsafe fn _mm256_maskload_ps( /// Store packed single-precision (32-bit) floating-point elements from `a` /// into memory using `mask`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_ps( mem_addr: *mut f32, mask: __m256i, a: __m256 ) { @@ -1534,45 +1866,60 @@ pub unsafe fn _mm256_maskstore_ps( /// Load packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 { maskloadps(mem_addr as *const i8, mask.as_i32x4()) } /// Store packed single-precision (32-bit) floating-point elements from `a` /// into memory using `mask`. 
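// Illustrative sketch, not part of the patch: `load_first_three` is a made-up
// helper. The mask used by `_mm256_maskload_pd`/`_mm256_maskstore_pd` is taken
// from the sign bit of each 64-bit element: lanes whose bit is clear are left
// untouched in memory (store) or come back as zero (load), and only the
// selected lanes are read. `_mm256_setr_epi64x` is defined further down in
// this file.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn load_first_three(p: *const f64) -> std::arch::x86_64::__m256d {
    use std::arch::x86_64::*;
    // -1 has the sign bit set (load this lane); 0 leaves the lane zeroed and
    // unread, so only three doubles behind `p` need to be valid.
    let mask = _mm256_setr_epi64x(-1, -1, -1, 0);
    _mm256_maskload_pd(p, mask)
}
// End of sketch.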
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a); } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements /// from `a`, and return the results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movehdup_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovshdup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements /// from `a`, and return the results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_moveldup_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovsldup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) } /// Duplicate even-indexed double-precision (64-bit) floating-point elements /// from "a", and return the results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movedup_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { simd_shuffle4(a, a, [0, 0, 2, 2]) } @@ -1580,9 +1927,12 @@ pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { /// Load 256-bits of integer data from unaligned memory into result. /// This intrinsic may perform better than `_mm256_loadu_si256` when the /// data crosses a cache line boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lddqu_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vlddqu))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i { mem::transmute(vlddqu(mem_addr as *const i8)) } @@ -1590,9 +1940,12 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i { /// Moves integer data from a 256-bit integer vector to a 32-byte /// aligned memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) { intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } @@ -1600,9 +1953,12 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) { /// Moves double-precision values from a 256-bit vector of [4 x double] /// to a 32-byte aligned memory location. To minimize caching, the data is /// flagged as non-temporal (unlikely to be used again soon). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) { intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } @@ -1611,9 +1967,12 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) { /// of [8 x float] to a 32-byte aligned memory location. To minimize /// caching, the data is flagged as non-temporal (unlikely to be used again /// soon). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) { intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } @@ -1621,9 +1980,12 @@ pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) { /// Compute the approximate reciprocal of packed single-precision (32-bit) /// floating-point elements in `a`, and return the results. The maximum /// relative error for this approximation is less than 1.5*2^-12. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrcpps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { vrcpps(a) } @@ -1631,45 +1993,60 @@ pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { /// Compute the approximate reciprocal square root of packed single-precision /// (32-bit) floating-point elements in `a`, and return the results. /// The maximum relative error for this approximation is less than 1.5*2^-12. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrsqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { vrsqrtps(a) } /// Unpack and interleave double-precision (64-bit) floating-point elements /// from the high half of each 128-bit lane in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [1, 5, 3, 7]) } /// Unpack and interleave single-precision (32-bit) floating-point elements /// from the high half of each 128-bit lane in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } /// Unpack and interleave double-precision (64-bit) floating-point elements /// from the low half of each 128-bit lane in `a` and `b`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpcklpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [0, 4, 2, 6]) } /// Unpack and interleave single-precision (32-bit) floating-point elements /// from the low half of each 128-bit lane in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } @@ -1678,9 +2055,12 @@ pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. /// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return the `ZF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { ptestz256(a.as_i64x4(), b.as_i64x4()) } @@ -1689,9 +2069,12 @@ pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. /// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { ptestc256(a.as_i64x4(), b.as_i64x4()) } @@ -1701,9 +2084,12 @@ pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { /// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and /// `CF` values are zero, otherwise return 0. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testnzc_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } @@ -1715,9 +2101,12 @@ pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. 
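// Illustrative sketch, not part of the patch: `is_all_zero` is a made-up
// helper. `_mm256_testz_si256(a, b)` returns `ZF`, i.e. 1 exactly when
// `a AND b` is all zeros, so testing a vector against itself answers
// "is this vector zero?" with a single `vptest`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn is_all_zero(v: std::arch::x86_64::__m256i) -> bool {
    use std::arch::x86_64::*;
    _mm256_testz_si256(v, v) == 1
}
// End of sketch.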
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { vtestzpd256(a, b) } @@ -1729,9 +2118,12 @@ pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { vtestcpd256(a, b) } @@ -1744,9 +2136,12 @@ pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testnzc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { vtestnzcpd256(a, b) } @@ -1758,9 +2153,12 @@ pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { vtestzpd(a, b) } @@ -1772,9 +2170,12 @@ pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { vtestcpd(a, b) } @@ -1787,9 +2188,12 @@ pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { vtestnzcpd(a, b) } @@ -1801,9 +2205,12 @@ pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { vtestzps256(a, b) } @@ -1815,9 +2222,12 @@ pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { vtestcps256(a, b) } @@ -1830,9 +2240,12 @@ pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testnzc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { vtestnzcps256(a, b) } @@ -1844,9 +2257,12 @@ pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { vtestzps(a, b) } @@ -1858,9 +2274,12 @@ pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { vtestcps(a, b) } @@ -1873,9 +2292,12 @@ pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { vtestnzcps(a, b) } @@ -1883,9 +2305,12 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { /// Set each bit of the returned mask based on the most significant bit of the /// corresponding packed double-precision (64-bit) floating-point element in /// `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { movmskpd256(a) } @@ -1893,52 +2318,70 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { /// Set each bit of the returned mask based on the most significant bit of the /// corresponding packed single-precision (32-bit) floating-point element in /// `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { movmskps256(a) } /// Return vector of type __m256d with all elements set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_pd() -> __m256d { _mm256_set1_pd(0.0) } /// Return vector of type __m256 with all elements set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_ps() -> __m256 { _mm256_set1_ps(0.0) } /// Return vector of type __m256i with all elements set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxor))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_si256() -> __m256i { _mm256_set1_epi8(0) } /// Set packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values. 
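// Illustrative sketch, not part of the patch: a made-up helper showing
// `_mm256_movemask_ps`, which packs the sign bit of each of the eight lanes
// into the low 8 bits of the returned integer.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn count_sign_set_lanes(v: std::arch::x86_64::__m256) -> u32 {
    use std::arch::x86_64::*;
    // One bit per lane; counting the set bits counts lanes whose sign bit is set.
    (_mm256_movemask_ps(v) as u32).count_ones()
}
// End of sketch.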
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { _mm256_setr_pd(d, c, b, a) } /// Set packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_ps( a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32 ) -> __m256 { @@ -1947,9 +2390,12 @@ pub unsafe fn _mm256_set_ps( /// Set packed 8-bit integers in returned vector with the supplied values in /// reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi8) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi8( e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8, e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8, @@ -1966,9 +2412,12 @@ pub unsafe fn _mm256_set_epi8( } /// Set packed 16-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi16) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi16( e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16, e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16, @@ -1984,9 +2433,12 @@ pub unsafe fn _mm256_set_epi16( } /// Set packed 32-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi32( e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32 ) -> __m256i { @@ -1994,27 +2446,36 @@ pub unsafe fn _mm256_set_epi32( } /// Set packed 64-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi64x) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { _mm256_setr_epi64x(d, c, b, a) } /// Set packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { __m256d(a, b, c, d) } /// Set packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_ps( a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32 ) -> __m256 { @@ -2023,9 +2484,12 @@ pub unsafe fn _mm256_setr_ps( /// Set packed 8-bit integers in returned vector with the supplied values in /// reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi8) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi8( e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8, e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8, @@ -2043,9 +2507,12 @@ pub unsafe fn _mm256_setr_epi8( /// Set packed 16-bit integers in returned vector with the supplied values in /// reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi16) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi16( e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16, e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16, @@ -2062,9 +2529,12 @@ pub unsafe fn _mm256_setr_epi16( /// Set packed 32-bit integers in returned vector with the supplied values in /// reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi32( e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32 ) -> __m256i { @@ -2073,38 +2543,50 @@ pub unsafe fn _mm256_setr_epi32( /// Set packed 64-bit integers in returned vector with the supplied values in /// reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi64x) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { mem::transmute(i64x4::new(a, b, c, d)) } /// Broadcast double-precision (64-bit) floating-point value `a` to all /// elements of returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d { _mm256_setr_pd(a, a, a, a) } /// Broadcast single-precision (32-bit) floating-point value `a` to all /// elements of returned vector. 
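// Illustrative sketch, not part of the patch: `same_vector` is a made-up
// helper illustrating the argument order of the two constructor families
// above. `_mm256_set_*` lists elements from the highest index down, while
// `_mm256_setr_*` ("reversed") lists them in memory order, so these two calls
// build identical vectors.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn same_vector() -> bool {
    use std::arch::x86_64::*;
    let a = _mm256_set_epi64x(3, 2, 1, 0); // element 3 = 3, ..., element 0 = 0
    let b = _mm256_setr_epi64x(0, 1, 2, 3); // element 0 = 0, ..., element 3 = 3
    let (mut x, mut y) = ([0i64; 4], [0i64; 4]);
    _mm256_storeu_si256(x.as_mut_ptr() as *mut __m256i, a);
    _mm256_storeu_si256(y.as_mut_ptr() as *mut __m256i, b);
    x == y // true
}
// End of sketch.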
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 { _mm256_setr_ps(a, a, a, a, a, a, a, a) } /// Broadcast 8-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastb`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi8) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpshufb))] #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] _mm256_setr_epi8( @@ -2117,112 +2599,148 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { /// Broadcast 16-bit integer `a` to all all elements of returned vector. /// This intrinsic may generate the `vpbroadcastw`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi16) #[inline] #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(vpshufb))] #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i { _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } /// Broadcast 32-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastd`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { _mm256_setr_epi32(a, a, a, a, a, a, a, a) } /// Broadcast 64-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastq`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi64x) #[inline] #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(vmovddup))] #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i { _mm256_setr_epi64x(a, a, a, a) } /// Cast vector of type __m256d to type __m256. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 { mem::transmute(a) } /// Cast vector of type __m256 to type __m256d. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
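// Illustrative sketch, not part of the patch: `add_bias` is a made-up helper.
// `_mm256_set1_epi32` splats one scalar into all eight lanes (the compiler may
// emit `vpbroadcastd`, per the note above); the addition uses
// `_mm256_add_epi32` from the AVX2 module stabilized later in this patch,
// hence the `avx2` feature requirement.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn add_bias(
    v: std::arch::x86_64::__m256i, bias: i32,
) -> std::arch::x86_64::__m256i {
    use std::arch::x86_64::*;
    _mm256_add_epi32(v, _mm256_set1_epi32(bias))
}
// End of sketch.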
+#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d { mem::transmute(a) } /// Casts vector of type __m256 to type __m256i. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i { mem::transmute(a) } /// Casts vector of type __m256i to type __m256. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 { mem::transmute(a) } /// Casts vector of type __m256d to type __m256i. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i { mem::transmute(a) } /// Casts vector of type __m256i to type __m256d. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d { mem::transmute(a) } /// Casts vector of type __m256 to type __m128. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps256_ps128) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { simd_shuffle4(a, a, [0, 1, 2, 3]) } /// Casts vector of type __m256d to type __m128d. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd256_pd128) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { simd_shuffle2(a, a, [0, 1]) } /// Casts vector of type __m256i to type __m128i. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_si128) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
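// Illustrative sketch, not part of the patch: a made-up helper. The
// `_mm256_cast*` intrinsics above are pure reinterpretations: the same 256
// bits viewed under a different type (exactly like the `mem::transmute` calls
// in their bodies), so they cost nothing at run time and do not convert lane
// values; the `cvt` intrinsics do that.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn bits_as_ints(v: std::arch::x86_64::__m256) -> std::arch::x86_64::__m256i {
    use std::arch::x86_64::*;
    _mm256_castps_si256(v) // reinterpret the float lanes as raw integer data
}
// End of sketch.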
+#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { let a = a.as_i64x4(); let dst: i64x2 = simd_shuffle2(a, a, [0, 1]); @@ -2231,10 +2749,13 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps128_ps256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) @@ -2242,10 +2763,13 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { /// Casts vector of type __m128d to type __m256d; /// the upper 128 bits of the result are undefined. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd128_pd256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) simd_shuffle4(a, a, [0, 1, 0, 0]) @@ -2253,10 +2777,13 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { /// Casts vector of type __m128i to type __m256i; /// the upper 128 bits of the result are undefined. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi128_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { let a = a.as_i64x2(); // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) @@ -2267,10 +2794,13 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { /// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain /// the value of the source vector. The upper 128 bits are set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextps128_ps256) #[inline] #[target_feature(enable = "avx,sse")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) } @@ -2278,10 +2808,13 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { /// Constructs a 256-bit integer vector from a 128-bit integer vector. /// The lower 128 bits contain the value of the source vector. The upper /// 128 bits are set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextsi128_si256) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
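// Illustrative sketch, not part of the patch: a made-up helper.
// `_mm256_castps128_ps256` leaves the upper 128 bits undefined, while
// `_mm256_zextps128_ps256` zeroes them, so the `zext` form is the one to use
// whenever the upper lane will be observed later.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx,sse")]
unsafe fn widen_with_zero_upper(
    a: std::arch::x86_64::__m128,
) -> std::arch::x86_64::__m256 {
    use std::arch::x86_64::*;
    _mm256_zextps128_ps256(a) // low half = `a`, high half = all zeros
}
// End of sketch.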
+#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { let b = _mm_setzero_si128().as_i64x2(); let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]); @@ -2292,50 +2825,68 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { /// 128-bit floating-point vector of [2 x double]. The lower 128 bits /// contain the value of the source vector. The upper 128 bits are set /// to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextpd128_pd256) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) } /// Return vector of type `__m256` with undefined elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_undefined_ps() -> __m256 { _mm256_set1_ps(mem::uninitialized()) } /// Return vector of type `__m256d` with undefined elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_undefined_pd() -> __m256d { _mm256_set1_pd(mem::uninitialized()) } /// Return vector of type __m256i with undefined elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_undefined_si256() -> __m256i { _mm256_set1_epi8(mem::uninitialized()) } /// Set packed __m256 returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Set packed __m256d returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128d) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { let hi: __m128 = mem::transmute(hi); let lo: __m128 = mem::transmute(lo); @@ -2343,9 +2894,12 @@ pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { } /// Set packed __m256i returned vector with the supplied values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128i) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { let hi: __m128 = mem::transmute(hi); let lo: __m128 = mem::transmute(lo); @@ -2353,25 +2907,34 @@ pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { } /// Set packed __m256 returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { _mm256_set_m128(hi, lo) } /// Set packed __m256d returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128d) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { _mm256_set_m128d(hi, lo) } /// Set packed __m256i returned vector with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128i) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { _mm256_set_m128i(hi, lo) } @@ -2380,9 +2943,12 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu2_m128) #[inline] #[target_feature(enable = "avx,sse")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128( hiaddr: *const f32, loaddr: *const f32 ) -> __m256 { @@ -2394,9 +2960,12 @@ pub unsafe fn _mm256_loadu2_m128( /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu2_m128d) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128d( hiaddr: *const f64, loaddr: *const f64 ) -> __m256d { @@ -2407,9 +2976,12 @@ pub unsafe fn _mm256_loadu2_m128d( /// Load two 128-bit values (composed of integer data) from memory, and combine /// them into a 256-bit value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu2_m128i) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. 
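// Illustrative sketch, not part of the patch: `gather_two_rows` is a made-up
// helper. `_mm256_loadu2_m128` stitches two unrelated, possibly unaligned
// 128-bit locations into one 256-bit register (low half from `loaddr`, high
// half from `hiaddr`), e.g. two rows of a small matrix.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx,sse")]
unsafe fn gather_two_rows(
    row_lo: &[f32; 4], row_hi: &[f32; 4],
) -> std::arch::x86_64::__m256 {
    use std::arch::x86_64::*;
    _mm256_loadu2_m128(row_hi.as_ptr(), row_lo.as_ptr()) // (hiaddr, loaddr)
}
// End of sketch.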
+#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128i( hiaddr: *const __m128i, loaddr: *const __m128i ) -> __m256i { @@ -2421,9 +2993,12 @@ pub unsafe fn _mm256_loadu2_m128i( /// single-precision (32-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu2_m128) #[inline] #[target_feature(enable = "avx,sse")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu2_m128( hiaddr: *mut f32, loaddr: *mut f32, a: __m256 ) { @@ -2437,9 +3012,12 @@ pub unsafe fn _mm256_storeu2_m128( /// double-precision (64-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu2_m128d) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu2_m128d( hiaddr: *mut f64, loaddr: *mut f64, a: __m256d ) { @@ -2452,9 +3030,12 @@ pub unsafe fn _mm256_storeu2_m128d( /// Store the high and low 128-bit halves (each composed of integer data) from /// `a` into memory two different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu2_m128i) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu2_m128i( hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i ) { @@ -2465,9 +3046,12 @@ pub unsafe fn _mm256_storeu2_m128i( } /// Returns the first element of the input vector of [8 x float]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtss_f32) #[inline] #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(movss))] FIXME +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 { simd_extract(a, 0) } diff --git a/coresimd/x86/avx2.rs b/coresimd/x86/avx2.rs index 66683f630d..a1e9e1f2a6 100644 --- a/coresimd/x86/avx2.rs +++ b/coresimd/x86/avx2.rs @@ -27,99 +27,135 @@ use mem; use stdsimd_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i { mem::transmute(pabsd(a.as_i32x8())) } /// Computes the absolute values of packed 16-bit integers in `a`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i { mem::transmute(pabsw(a.as_i16x16())) } /// Computes the absolute values of packed 8-bit integers in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { mem::transmute(pabsb(a.as_i8x32())) } /// Add packed 64-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i64x4(), b.as_i64x4())) } /// Add packed 32-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i32x8(), b.as_i32x8())) } /// Add packed 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i16x16(), b.as_i16x16())) } /// Add packed 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i8x32(), b.as_i8x32())) } /// Add packed 8-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddsb(a.as_i8x32(), b.as_i8x32())) } /// Add packed 16-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddsw(a.as_i16x16(), b.as_i16x16())) } /// Add packed unsigned 8-bit integers in `a` and `b` using saturation. 
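// Illustrative sketch, not part of the patch: a made-up helper contrasting the
// plain and saturating adds above. `_mm256_add_epi8` wraps on overflow, while
// `_mm256_adds_epi8` clamps to the type's range.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn wrap_vs_saturate() -> (i8, i8) {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi8(127);
    let b = _mm256_set1_epi8(1);
    let wrapped = _mm256_add_epi8(a, b); // every lane wraps to -128
    let clamped = _mm256_adds_epi8(a, b); // every lane saturates at 127
    let (mut w, mut c) = ([0i8; 32], [0i8; 32]);
    _mm256_storeu_si256(w.as_mut_ptr() as *mut __m256i, wrapped);
    _mm256_storeu_si256(c.as_mut_ptr() as *mut __m256i, clamped);
    (w[0], c[0]) // (-128, 127)
}
// End of sketch.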
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddusb(a.as_u8x32(), b.as_u8x32())) } /// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddusw(a.as_u16x16(), b.as_u16x16())) } /// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary /// result, shift the result right by `n` bytes, and return the low 16 bytes. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpalignr, n = 7))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { let n = n as u32; // If palignr is shifting the pair of vectors more than the size of two @@ -284,18 +320,24 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { /// Compute the bitwise AND of 256 bits (representing integer data) /// in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_and(a.as_i64x4(), b.as_i64x4())) } /// Compute the bitwise NOT of 256 bits (representing integer data) /// in `a` and then AND with `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { let all_ones = _mm256_set1_epi8(-1); mem::transmute(simd_and( @@ -305,26 +347,35 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { } /// Average packed unsigned 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpavgw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pavgw(a.as_u16x16(), b.as_u16x16())) } /// Average packed unsigned 8-bit integers in `a` and `b`. 
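A sketch of the AND-NOT intrinsic (hypothetical `demo_andnot` helper, same assumptions as the earlier sketches):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_andnot() {
        use std::arch::x86_64::*;
        // Computes (!a) & b per bit: !0b1100 & 0b1010 == 0b0010.
        let a = _mm256_set1_epi32(0b1100);
        let b = _mm256_set1_epi32(0b1010);
        let r = _mm256_andnot_si256(a, b);
        let mut out = [0i32; 8];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert_eq!(out, [0b0010; 8]);
    }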
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpavgb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pavgb(a.as_u8x32(), b.as_u8x32())) } /// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i32x4(); @@ -354,10 +405,13 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { } /// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_epi32( a: __m256i, b: __m256i, imm8: i32 ) -> __m256i { @@ -418,10 +472,13 @@ pub unsafe fn _mm256_blend_epi32( } /// Blend packed 16-bit integers from `a` and `b` using control mask `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_epi16( a: __m256i, b: __m256i, imm8: i32 ) -> __m256i { @@ -639,9 +696,12 @@ pub unsafe fn _mm256_blend_epi16( } /// Blend packed 8-bit integers from `a` and `b` using `mask`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendvb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_epi8( a: __m256i, b: __m256i, mask: __m256i ) -> __m256i { @@ -654,9 +714,12 @@ pub unsafe fn _mm256_blendv_epi8( /// Broadcast the low packed 8-bit integer from `a` to all elements of /// the 128-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let ret = simd_shuffle16(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]); @@ -665,9 +728,12 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { /// Broadcast the low packed 8-bit integer from `a` to all elements of /// the 256-bit returned value. 
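A sketch of the immediate-controlled blend (the `demo_blend` helper is hypothetical; the mask must be a literal because of `rustc_args_required_const`):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_blend() {
        use std::arch::x86_64::*;
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        // Bit i of the compile-time mask selects lane i from `b`, otherwise from `a`.
        let r = _mm256_blend_epi32(a, b, 0b0000_1111);
        let mut out = [0i32; 8];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert_eq!(out, [10, 11, 12, 13, 4, 5, 6, 7]);
    }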
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle32(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]); @@ -678,9 +744,12 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { // often compiled to vbroadcastss. /// Broadcast the low packed 32-bit integer from `a` to all elements of /// the 128-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let ret = simd_shuffle4(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]); @@ -691,9 +760,12 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { // often compiled to vbroadcastss. /// Broadcast the low packed 32-bit integer from `a` to all elements of /// the 256-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle8(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]); @@ -702,9 +774,12 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { /// Broadcast the low packed 64-bit integer from `a` to all elements of /// the 128-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { let zero = _mm_setzero_si128().as_i64x2(); let ret = simd_shuffle2(a.as_i64x2(), zero, [0_u32; 2]); @@ -715,9 +790,12 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { // often compiled to vbroadcastsd. /// Broadcast the low packed 64-bit integer from `a` to all elements of /// the 256-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0_u32; 4]); @@ -726,18 +804,24 @@ pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { /// Broadcast the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. 
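A sketch of the 32-bit broadcast (hypothetical `demo_broadcast` helper, same assumptions as above):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_broadcast() {
        use std::arch::x86_64::*;
        // The lowest 32-bit lane of `a` (7) is replicated into all eight lanes.
        let a = _mm_setr_epi32(7, 1, 2, 3);
        let r = _mm256_broadcastd_epi32(a);
        let mut out = [0i32; 8];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert_eq!(out, [7; 8]);
    }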
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vmovddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2]) } /// Broadcast the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4]) } @@ -746,8 +830,11 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { // vbroadcastf128. /// Broadcast 128 bits of integer data from a to all 128-bit lanes in /// the 256-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256) #[inline] #[target_feature(enable = "avx2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); @@ -756,27 +843,36 @@ pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { /// Broadcast the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4]) } /// Broadcast the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8]) } /// Broadcast the low packed 16-bit integer from a to all elements of /// the 128-bit returned value +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let ret = simd_shuffle8(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]); @@ -785,9 +881,12 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { /// Broadcast the low packed 16-bit integer from a to all elements of /// the 256-bit returned value +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle16(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]); @@ -795,81 +894,111 @@ pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { } /// Compare packed 64-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) } /// Compare packed 32-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) } /// Compare packed 16-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) } /// Compare packed 8-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) } /// Compare packed 64-bit integers in `a` and `b` for greater-than. 
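A sketch of packed equality comparison (hypothetical `demo_cmpeq` helper; equal lanes come back as all-ones masks):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_cmpeq() {
        use std::arch::x86_64::*;
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(0, 9, 2, 9, 4, 9, 6, 9);
        // Equal lanes become all-ones (-1), unequal lanes become zero.
        let r = _mm256_cmpeq_epi32(a, b);
        let mut out = [0i32; 8];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert_eq!(out, [-1, 0, -1, 0, -1, 0, -1, 0]);
    }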
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) } /// Compare packed 32-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) } /// Compare packed 16-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) } /// Compare packed 8-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i8x32(), b.as_i8x32())) } /// Sign-extend 16-bit integers to 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { mem::transmute::(simd_cast(a.as_i16x8())) } /// Sign-extend 16-bit integers to 64-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { let a = a.as_i16x8(); let v64: i16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); @@ -877,25 +1006,34 @@ pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { } /// Sign-extend 32-bit integers to 64-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { mem::transmute::(simd_cast(a.as_i32x4())) } /// Sign-extend 8-bit integers to 16-bit integers. 
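A sketch of sign extension from 16-bit to 32-bit lanes (hypothetical `demo_sign_extend` helper):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_sign_extend() {
        use std::arch::x86_64::*;
        // Eight 16-bit lanes are sign-extended into eight 32-bit lanes.
        let a = _mm_setr_epi16(-1, 2, -3, 4, -5, 6, -7, 8);
        let r = _mm256_cvtepi16_epi32(a);
        let mut out = [0i32; 8];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert_eq!(out, [-1, 2, -3, 4, -5, 6, -7, 8]);
    }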
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { mem::transmute::(simd_cast(a.as_i8x16())) } /// Sign-extend 8-bit integers to 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { let a = a.as_i8x16(); let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); @@ -903,9 +1041,12 @@ pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { } /// Sign-extend 8-bit integers to 64-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { let a = a.as_i8x16(); let v32: i8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); @@ -914,18 +1055,24 @@ pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { /// Zero extend packed unsigned 16-bit integers in `a` to packed 32-bit /// integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { mem::transmute::(simd_cast(a.as_u16x8())) } /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit /// integers. The upper four elements of `a` are unused. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { let a = a.as_u16x8(); let v64: u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); @@ -933,26 +1080,35 @@ pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { } /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { mem::transmute::(simd_cast(a.as_u32x4())) } /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { mem::transmute::(simd_cast(a.as_u8x16())) } /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit /// integers. The upper eight elements of `a` are unused. 
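A sketch of zero extension, which differs from the sign-extending variants above (hypothetical `demo_zero_extend` helper):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_zero_extend() {
        use std::arch::x86_64::*;
        // 0xFF bytes are zero-extended, so each 16-bit lane holds 255, not -1.
        let a = _mm_set1_epi8(-1);
        let r = _mm256_cvtepu8_epi16(a);
        let mut out = [0i16; 16];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert!(out.iter().all(|&x| x == 255));
    }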
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { let a = a.as_u8x16(); let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); @@ -961,9 +1117,12 @@ pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit /// integers. The upper twelve elements of `a` are unused. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { let a = a.as_u8x16(); let v32: u8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); @@ -971,10 +1130,13 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { } /// Extract 128 bits (of integer data) from `a` selected with `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i { let a = a.as_i64x4(); let b = _mm256_undefined_si256().as_i64x4(); @@ -986,51 +1148,69 @@ pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i { } /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddw(a.as_i16x16(), b.as_i16x16())) } /// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddd(a.as_i32x8(), b.as_i32x8())) } /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b` /// using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`. 
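A sketch of extracting one 128-bit half of a 256-bit vector (hypothetical `demo_extract` helper; the lane index must be a compile-time constant):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_extract() {
        use std::arch::x86_64::*;
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        // imm8 = 1 selects the upper 128-bit half (64-bit lanes 2 and 3).
        let hi = _mm256_extracti128_si256(a, 1);
        let mut out = [0i64; 2];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi);
        assert_eq!(out, [2, 3]);
    }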
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phsubw(a.as_i16x16(), b.as_i16x16())) } /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phsubd(a.as_i32x8(), b.as_i32x8())) } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b` /// using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsubs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) } @@ -1038,10 +1218,13 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_epi32( slice: *const i32, offsets: __m128i, scale: i32 ) -> __m128i { @@ -1062,10 +1245,13 @@ pub unsafe fn _mm_i32gather_epi32( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_epi32( src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, scale: i32, @@ -1086,10 +1272,13 @@ pub unsafe fn _mm_mask_i32gather_epi32( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_epi32( slice: *const i32, offsets: __m256i, scale: i32 ) -> __m256i { @@ -1110,10 +1299,13 @@ pub unsafe fn _mm256_i32gather_epi32( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
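A sketch of a plain (unmasked) 32-bit gather (hypothetical `demo_gather` helper; `scale` must be a compile-time constant of 1, 2, 4 or 8):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_gather() {
        use std::arch::x86_64::*;
        let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
        // With scale = 4, each offset is an element index into the i32 slice.
        let offsets = _mm_setr_epi32(0, 2, 4, 6);
        let r = _mm_i32gather_epi32(data.as_ptr(), offsets, 4);
        let mut out = [0i32; 4];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
        assert_eq!(out, [10, 12, 14, 16]);
    }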
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_epi32( src: __m256i, slice: *const i32, offsets: __m256i, mask: __m256i, scale: i32, @@ -1134,10 +1326,13 @@ pub unsafe fn _mm256_mask_i32gather_epi32( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_ps( slice: *const f32, offsets: __m128i, scale: i32 ) -> __m128 { @@ -1157,10 +1352,13 @@ pub unsafe fn _mm_i32gather_ps( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_ps( src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32 ) -> __m128 { @@ -1177,10 +1375,13 @@ pub unsafe fn _mm_mask_i32gather_ps( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_ps( slice: *const f32, offsets: __m256i, scale: i32 ) -> __m256 { @@ -1200,10 +1401,13 @@ pub unsafe fn _mm256_i32gather_ps( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_ps( src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32 ) -> __m256 { @@ -1220,10 +1424,13 @@ pub unsafe fn _mm256_mask_i32gather_ps( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_epi64( slice: *const i64, offsets: __m128i, scale: i32 ) -> __m128i { @@ -1244,10 +1451,13 @@ pub unsafe fn _mm_i32gather_epi64( /// where /// `scale` is between 1 and 8. 
If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_epi64( src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, scale: i32, @@ -1268,10 +1478,13 @@ pub unsafe fn _mm_mask_i32gather_epi64( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_epi64( slice: *const i64, offsets: __m128i, scale: i32 ) -> __m256i { @@ -1292,10 +1505,13 @@ pub unsafe fn _mm256_i32gather_epi64( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_epi64( src: __m256i, slice: *const i64, offsets: __m128i, mask: __m256i, scale: i32, @@ -1316,10 +1532,13 @@ pub unsafe fn _mm256_mask_i32gather_epi64( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_pd( slice: *const f64, offsets: __m128i, scale: i32 ) -> __m128d { @@ -1339,10 +1558,13 @@ pub unsafe fn _mm_i32gather_pd( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_pd( src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, scale: i32, @@ -1360,10 +1582,13 @@ pub unsafe fn _mm_mask_i32gather_pd( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_pd( slice: *const f64, offsets: __m128i, scale: i32 ) -> __m256d { @@ -1383,10 +1608,13 @@ pub unsafe fn _mm256_i32gather_pd( /// where /// `scale` is between 1 and 8. 
If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_pd( src: __m256d, slice: *const f64, offsets: __m128i, mask: __m256d, scale: i32, @@ -1404,10 +1632,13 @@ pub unsafe fn _mm256_mask_i32gather_pd( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_epi32( slice: *const i32, offsets: __m128i, scale: i32 ) -> __m128i { @@ -1428,10 +1659,13 @@ pub unsafe fn _mm_i64gather_epi32( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_epi32( src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, scale: i32, @@ -1452,10 +1686,13 @@ pub unsafe fn _mm_mask_i64gather_epi32( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_epi32( slice: *const i32, offsets: __m256i, scale: i32 ) -> __m128i { @@ -1476,10 +1713,13 @@ pub unsafe fn _mm256_i64gather_epi32( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_epi32( src: __m128i, slice: *const i32, offsets: __m256i, mask: __m128i, scale: i32, @@ -1500,10 +1740,13 @@ pub unsafe fn _mm256_mask_i64gather_epi32( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_ps( slice: *const f32, offsets: __m128i, scale: i32 ) -> __m128 { @@ -1523,10 +1766,13 @@ pub unsafe fn _mm_i64gather_ps( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_ps( src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32 ) -> __m128 { @@ -1543,10 +1789,13 @@ pub unsafe fn _mm_mask_i64gather_ps( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_ps( slice: *const f32, offsets: __m256i, scale: i32 ) -> __m128 { @@ -1566,10 +1815,13 @@ pub unsafe fn _mm256_i64gather_ps( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_ps( src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32 ) -> __m128 { @@ -1586,10 +1838,13 @@ pub unsafe fn _mm256_mask_i64gather_ps( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_epi64( slice: *const i64, offsets: __m128i, scale: i32 ) -> __m128i { @@ -1610,10 +1865,13 @@ pub unsafe fn _mm_i64gather_epi64( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_epi64( src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, scale: i32, @@ -1634,10 +1892,13 @@ pub unsafe fn _mm_mask_i64gather_epi64( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_epi64( slice: *const i64, offsets: __m256i, scale: i32 ) -> __m256i { @@ -1658,10 +1919,13 @@ pub unsafe fn _mm256_i64gather_epi64( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_epi64( src: __m256i, slice: *const i64, offsets: __m256i, mask: __m256i, scale: i32, @@ -1682,10 +1946,13 @@ pub unsafe fn _mm256_mask_i64gather_epi64( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_pd( slice: *const f64, offsets: __m128i, scale: i32 ) -> __m128d { @@ -1705,10 +1972,13 @@ pub unsafe fn _mm_i64gather_pd( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_pd( src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, scale: i32, @@ -1726,10 +1996,13 @@ pub unsafe fn _mm_mask_i64gather_pd( /// Return values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_pd( slice: *const f64, offsets: __m256i, scale: i32 ) -> __m256d { @@ -1749,10 +2022,13 @@ pub unsafe fn _mm256_i64gather_pd( /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
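A sketch of a masked 64-bit gather, showing the fallback to `src` for masked-off lanes (hypothetical `demo_masked_gather` helper):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_masked_gather() {
        use std::arch::x86_64::*;
        let data: [i64; 4] = [100, 101, 102, 103];
        let src = _mm_set1_epi64x(-1);
        let offsets = _mm_set_epi64x(3, 0); // lane 0 reads index 0, lane 1 index 3
        let mask = _mm_set_epi64x(0, -1);   // only lane 0 has its high bit set
        let r = _mm_mask_i64gather_epi64(src, data.as_ptr(), offsets, mask, 8);
        let mut out = [0i64; 2];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
        assert_eq!(out, [100, -1]); // masked-off lane falls back to `src`
    }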
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_pd( src: __m256d, slice: *const f64, offsets: __m256i, mask: __m256d, scale: i32, @@ -1769,10 +2045,13 @@ pub unsafe fn _mm256_mask_i64gather_pd( /// Copy `a` to `dst`, then insert 128 bits (of integer data) from `b` at the /// location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_inserti128_si256( a: __m256i, b: __m128i, imm8: i32 ) -> __m256i { @@ -1788,9 +2067,12 @@ pub unsafe fn _mm256_inserti128_si256( /// Multiply packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs /// of intermediate 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaddwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) } @@ -1799,9 +2081,12 @@ pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { /// corresponding signed 8-bit integer from `b`, producing intermediate /// signed 16-bit integers. Horizontally add adjacent pairs of intermediate /// signed 16-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaddubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) } @@ -1809,9 +2094,12 @@ pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { /// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_epi32( mem_addr: *const i32, mask: __m128i ) -> __m128i { @@ -1824,9 +2112,12 @@ pub unsafe fn _mm_maskload_epi32( /// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_epi32( mem_addr: *const i32, mask: __m256i ) -> __m256i { @@ -1839,9 +2130,12 @@ pub unsafe fn _mm256_maskload_epi32( /// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_epi64( mem_addr: *const i64, mask: __m128i ) -> __m128i { @@ -1854,9 +2148,12 @@ pub unsafe fn _mm_maskload_epi64( /// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_epi64( mem_addr: *const i64, mask: __m256i ) -> __m256i { @@ -1869,9 +2166,12 @@ pub unsafe fn _mm256_maskload_epi64( /// Store packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_epi32( mem_addr: *mut i32, mask: __m128i, a: __m128i ) { @@ -1885,9 +2185,12 @@ pub unsafe fn _mm_maskstore_epi32( /// Store packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_epi32( mem_addr: *mut i32, mask: __m256i, a: __m256i ) { @@ -1901,9 +2204,12 @@ pub unsafe fn _mm256_maskstore_epi32( /// Store packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_epi64( mem_addr: *mut i64, mask: __m128i, a: __m128i ) { @@ -1917,9 +2223,12 @@ pub unsafe fn _mm_maskstore_epi64( /// Store packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). 
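A sketch of a masked store (hypothetical `demo_maskstore` helper; unselected lanes keep whatever was already in memory):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_maskstore() {
        use std::arch::x86_64::*;
        let mut out = [9i32; 8];
        let vals = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        // Only lanes whose mask element has its high bit set are written back.
        let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
        _mm256_maskstore_epi32(out.as_mut_ptr(), mask, vals);
        assert_eq!(out, [1, 9, 3, 9, 5, 9, 7, 9]);
    }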
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_epi64( mem_addr: *mut i64, mask: __m256i, a: __m256i ) { @@ -1932,117 +2241,156 @@ pub unsafe fn _mm256_maskstore_epi64( /// Compare packed 16-bit integers in `a` and `b`, and return the packed /// maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsw(a.as_i16x16(), b.as_i16x16())) } /// Compare packed 32-bit integers in `a` and `b`, and return the packed /// maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsd(a.as_i32x8(), b.as_i32x8())) } /// Compare packed 8-bit integers in `a` and `b`, and return the packed /// maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsb(a.as_i8x32(), b.as_i8x32())) } /// Compare packed unsigned 16-bit integers in `a` and `b`, and return /// the packed maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxuw(a.as_u16x16(), b.as_u16x16())) } /// Compare packed unsigned 32-bit integers in `a` and `b`, and return /// the packed maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxud))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxud(a.as_u32x8(), b.as_u32x8())) } /// Compare packed unsigned 8-bit integers in `a` and `b`, and return /// the packed maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxub))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxub(a.as_u8x32(), b.as_u8x32())) } /// Compare packed 16-bit integers in `a` and `b`, and return the packed /// minimum values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsw(a.as_i16x16(), b.as_i16x16())) } /// Compare packed 32-bit integers in `a` and `b`, and return the packed /// minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsd(a.as_i32x8(), b.as_i32x8())) } /// Compare packed 8-bit integers in `a` and `b`, and return the packed /// minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsb(a.as_i8x32(), b.as_i8x32())) } /// Compare packed unsigned 16-bit integers in `a` and `b`, and return /// the packed minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminuw(a.as_u16x16(), b.as_u16x16())) } /// Compare packed unsigned 32-bit integers in `a` and `b`, and return /// the packed minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminud))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminud(a.as_u32x8(), b.as_u32x8())) } /// Compare packed unsigned 8-bit integers in `a` and `b`, and return /// the packed minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminub))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminub(a.as_u8x32(), b.as_u8x32())) } /// Create mask from the most significant bit of each 8-bit element in `a`, /// return the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovmskb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { pmovmskb(a.as_i8x32()) } @@ -2054,10 +2402,13 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { /// selected from `b` starting at on the offset specified in `imm8`. Eight /// quadruplets are formed from sequential 8-bit integers selected from `a` /// starting at the offset specified in `imm8`. 
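A sketch combining a greater-than comparison with `_mm256_movemask_epi8` to obtain a scalar bitmask (hypothetical `demo_movemask` helper):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn demo_movemask() {
        use std::arch::x86_64::*;
        let a = _mm256_setr_epi32(1, -1, 2, -2, 3, -3, 4, -4);
        let zero = _mm256_setzero_si256();
        // Lanes of `a` greater than zero become all-ones; movemask then packs
        // one bit per byte of the comparison result into a 32-bit integer.
        let gt = _mm256_cmpgt_epi32(a, zero);
        let bits = _mm256_movemask_epi8(gt);
        assert_eq!(bits as u32, 0x0F0F_0F0F);
    }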
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mpsadbw_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mpsadbw_epu8( a: __m256i, b: __m256i, imm8: i32 ) -> __m256i { @@ -2076,9 +2427,12 @@ pub unsafe fn _mm256_mpsadbw_epu8( /// `a` and `b` /// /// Return the 64-bit results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmuldq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmuldq(a.as_i32x8(), b.as_i32x8())) } @@ -2087,9 +2441,12 @@ pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { /// element in `a` and `b` /// /// Return the unsigned 64-bit results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmuludq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmuludq(a.as_u32x8(), b.as_u32x8())) } @@ -2097,9 +2454,12 @@ pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { /// Multiply the packed 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhw(a.as_i16x16(), b.as_i16x16())) } @@ -2107,9 +2467,12 @@ pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { /// Multiply the packed unsigned 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhuw(a.as_u16x16(), b.as_u16x16())) } @@ -2117,9 +2480,12 @@ pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { /// Multiply the packed 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers, and return the low 16 bits of the /// intermediate integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmullw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) } @@ -2127,9 +2493,12 @@ pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { /// Multiply the packed 32-bit integers in `a` and `b`, producing /// intermediate 64-bit integers, and return the low 16 bits of the /// intermediate integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) } @@ -2138,54 +2507,72 @@ pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { /// intermediate signed 32-bit integers. 
Truncate each intermediate /// integer to the 18 most significant bits, round by adding 1, and /// return bits [16:1] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhrs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhrsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) } /// Compute the bitwise OR of 256 bits (representing integer data) in `a` /// and `b` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpacksswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackssdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackuswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackusdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packusdw(a.as_i32x8(), b.as_i32x8())) } @@ -2194,18 +2581,24 @@ pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { /// /// The last 3 bits of each integer of `b` are used as addresses into the 8 /// integers of `a`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(permd(a.as_u32x8(), b.as_u32x8())) } /// Permutes 64-bit integers from `a` using control mask `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let zero = _mm256_setzero_si256().as_i64x4(); @@ -2255,10 +2648,13 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i { } /// Shuffle 128-bits of integer data selected by `imm8` from `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2x128_si256( a: __m256i, b: __m256i, imm8: i32 ) -> __m256i { @@ -2274,10 +2670,13 @@ pub unsafe fn _mm256_permute2x128_si256( /// Shuffle 64-bit floating-point elements in `a` across lanes using the /// control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; let undef = _mm256_undefined_pd(); @@ -2326,9 +2725,12 @@ pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d { /// Shuffle eight 32-bit foating-point elements in `a` across lanes using /// the corresponding 32-bit integer index in `idx`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { permps(a, idx.as_i32x8()) } @@ -2337,9 +2739,12 @@ pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { /// and `b`, then horizontally sum each consecutive 8 differences to /// produce four unsigned 16-bit integers, and pack these unsigned 16-bit /// integers in the low 16 bits of the 64-bit return value +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sad_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsadbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psadbw(a.as_u8x32(), b.as_u8x32())) } @@ -2373,9 +2778,12 @@ pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { /// r /// } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshufb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pshufb(a.as_u8x32(), b.as_u8x32())) } @@ -2416,10 +2824,13 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { // simd_shuffleX requires that its selector parameter be made up of // constant values, but we can't enforce that here. In spirit, we need @@ -2491,10 +2902,13 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied /// to the output. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflehi_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x16(); @@ -2549,10 +2963,13 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i { /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied /// to the output. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x16(); @@ -2607,9 +3024,12 @@ pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i { /// Negate packed 16-bit integers in `a` when the corresponding signed /// 16-bit integer in `b` is negative, and return the results. /// Results are zeroed out when the corresponding element in `b` is zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignw(a.as_i16x16(), b.as_i16x16())) } @@ -2617,9 +3037,12 @@ pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { /// Negate packed 32-bit integers in `a` when the corresponding signed /// 32-bit integer in `b` is negative, and return the results. /// Results are zeroed out when the corresponding element in `b` is zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignd(a.as_i32x8(), b.as_i32x8())) } @@ -2627,72 +3050,96 @@ pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { /// Negate packed 8-bit integers in `a` when the corresponding signed /// 8-bit integer in `b` is negative, and return the results. /// Results are zeroed out when the corresponding element in `b` is zero. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignb(a.as_i8x32(), b.as_i8x32())) } /// Shift packed 16-bit integers in `a` left by `count` while /// shifting in zeros, and return the result +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psllw(a.as_i16x16(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` left by `count` while /// shifting in zeros, and return the result +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(pslld(a.as_i32x8(), count.as_i32x4())) } /// Shift packed 64-bit integers in `a` left by `count` while /// shifting in zeros, and return the result +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psllq(a.as_i64x4(), count.as_i64x2())) } /// Shift packed 16-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(pslliw(a.as_i16x16(), imm8)) } /// Shift packed 32-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psllid(a.as_i32x8(), imm8)) } /// Shift packed 64-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(pslliq(a.as_i64x4(), imm8)) } /// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { @@ -2704,10 +3151,13 @@ pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i { } /// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { @@ -2721,9 +3171,12 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i { /// Shift packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and return the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } @@ -2731,9 +3184,12 @@ pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { /// Shift packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and return the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } @@ -2741,9 +3197,12 @@ pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { /// Shift packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and return the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } @@ -2751,72 +3210,96 @@ pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { /// Shift packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and return the result. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } /// Shift packed 16-bit integers in `a` right by `count` while /// shifting in sign bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psraw(a.as_i16x16(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` right by `count` while /// shifting in sign bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrad(a.as_i32x8(), count.as_i32x4())) } /// Shift packed 16-bit integers in `a` right by `imm8` while /// shifting in sign bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srai_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psraiw(a.as_i16x16(), imm8)) } /// Shift packed 32-bit integers in `a` right by `imm8` while /// shifting in sign bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srai_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psraid(a.as_i32x8(), imm8)) } /// Shift packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsravd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psravd(a.as_i32x4(), count.as_i32x4())) } /// Shift packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsravd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } /// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { @@ -2828,10 +3311,13 @@ pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i { } /// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { @@ -2844,90 +3330,120 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i { /// Shift packed 16-bit integers in `a` right by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` right by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrld(a.as_i32x8(), count.as_i32x4())) } /// Shift packed 64-bit integers in `a` right by `count` while shifting in /// zeros. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrliw(a.as_i16x16(), imm8)) } /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrlid(a.as_i32x8(), imm8)) } /// Shift packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrliq(a.as_i64x4(), imm8)) } /// Shift packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } /// Shift packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } /// Shift packed 64-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } /// Shift packed 64-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) } @@ -2935,69 +3451,93 @@ pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { // TODO _mm256_stream_load_si256 (__m256i const* mem_addr) /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) } /// Subtract packed 32-bit integers in `b` from packed 16-bit integers in `a` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) } /// Subtract packed 64-bit integers in `b` from packed 16-bit integers in `a` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) } /// Subtract packed 8-bit integers in `b` from packed 16-bit integers in `a` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in /// `a` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psubsw(a.as_i16x16(), b.as_i16x16())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in /// `a` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psubsb(a.as_i8x32(), b.as_i8x32())) } /// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit /// integers in `a` using saturation. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psubusw(a.as_u16x16(), b.as_u16x16())) } /// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit /// integers in `a` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psubusb(a.as_u8x32(), b.as_u8x32())) } @@ -3040,9 +3580,12 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [ @@ -3091,9 +3634,12 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpcklbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [ @@ -3140,9 +3686,12 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { let r: i16x16 = simd_shuffle16( a.as_i16x16(), @@ -3190,9 +3739,12 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpcklwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { let r: i16x16 = simd_shuffle16( a.as_i16x16(), @@ -3238,9 +3790,12 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { let r: i32x8 = simd_shuffle8( a.as_i32x8(), @@ -3284,9 +3839,12 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) 
-> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { let r: i32x8 = simd_shuffle8( a.as_i32x8(), @@ -3330,9 +3888,12 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); mem::transmute(r) @@ -3372,9 +3933,12 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { /// # } /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpcklpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); mem::transmute(r) @@ -3382,9 +3946,12 @@ pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { /// Compute the bitwise XOR of 256 bits (representing integer data) /// in `a` and `b` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } @@ -3393,10 +3960,13 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi8) #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 { let imm8 = (imm8 & 31) as u32; simd_extract(a.as_i8x32(), imm8) @@ -3406,37 +3976,49 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 { /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi16) #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 { let imm8 = (imm8 & 15) as u32; simd_extract(a.as_i16x16(), imm8) } /// Extract a 32-bit integer from `a`, selected with `imm8`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32) #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi32(a: __m256i, imm8: i32) -> i32 { let imm8 = (imm8 & 7) as u32; simd_extract(a.as_i32x8(), imm8) } /// Returns the first element of the input vector of [4 x double]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsd_f64) #[inline] #[target_feature(enable = "avx2")] //#[cfg_attr(test, assert_instr(movsd))] FIXME +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { simd_extract(a, 0) } /// Returns the first element of the input vector of [8 x i32]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsi256_si32) #[inline] #[target_feature(enable = "avx2")] //#[cfg_attr(test, assert_instr(movd))] FIXME +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { simd_extract(a.as_i32x8(), 0) } diff --git a/coresimd/x86/bmi.rs b/coresimd/x86/bmi1.rs similarity index 78% rename from coresimd/x86/bmi.rs rename to coresimd/x86/bmi1.rs index aac578cb45..dc27fb2b7e 100644 --- a/coresimd/x86/bmi.rs +++ b/coresimd/x86/bmi1.rs @@ -14,9 +14,12 @@ use stdsimd_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32( a, @@ -29,33 +32,45 @@ pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { /// /// Bits [7,0] of `control` specify the index to the first bit in the range to /// be extracted, and bits [15,8] specify the length of the range. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 { x86_bmi_bextr_32(a, control) } /// Bitwise logical `AND` of inverted `a` with `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(andn))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } /// Extract lowest set isolated bit. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsi))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } /// Get mask up to lowest set bit. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_sub(1_u32)) } @@ -63,9 +78,12 @@ pub unsafe fn _blsmsk_u32(x: u32) -> u32 { /// Resets the lowest set bit of `x`. /// /// If `x` is sets CF. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsr))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsr_u32(x: u32) -> u32 { x & (x.wrapping_sub(1)) } @@ -73,9 +91,12 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 { /// Counts the number of trailing least significant zero bits. /// /// When the source operand is 0, it returns its size in bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } @@ -83,9 +104,12 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 { /// Counts the number of trailing least significant zero bits. /// /// When the source operand is 0, it returns its size in bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 { x.trailing_zeros() as i32 } diff --git a/coresimd/x86/bmi2.rs b/coresimd/x86/bmi2.rs index 686576ce42..adb1b52fee 100644 --- a/coresimd/x86/bmi2.rs +++ b/coresimd/x86/bmi2.rs @@ -17,11 +17,14 @@ use stdsimd_test::assert_instr; /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u32) #[inline] // LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))] #[cfg_attr(all(test, target_arch = "x86"), assert_instr(mulx))] #[target_feature(enable = "bmi2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { let result: u64 = (a as u64) * (b as u64); *hi = (result >> 32) as u32; @@ -29,27 +32,36 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { } /// Zero higher bits of `a` >= `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(bzhi))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 { x86_bmi2_bzhi_32(a, index) } /// Scatter contiguous low order bits of `a` to the result at the positions /// specified by the `mask`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u32) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pdep))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 { x86_bmi2_pdep_32(a, mask) } /// Gathers the bits of `x` specified by the `mask` into the contiguous low /// order bit positions of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u32) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pext))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 { x86_bmi2_pext_32(a, mask) } diff --git a/coresimd/x86/bswap.rs b/coresimd/x86/bswap.rs index 72b981e3ba..d7152562c7 100644 --- a/coresimd/x86/bswap.rs +++ b/coresimd/x86/bswap.rs @@ -6,8 +6,11 @@ use stdsimd_test::assert_instr; /// Return an integer with the reversed byte order of x +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap) #[inline] #[cfg_attr(test, assert_instr(bswap))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bswap(x: i32) -> i32 { bswap_i32(x) } diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index 9f72e6b53e..7e000625ce 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -10,14 +10,19 @@ use stdsimd_test::assert_instr; /// Result of the `cpuid` instruction. #[derive(Copy, Clone, Eq, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "cargo-clippy", allow(stutter))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub struct CpuidResult { /// EAX register. + #[stable(feature = "simd_x86", since = "1.27.0")] pub eax: u32, /// EBX register. + #[stable(feature = "simd_x86", since = "1.27.0")] pub ebx: u32, /// ECX register. + #[stable(feature = "simd_x86", since = "1.27.0")] pub ecx: u32, /// EDX register. + #[stable(feature = "simd_x86", since = "1.27.0")] pub edx: u32, } @@ -46,6 +51,7 @@ pub struct CpuidResult { /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf #[inline] #[cfg_attr(test, assert_instr(cpuid))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult { let mut r = mem::uninitialized::(); if cfg!(target_arch = "x86") { @@ -66,6 +72,7 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult { /// See [`__cpuid_count`](fn.__cpuid_count.html). #[inline] #[cfg_attr(test, assert_instr(cpuid))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { __cpuid_count(leaf, 0) } @@ -114,6 +121,7 @@ pub fn has_cpuid() -> bool { /// See also [`__cpuid`](fn.__cpuid.html) and /// [`__cpuid_count`](fn.__cpuid_count.html). #[inline] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) { let CpuidResult { eax, ebx, .. } = __cpuid(leaf); (eax, ebx) diff --git a/coresimd/x86/eflags.rs b/coresimd/x86/eflags.rs index 8d925ecdd0..0a7ba919a7 100644 --- a/coresimd/x86/eflags.rs +++ b/coresimd/x86/eflags.rs @@ -1,8 +1,11 @@ //! `i386` intrinsics /// Reads EFLAGS. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags) #[cfg(target_arch = "x86")] #[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __readeflags() -> u32 { let eflags: u32; asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile"); @@ -10,8 +13,11 @@ pub unsafe fn __readeflags() -> u32 { } /// Reads EFLAGS. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags) #[cfg(target_arch = "x86_64")] #[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __readeflags() -> u64 { let eflags: u64; asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile"); @@ -19,15 +25,21 @@ pub unsafe fn __readeflags() -> u64 { } /// Write EFLAGS. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags) #[cfg(target_arch = "x86")] #[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __writeeflags(eflags: u32) { asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile"); } /// Write EFLAGS. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags) #[cfg(target_arch = "x86_64")] #[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __writeeflags(eflags: u64) { asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile"); } diff --git a/coresimd/x86/fxsr.rs b/coresimd/x86/fxsr.rs index 2fa2685157..c8da47f3ca 100644 --- a/coresimd/x86/fxsr.rs +++ b/coresimd/x86/fxsr.rs @@ -21,9 +21,12 @@ extern "C" { /// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxsave))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxsave(mem_addr: *mut u8) { fxsave(mem_addr) } @@ -42,9 +45,12 @@ pub unsafe fn _fxsave(mem_addr: *mut u8) { /// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxrstor))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxrstor(mem_addr: *const u8) { fxrstor(mem_addr) } diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs index 16ef3eead5..521b98e84e 100644 --- a/coresimd/x86/mod.rs +++ b/coresimd/x86/mod.rs @@ -105,6 +105,7 @@ types! { /// # if is_x86_feature_detected!("sse2") { unsafe { foo() } } /// # } /// ``` + #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m128i(i64, i64); /// 128-bit wide set of four `f32` types, x86-specific @@ -148,6 +149,7 @@ types! { /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } /// # } /// ``` + #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m128(f32, f32, f32, f32); /// 128-bit wide set of two `f64` types, x86-specific @@ -191,6 +193,7 @@ types! { /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } /// # } /// ``` + #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m128d(f64, f64); /// 256-bit wide integer vector type, x86-specific @@ -238,6 +241,7 @@ types! 
{ /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } /// # } /// ``` + #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m256i(i64, i64, i64, i64); /// 256-bit wide set of eight `f32` types, x86-specific @@ -281,6 +285,7 @@ types! { /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } /// # } /// ``` + #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32); /// 256-bit wide set of four `f64` types, x86-specific @@ -324,6 +329,7 @@ types! { /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } /// # } /// ``` + #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m256d(f64, f64, f64, f64); } @@ -334,6 +340,7 @@ pub use self::test::*; #[doc(hidden)] #[allow(non_camel_case_types)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub(crate) trait m128iExt: Sized { fn as_m128i(self) -> __m128i; @@ -387,6 +394,7 @@ impl m128iExt for __m128i { #[doc(hidden)] #[allow(non_camel_case_types)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub(crate) trait m256iExt: Sized { fn as_m256i(self) -> __m256i; @@ -570,8 +578,8 @@ pub use self::avx2::*; mod abm; pub use self::abm::*; -mod bmi; -pub use self::bmi::*; +mod bmi1; +pub use self::bmi1::*; mod bmi2; pub use self::bmi2::*; diff --git a/coresimd/x86/pclmulqdq.rs b/coresimd/x86/pclmulqdq.rs index b5b7465ca4..4cef220295 100644 --- a/coresimd/x86/pclmulqdq.rs +++ b/coresimd/x86/pclmulqdq.rs @@ -21,6 +21,8 @@ extern "C" { /// /// The immediate byte is used for determining which halves of `a` and `b` /// should be used. Immediate bits other than 0 and 4 are ignored. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128) #[inline] #[target_feature(enable = "pclmulqdq")] #[cfg_attr(all(test, not(target_os = "linux")), @@ -34,6 +36,7 @@ extern "C" { #[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, imm8 = 17))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_clmulepi64_si128( a: __m128i, b: __m128i, imm8: i32 ) -> __m128i { diff --git a/coresimd/x86/rdrand.rs b/coresimd/x86/rdrand.rs index 9877125851..2b900837fd 100644 --- a/coresimd/x86/rdrand.rs +++ b/coresimd/x86/rdrand.rs @@ -14,10 +14,13 @@ use stdsimd_test::assert_instr; /// Read a hardware generated 16-bit random value and store the result in val. /// Return 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step) #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[cfg_attr(feature = "cargo-clippy", allow(stutter))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { let (v, flag) = x86_rdrand16_step(); *val = v; @@ -26,10 +29,13 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { /// Read a hardware generated 32-bit random value and store the result in val. /// Return 1 if a random value was generated, and 0 otherwise. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step) #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[cfg_attr(feature = "cargo-clippy", allow(stutter))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { let (v, flag) = x86_rdrand32_step(); *val = v; @@ -38,9 +44,12 @@ pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { /// Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store /// in val. Return 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed16_step) #[inline] #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 { let (v, flag) = x86_rdseed16_step(); *val = v; @@ -49,9 +58,12 @@ pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 { /// Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store /// in val. Return 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed32_step) #[inline] #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdseed32_step(val: &mut u32) -> i32 { let (v, flag) = x86_rdseed32_step(); *val = v; diff --git a/coresimd/x86/rdtsc.rs b/coresimd/x86/rdtsc.rs index 468fa09bc8..79e32a7943 100644 --- a/coresimd/x86/rdtsc.rs +++ b/coresimd/x86/rdtsc.rs @@ -17,8 +17,11 @@ use stdsimd_test::assert_instr; /// /// On processors that support the Intel 64 architecture, the /// high-order 32 bits of each of RAX and RDX are cleared. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdtsc) #[inline] #[cfg_attr(test, assert_instr(rdtsc))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdtsc() -> i64 { rdtsc() } @@ -37,8 +40,11 @@ pub unsafe fn _rdtsc() -> i64 { /// /// On processors that support the Intel 64 architecture, the /// high-order 32 bits of each of RAX, RDX, and RCX are cleared. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__rdtscp) #[inline] #[cfg_attr(test, assert_instr(rdtscp))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __rdtscp(aux: *mut u32) -> u64 { rdtscp(aux as *mut _) } diff --git a/coresimd/x86/sha.rs b/coresimd/x86/sha.rs index d574e7d9d7..5135165cf6 100644 --- a/coresimd/x86/sha.rs +++ b/coresimd/x86/sha.rs @@ -26,9 +26,12 @@ use stdsimd_test::assert_instr; /// Perform an intermediate calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and returning the result. 
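The RDRAND/RDSEED step intrinsics above only report success through their return value, so callers are expected to retry. A minimal, hypothetical usage sketch follows (illustrative only, not part of the patch itself; it assumes an x86_64 target, the `std::arch::x86_64` module this commit stabilizes, and runtime detection via `is_x86_feature_detected!`):

```rust
// Hypothetical usage sketch for the RDRAND intrinsic stabilized above; not
// part of this patch.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::_rdrand32_step;
    if is_x86_feature_detected!("rdrand") {
        let mut value: u32 = 0;
        // The intrinsic returns 1 on success and 0 otherwise; the hardware
        // can transiently fail, so retry a bounded number of times.
        for _ in 0..10 {
            if unsafe { _rdrand32_step(&mut value) } == 1 {
                println!("hardware random value: {:#010x}", value);
                return;
            }
        }
        println!("RDRAND returned no value after 10 attempts");
    } else {
        println!("rdrand not detected on this CPU");
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```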
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg1_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1msg1))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) } @@ -36,9 +39,12 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { /// Perform the final calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using the intermediate result in `a` and the /// previous message values in `b`, and returns the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg2_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1msg2))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) } @@ -46,9 +52,12 @@ pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { /// Calculate SHA1 state variable E after four rounds of operation from the /// current SHA1 state variable `a`, add that value to the scheduled values /// (unsigned 32-bit integers) in `b`, and returns the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1nexte_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1nexte))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) } @@ -58,10 +67,13 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { /// (unsigned 32-bit integers), and state variable E from `b`, and return the /// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round /// constants. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1rnds4_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1rnds4, func = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1rnds4_epu32( a: __m128i, b: __m128i, func: i32 ) -> __m128i { @@ -79,9 +91,12 @@ pub unsafe fn _mm_sha1rnds4_epu32( /// Perform an intermediate calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg1_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256msg1))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) } @@ -89,9 +104,12 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { /// Perform the final calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg2_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256msg2))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) } @@ -101,9 +119,12 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { /// pre-computed sum of the next 2 round message values (unsigned 32-bit /// integers) and the corresponding round constants from `k`, and store the /// updated SHA256 state (A,B,E,F) in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256rnds2_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256rnds2))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha256rnds2_epu32( a: __m128i, b: __m128i, k: __m128i ) -> __m128i { diff --git a/coresimd/x86/sse.rs b/coresimd/x86/sse.rs index dbdb727902..aa24c28891 100644 --- a/coresimd/x86/sse.rs +++ b/coresimd/x86/sse.rs @@ -12,122 +12,164 @@ use stdsimd_test::assert_instr; /// Adds the first component of `a` and `b`, the other components are copied /// from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(addss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 { addss(a, b) } /// Adds __m128 vectors. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(addps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 { simd_add(a, b) } /// Subtracts the first component of `b` from `a`, the other components are /// copied from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(subss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 { subss(a, b) } /// Subtracts __m128 vectors. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(subps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 { simd_sub(a, b) } /// Multiplies the first component of `a` and `b`, the other components are /// copied from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(mulss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 { mulss(a, b) } /// Multiplies __m128 vectors. 
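For reviewers who want to see the newly stable surface in use, here is a minimal sketch of the packed SSE arithmetic above (illustrative only, not part of the patch; assumes an x86_64 target and uses `_mm_set_ps`/`_mm_set1_ps`, which are stabilized later in this file):

```rust
// Hypothetical sketch (not part of this patch): packed single-precision
// add and multiply with the intrinsics stabilized above.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse") {
        unsafe {
            // `_mm_set_ps` takes its arguments in "reverse" order, so the
            // lanes in memory order are [1.0, 2.0, 3.0, 4.0].
            let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
            let b = _mm_set1_ps(10.0);
            let sum: [f32; 4] = std::mem::transmute(_mm_add_ps(a, b));
            let prod: [f32; 4] = std::mem::transmute(_mm_mul_ps(a, b));
            assert_eq!(sum, [11.0, 12.0, 13.0, 14.0]);
            assert_eq!(prod, [10.0, 20.0, 30.0, 40.0]);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```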
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(mulps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 { simd_mul(a, b) } /// Divides the first component of `a` by the first component of `b`, the /// other components are copied from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(divss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 { divss(a, b) } /// Divides __m128 vectors. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(divps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { simd_div(a, b) } /// Return the square root of the first single-precision (32-bit) /// floating-point element in `a`, the other elements are unchanged. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sqrtss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 { sqrtss(a) } /// Return the square root of packed single-precision (32-bit) floating-point /// elements in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 { sqrtps(a) } /// Return the approximate reciprocal of the first single-precision /// (32-bit) floating-point element in `a`, the other elements are unchanged. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rcpss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 { rcpss(a) } /// Return the approximate reciprocal of packed single-precision (32-bit) /// floating-point elements in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rcpps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 { rcpps(a) } /// Return the approximate reciprocal square root of the first single-precision /// (32-bit) floating-point element in `a`, the other elements are unchanged. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rsqrtss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 { rsqrtss(a) } /// Return the approximate reciprocal square root of packed single-precision /// (32-bit) floating-point elements in `a`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rsqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { rsqrtps(a) } @@ -135,18 +177,24 @@ pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { /// Compare the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the minimum value in the first element of the return /// value, the other elements are copied from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(minss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { minss(a, b) } /// Compare packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(minps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { minps(a, b) } @@ -154,28 +202,37 @@ pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { /// Compare the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the maximum value in the first element of the return /// value, the other elements are copied from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(maxss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { maxss(a, b) } /// Compare packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(maxps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { maxps(a, b) } /// Bitwise AND of packed single-precision (32-bit) floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `and` instructions, so ignore it. #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(andps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -186,12 +243,15 @@ pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { /// elements. /// /// Computes `!a & b` for each bit in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `not` and `and` instructions, so ignore // it. 
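One common pattern built from the min/max intrinsics above is a per-lane clamp. A hypothetical sketch (not part of the patch; assumes an x86_64 target, and the lane values are illustrative only):

```rust
// Hypothetical sketch (not part of this patch): per-lane clamp to [0.0, 1.0]
// built from _mm_min_ps / _mm_max_ps.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse") {
        unsafe {
            let x = _mm_set_ps(9.0, -3.0, 0.5, 2.0); // lanes: [2.0, 0.5, -3.0, 9.0]
            let lo = _mm_set1_ps(0.0);
            let hi = _mm_set1_ps(1.0);
            // min() caps each lane at 1.0, max() then lifts it to at least 0.0.
            let clamped = _mm_max_ps(_mm_min_ps(x, hi), lo);
            let lanes: [f32; 4] = std::mem::transmute(clamped);
            assert_eq!(lanes, [1.0, 0.5, 0.0, 1.0]);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```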
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(andnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -200,11 +260,14 @@ pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { } /// Bitwise OR of packed single-precision (32-bit) floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `or` instructions, so we ignore it. #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(orps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -213,11 +276,14 @@ pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { /// Bitwise exclusive OR of packed single-precision (32-bit) floating-point /// elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `xor` instructions, so we ignore it. #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -227,9 +293,12 @@ pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { /// Compare the lowest `f32` of both inputs for equality. The lowest 32 bits of /// the result will be `0xffffffff` if the two inputs are equal, or `0` /// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpeqss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 0) } @@ -238,9 +307,12 @@ pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { /// of the result will be `0xffffffff` if `a.extract(0)` is less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 1) } @@ -249,9 +321,12 @@ pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { /// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than /// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpless))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 2) } @@ -260,9 +335,12 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { /// bits of the result will be `0xffffffff` if `a.extract(0)` is greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3]) } @@ -271,9 +349,12 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is /// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpless))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3]) } @@ -282,9 +363,12 @@ pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { /// of the result will be `0xffffffff` if `a.extract(0)` is not equal to /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpneqss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 4) } @@ -293,9 +377,12 @@ pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { /// bits of the result will be `0xffffffff` if `a.extract(0)` is not less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 5) } @@ -304,9 +391,12 @@ pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnless))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 6) } @@ -315,9 +405,12 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { /// bits of the result will be `0xffffffff` if `a.extract(0)` is not greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are /// the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3]) } @@ -326,9 +419,12 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 /// bits of the result are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnless))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3]) } @@ -337,9 +433,12 @@ pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { /// the result will be `0xffffffff` if neither of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpordss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 7) } @@ -348,9 +447,12 @@ pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { /// of the result will be `0xffffffff` if any of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpunordss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 3) } @@ -358,9 +460,12 @@ pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { /// Compare each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements /// were equal, or `0` otherwise. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpeqps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 0) } @@ -368,9 +473,12 @@ pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { /// Compare each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than the corresponding element in `b`, or `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 1) } @@ -379,9 +487,12 @@ pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than or equal to the corresponding element in `b`, or `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 2) } @@ -389,9 +500,12 @@ pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { /// Compare each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than the corresponding element in `b`, or `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 1) } @@ -400,9 +514,12 @@ pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than or equal to the corresponding element in `b`, or `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 2) } @@ -410,9 +527,12 @@ pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { /// Compare each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements /// are *not* equal, or `0` otherwise. 
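The packed comparisons above produce all-ones/all-zeros lane masks; combined with `_mm_movemask_ps` (stabilized further down in this file) they collapse into a small integer bitmask. A hypothetical sketch (not part of the patch; assumes an x86_64 target):

```rust
// Hypothetical sketch (not part of this patch): packed compare producing
// per-lane masks, collapsed to a 4-bit integer with _mm_movemask_ps.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse") {
        unsafe {
            let a = _mm_set_ps(7.0, 3.0, 5.0, 1.0); // lanes: [1.0, 5.0, 3.0, 7.0]
            let b = _mm_set_ps(8.0, 2.0, 2.0, 2.0); // lanes: [2.0, 2.0, 2.0, 8.0]
            // Lane i of `lt` is all ones where a[i] < b[i], all zeros otherwise.
            let lt = _mm_cmplt_ps(a, b);
            // Bit i of the result is the sign bit of lane i: lanes 0 and 3 here.
            let mask = _mm_movemask_ps(lt);
            assert_eq!(mask, 0b1001);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```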
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpneqps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 4) } @@ -421,9 +541,12 @@ pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is *not* less than the corresponding element in `b`, or `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 5) } @@ -432,9 +555,12 @@ pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is *not* less than or equal to the corresponding element in `b`, or /// `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 6) } @@ -443,9 +569,12 @@ pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is *not* greater than the corresponding element in `b`, or `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 5) } @@ -454,9 +583,12 @@ pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is *not* greater than or equal to the corresponding element in `b`, /// or `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 6) } @@ -465,9 +597,12 @@ pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { /// Returns four floats that have one of two possible bit patterns. The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpordps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 7) } @@ -476,27 +611,36 @@ pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { /// Returns four floats that have one of two possible bit patterns. 
The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are unordered (i.e., at least one of them is a NaN), or 0 otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpunordps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 3) } /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { comieq_ss(a, b) } /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { comilt_ss(a, b) } @@ -504,9 +648,12 @@ pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { comile_ss(a, b) } @@ -514,9 +661,12 @@ pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { comigt_ss(a, b) } @@ -524,18 +674,24 @@ pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { comige_ss(a, b) } /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are *not* equal, or `0` otherwise.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { comineq_ss(a, b) } @@ -543,9 +699,12 @@ pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. This instruction will not signal /// an exception if either argument is a quiet NaN. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { ucomieq_ss(a, b) } @@ -554,9 +713,12 @@ pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// This instruction will not signal an exception if either argument is a quiet /// NaN. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { ucomilt_ss(a, b) } @@ -565,9 +727,12 @@ pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { ucomile_ss(a, b) } @@ -576,9 +741,12 @@ pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { ucomigt_ss(a, b) } @@ -587,9 +755,12 @@ pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. This instruction will not signal an exception if either /// argument is a quiet NaN. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { ucomige_ss(a, b) } @@ -597,9 +768,12 @@ pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are *not* equal, or `0` otherwise. 
This instruction will not /// signal an exception if either argument is a quiet NaN. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { ucomineq_ss(a, b) } @@ -612,17 +786,23 @@ pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { /// unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTSS2SI` instruction (with 32 bit output). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si32) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 { cvtss2si(a) } /// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ss2si) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { _mm_cvtss_si32(a) } @@ -637,26 +817,35 @@ pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { /// exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si32) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 { cvttss2si(a) } /// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ss2si) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { _mm_cvttss_si32(a) } /// Extract the lowest 32 bit float from the input vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32) #[inline] #[target_feature(enable = "sse")] // No point in using assert_instrs. In Unix x86_64 calling convention this is a // no-op, and on Windows it's just a `mov`. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { simd_extract(a, 0) } @@ -666,42 +855,57 @@ pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { /// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit /// input). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 { cvtsi2ss(a, b) } /// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html). 
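A small sketch contrasting the rounding and truncating conversions above (illustrative only, not part of the patch; assumes an x86_64 target and that MXCSR is still in its default round-to-nearest mode):

```rust
// Hypothetical sketch (not part of this patch): rounding vs. truncating
// float-to-int conversion with the intrinsics above.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse") {
        unsafe {
            let a = _mm_set_ss(1.7);
            // _mm_cvtss_si32 rounds according to MXCSR (round-to-nearest by
            // default); _mm_cvttss_si32 always truncates toward zero.
            assert_eq!(_mm_cvtss_si32(a), 2);
            assert_eq!(_mm_cvttss_si32(a), 1);
            // _mm_cvtss_f32 extracts the lowest lane back out as an f32.
            assert_eq!(_mm_cvtss_f32(a), 1.7);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```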
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_si2ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 { _mm_cvtsi32_ss(a, b) } /// Construct a `__m128` with the lowest element set to `a` and the rest set to /// zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_ss(a: f32) -> __m128 { __m128(a, 0.0, 0.0, 0.0) } /// Construct a `__m128` with all elements set to `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_ps(a: f32) -> __m128 { __m128(a, a, a, a) } /// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ps1) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_ps1(a: f32) -> __m128 { _mm_set1_ps(a) } @@ -723,9 +927,12 @@ pub unsafe fn _mm_set_ps1(a: f32) -> __m128 { /// ```text /// let v = _mm_set_ps(d, c, b, a); /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { __m128(d, c, b, a) } @@ -738,19 +945,25 @@ pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { /// ```text /// assert_eq!(__m128::new(a, b, c, d), _mm_setr_ps(a, b, c, d)); /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(unpcklps))] // On a 32-bit architecture it just copies the operands from the stack. #[cfg_attr(all(test, target_arch = "x86"), assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { __m128(a, b, c, d) } /// Construct a `__m128` with all elements initialized to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_ps() -> __m128 { __m128(0.0, 0.0, 0.0, 0.0) } @@ -760,10 +973,13 @@ pub unsafe fn _mm_setzero_ps() -> __m128 { /// /// The lower half of the result takes values from `a` and the higher half /// from `b`. The mask is split into four 2-bit control fields, each of which /// selects one element from the inputs.
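A hypothetical sketch of the shuffle behaviour just described, using the signature as it appears in this file (the mask is a required-const third argument; not part of the patch; assumes an x86_64 target):

```rust
// Hypothetical sketch (not part of this patch): _mm_shuffle_ps picks two
// lanes from `a` (low half of the result) and two lanes from `b` (high half),
// selected by the constant mask two bits at a time.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse") {
        unsafe {
            let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes: [1.0, 2.0, 3.0, 4.0]
            let b = _mm_set_ps(8.0, 7.0, 6.0, 5.0); // lanes: [5.0, 6.0, 7.0, 8.0]
            // Mask 0b01_00_11_10: result = [a[2], a[3], b[0], b[1]].
            let r = _mm_shuffle_ps(a, b, 0b01_00_11_10);
            let lanes: [f32; 4] = std::mem::transmute(r);
            assert_eq!(lanes, [3.0, 4.0, 5.0, 6.0]);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```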
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps, mask = 3))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 { let mask = (mask & 0xFF) as u8; @@ -812,27 +1028,36 @@ pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 { /// Unpack and interleave single-precision (32-bit) floating-point elements /// from the higher half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [2, 6, 3, 7]) } /// Unpack and interleave single-precision (32-bit) floating-point elements /// from the lower half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [0, 4, 1, 5]) } /// Combine higher half of `a` and `b`. The higher half of `b` occupies the /// lower half of result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehl_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movhlps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { // TODO: figure out why this is a different instruction on Windows? simd_shuffle4(a, b, [6, 7, 2, 3]) @@ -840,9 +1065,12 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { /// Combine lower half of `a` and `b`. The lower half of `b` occupies the /// higher half of result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movelh_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movlhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [0, 1, 4, 5]) } @@ -851,9 +1079,12 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { /// /// The mask is stored in the 4 least significant bits of the return value. /// All other bits are set to `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movmskps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) } @@ -973,9 +1204,12 @@ pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 { /// elements set to zero. /// /// This corresponds to instructions `VMOVSS` / `MOVSS`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 { __m128(*p, 0.0, 0.0, 0.0) } @@ -985,18 +1219,24 @@ pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 { /// /// This corresponds to instructions `VMOVSS` / `MOVSS` followed by some /// shuffling. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 { let a = *p; __m128(a, a, a, a) } /// Alias for [`_mm_load1_ps`](fn._mm_load1_ps.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps1) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { _mm_load1_ps(p) } @@ -1009,9 +1249,12 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { /// memory. /// /// This corresponds to instructions `VMOVAPS` / `MOVAPS`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { *(p as *const __m128) } @@ -1023,9 +1266,12 @@ pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { /// may be faster. /// /// This corresponds to instructions `VMOVUPS` / `MOVUPS`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { // Note: Using `*p` would require `f32` alignment, but `movups` has no // alignment restrictions. @@ -1057,9 +1303,12 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { /// /// This corresponds to instructions `VMOVAPS` / `MOVAPS` followed by some /// shuffling. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 { let a = _mm_load_ps(p); simd_shuffle4(a, a, [3, 2, 1, 0]) @@ -1124,9 +1373,12 @@ pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) { /// Store the lowest 32 bit float of `a` into memory. /// /// This intrinsic corresponds to the `MOVSS` instruction. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { *p = simd_extract(a, 0); } @@ -1147,18 +1399,24 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { /// *p.offset(2) = x; /// *p.offset(3) = x; /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]); *(p as *mut __m128) = b; } /// Alias for [`_mm_store1_ps`](fn._mm_store1_ps.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps1) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { _mm_store1_ps(p, a); } @@ -1172,9 +1430,12 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { /// memory. /// /// This corresponds to instructions `VMOVAPS` / `MOVAPS`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = a; } @@ -1184,9 +1445,12 @@ pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { /// faster. /// /// This corresponds to instructions `VMOVUPS` / `MOVUPS`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { ptr::copy_nonoverlapping( &a as *const __m128 as *const u8, @@ -1209,9 +1473,12 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { /// *p.offset(2) = a.extract(1); /// *p.offset(3) = a.extract(0); /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]); *(p as *mut __m128) = b; @@ -1224,9 +1491,12 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { /// ```text /// _mm_move_ss(a, b) == a.replace(0, b.extract(0)) /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [4, 1, 2, 3]) } @@ -1237,9 +1507,12 @@ pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { /// Guarantees that every store instruction that precedes, in program order, is /// globally visible before any store instruction which follows the fence in /// program order. 
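A short, hypothetical round-trip through the unaligned load/store intrinsics above (illustrative only, not part of the patch; assumes an x86_64 target):

```rust
// Hypothetical sketch (not part of this patch): unaligned load, packed
// arithmetic, unaligned store.
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::*;
    let mut data = [1.0_f32, 2.0, 3.0, 4.0];
    if is_x86_feature_detected!("sse") {
        unsafe {
            // _mm_loadu_ps / _mm_storeu_ps have no alignment requirement,
            // unlike _mm_load_ps / _mm_store_ps which need 16-byte alignment.
            let v = _mm_loadu_ps(data.as_ptr());
            let doubled = _mm_add_ps(v, v);
            _mm_storeu_ps(data.as_mut_ptr(), doubled);
        }
        assert_eq!(data, [2.0, 4.0, 6.0, 8.0]);
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```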
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sfence) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sfence))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sfence() { sfence() } @@ -1247,9 +1520,12 @@ pub unsafe fn _mm_sfence() { /// Get the unsigned 32-bit value of the MXCSR control and status register. /// /// For more info see [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getcsr) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(stmxcsr))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_getcsr() -> u32 { let mut result = 0_i32; stmxcsr((&mut result) as *mut _ as *mut i8); @@ -1381,114 +1657,160 @@ pub unsafe fn _mm_getcsr() -> u32 { /// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // turn on /// ``` /// +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setcsr) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ldmxcsr))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setcsr(val: u32) { ldmxcsr(&val as *const _ as *const i8); } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_INVALID: u32 = 0x0001; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_DENORM: u32 = 0x0002; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_INEXACT: u32 = 0x0020; /// See [`_MM_GET_EXCEPTION_STATE`](fn._MM_GET_EXCEPTION_STATE.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_MASK: u32 = 0x003f; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_INVALID: u32 = 0x0080; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_DENORM: u32 = 0x0100; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_DIV_ZERO: u32 = 0x0200; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_OVERFLOW: u32 = 0x0400; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_UNDERFLOW: u32 = 0x0800; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_INEXACT: u32 = 0x1000; /// See [`_MM_GET_EXCEPTION_MASK`](fn._MM_GET_EXCEPTION_MASK.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_MASK: u32 = 0x1f80; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_NEAREST: u32 = 0x0000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_DOWN: u32 = 0x2000; /// See 
[`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_UP: u32 = 0x4000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000; /// See [`_MM_GET_ROUNDING_MODE`](fn._MM_GET_ROUNDING_MODE.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_MASK: u32 = 0x6000; /// See [`_MM_GET_FLUSH_ZERO_MODE`](fn._MM_GET_FLUSH_ZERO_MODE.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_EXCEPTION_MASK) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 { _mm_getcsr() & _MM_MASK_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_EXCEPTION_STATE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 { _mm_getcsr() & _MM_EXCEPT_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_FLUSH_ZERO_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 { _mm_getcsr() & _MM_FLUSH_ZERO_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_ROUNDING_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 { _mm_getcsr() & _MM_ROUND_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_EXCEPTION_MASK) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) { _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x) } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_EXCEPTION_STATE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) { _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x) } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_FLUSH_ZERO_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) { let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x; // println!("setting csr={:x}", val); @@ -1496,23 
+1818,30 @@ pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) { } /// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_ROUNDING_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) { _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x) } /// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_T0: i32 = 3; /// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_T1: i32 = 2; /// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_T2: i32 = 1; /// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_NTA: i32 = 0; /// Fetch the cache line that contains address `p` using the given `strategy`. @@ -1551,6 +1880,8 @@ pub const _MM_HINT_NTA: i32 = 0; /// * Prefetching may also fail if there are not enough memory-subsystem /// resources (e.g., request buffers). /// +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_prefetch) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(prefetcht0, strategy = _MM_HINT_T0))] @@ -1558,6 +1889,7 @@ pub const _MM_HINT_NTA: i32 = 0; #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))] #[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) { // The `strategy` must be a compile-time constant, so we use a short form // of `constify_imm8!` for now. @@ -1577,8 +1909,11 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) { } /// Return vector of type __m128 with undefined elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps) #[inline] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_ps() -> __m128 { __m128( mem::uninitialized(), @@ -1589,9 +1924,12 @@ pub unsafe fn _mm_undefined_ps() -> __m128 { } /// Transpose the 4x4 matrix formed by 4 rows of __m128 in place. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_TRANSPOSE4_PS) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_TRANSPOSE4_PS( row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, row3: &mut __m128 ) { @@ -1720,9 +2058,12 @@ extern "C" { /// /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception _may_ be generated. 
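+///
+/// A minimal usage sketch (assumes an x86_64 target with SSE support and a
+/// 16-byte-aligned destination):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// #[repr(align(16))]
+/// struct Aligned([f32; 4]);
+///
+/// let mut out = Aligned([0.0; 4]);
+/// unsafe {
+///     // Non-temporal store of four 1.0 values; minimizes cache pollution.
+///     _mm_stream_ps(out.0.as_mut_ptr(), _mm_set1_ps(1.0));
+///     // Order the streaming store before any stores that follow.
+///     _mm_sfence();
+/// }
+/// assert_eq!(out.0, [1.0; 4]);
+/// ```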
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movntps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } diff --git a/coresimd/x86/sse2.rs b/coresimd/x86/sse2.rs index 6d1b6de109..d443bbe96e 100644 --- a/coresimd/x86/sse2.rs +++ b/coresimd/x86/sse2.rs @@ -14,18 +14,24 @@ use ptr; /// /// This can help improve the performance and power consumption of spin-wait /// loops. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pause))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_pause() { pause() } /// Invalidate and flush the cache line that contains `p` from all levels of /// the cache hierarchy. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(clflush))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_clflush(p: *mut u8) { clflush(p) } @@ -36,9 +42,12 @@ pub unsafe fn _mm_clflush(p: *mut u8) { /// Guarantees that every load instruction that precedes, in program order, is /// globally visible before any load instruction which follows the fence in /// program order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(lfence))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_lfence() { lfence() } @@ -49,89 +58,122 @@ pub unsafe fn _mm_lfence() { /// Guarantees that every memory access that precedes, in program order, the /// memory fence instruction is globally visible before any memory instruction /// which follows the fence in program order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mfence))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mfence() { mfence() } /// Add packed 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } /// Add packed 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } /// Add packed 32-bit integers in `a` and `b`. 
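+///
+/// A minimal usage sketch (assumes an x86_64 target with SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set_epi32(3, 2, 1, 0);    // lanes, low to high: 0, 1, 2, 3
+///     let b = _mm_set_epi32(30, 20, 10, 0); // lanes, low to high: 0, 10, 20, 30
+///     let sum = _mm_add_epi32(a, b);
+///     let mut out = [0i32; 4];
+///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, sum);
+///     assert_eq!(out, [0, 11, 22, 33]);
+/// }
+/// ```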
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } /// Add packed 64-bit integers in `a` and "b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } /// Add packed 8-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsb(a.as_i8x16(), b.as_i8x16())) } /// Add packed 16-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsw(a.as_i16x8(), b.as_i16x8())) } /// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsub(a.as_u8x16(), b.as_u8x16())) } /// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsuw(a.as_u16x8(), b.as_u16x8())) } /// Average packed unsigned 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pavgb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pavgb(a.as_u8x16(), b.as_u8x16())) } /// Average packed unsigned 16-bit integers in `a` and `b`. 
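+///
+/// The average is rounded up: each lane computes `(a + b + 1) >> 1` on the
+/// zero-extended values. A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi16(1);
+///     let b = _mm_set1_epi16(2);
+///     let avg = _mm_avg_epu16(a, b); // every u16 lane is (1 + 2 + 1) >> 1 == 2
+/// }
+/// ```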
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pavgw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pavgw(a.as_u16x8(), b.as_u16x8())) } @@ -141,45 +183,60 @@ pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { /// Multiply packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of /// intermediate 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaddwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 16-bit integers in `a` and `b`, and return the packed /// maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaxsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) } /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the /// packed maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaxub))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) } /// Compare packed 16-bit integers in `a` and `b`, and return the packed /// minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pminsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsw(a.as_i16x8(), b.as_i16x8())) } /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the /// packed minimum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pminub))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminub(a.as_u8x16(), b.as_u8x16())) } @@ -188,9 +245,12 @@ pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. 
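+///
+/// A minimal usage sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi16(16_384);  // 0x4000
+///     let b = _mm_set1_epi16(4);
+///     // 16_384 * 4 = 0x1_0000; keeping the high 16 bits gives 1 in every lane.
+///     let hi = _mm_mulhi_epi16(a, b);
+/// }
+/// ```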
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmulhw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) } @@ -199,9 +259,12 @@ pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmulhuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) } @@ -210,9 +273,12 @@ pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { /// /// The multiplication produces intermediate 32-bit integers, and returns the /// low 16 bits of the intermediate integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmullw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } @@ -221,9 +287,12 @@ pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { /// in `a` and `b`. /// /// Return the unsigned 64-bit results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmuludq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmuludq(a.as_u32x4(), b.as_u32x4())) } @@ -234,86 +303,116 @@ pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { /// and `b`, then horizontally sum each consecutive 8 differences to produce /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in /// the low 16 bits of 64-bit elements returned. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psadbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` /// using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psubsb(a.as_i8x16(), b.as_i8x16())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` /// using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psubsw(a.as_i16x8(), b.as_i16x8())) } /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit /// integers in `a` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psubusb(a.as_u8x16(), b.as_u8x16())) } /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit /// integers in `a` using saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psubusw(a.as_u16x8(), b.as_u16x8())) } /// Shift `a` left by `imm8` bytes while shifting in zeros. 
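+///
+/// The shift amount is a byte count and must be a compile-time constant. A
+/// minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi8(1);
+///     // Every byte moves four positions toward the most significant end;
+///     // the four lowest bytes are filled with zeros.
+///     let shifted = _mm_slli_si128(a, 4);
+/// }
+/// ```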
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128_impl(a, imm8) } @@ -374,120 +473,159 @@ unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i { } /// Shift `a` left by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128_impl(a, imm8) } /// Shift `a` right by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128_impl(a, imm8) } /// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllw, imm8 = 7))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(pslliw(a.as_i16x8(), imm8)) } /// Shift packed 16-bit integers in `a` left by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllw(a.as_i16x8(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslld, imm8 = 7))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psllid(a.as_i32x4(), imm8)) } /// Shift packed 32-bit integers in `a` left by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(pslld(a.as_i32x4(), count.as_i32x4())) } /// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllq, imm8 = 7))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i { mem::transmute(pslliq(a.as_i64x2(), imm8)) } /// Shift packed 64-bit integers in `a` left by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllq(a.as_i64x2(), count.as_i64x2())) } /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign /// bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psraiw(a.as_i16x8(), imm8)) } /// Shift packed 16-bit integers in `a` right by `count` while shifting in sign /// bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psraw(a.as_i16x8(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign /// bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psraid(a.as_i32x4(), imm8)) } /// Shift packed 32-bit integers in `a` right by `count` while shifting in sign /// bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrad(a.as_i32x4(), count.as_i32x4())) } /// Shift `a` right by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128_impl(a, imm8) } @@ -549,174 +687,234 @@ unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i { /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros. 
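+///
+/// This is a logical shift, so sign bits are not preserved (contrast with
+/// `_mm_srai_epi16`). A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi16(-1);   // 0xFFFF in every lane
+///     let r = _mm_srli_epi16(a, 1); // zeros shifted in: every lane becomes 0x7FFF
+/// }
+/// ```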
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlw, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrliw(a.as_i16x8(), imm8)) } /// Shift packed 16-bit integers in `a` right by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrld, imm8 = 8))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrlid(a.as_i32x4(), imm8)) } /// Shift packed 32-bit integers in `a` right by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrld(a.as_i32x4(), count.as_i32x4())) } /// Shift packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlq, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrliq(a.as_i64x2(), imm8)) } /// Shift packed 64-bit integers in `a` right by `count` while shifting in /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } /// Compute the bitwise AND of 128 bits (representing integer data) in `a` and /// `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(a, b) } /// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and /// then AND with `b`. 
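+///
+/// Note the operand order: the first argument is the one that is inverted,
+/// i.e. the result is `(!a) & b`. A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi32(0b1100);
+///     let b = _mm_set1_epi32(0b1010);
+///     let r = _mm_andnot_si128(a, b); // (!0b1100) & 0b1010 == 0b0010 in every lane
+/// }
+/// ```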
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } /// Compute the bitwise OR of 128 bits (representing integer data) in `a` and /// `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(orps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { simd_or(a, b) } /// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and /// `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { simd_xor(a, b) } /// Compare packed 8-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i8x16(), b.as_i8x16())) } /// Compare packed 16-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 32-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i32x4(), b.as_i32x4())) } /// Compare packed 8-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i8x16(), b.as_i8x16())) } /// Compare packed 16-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 32-bit integers in `a` and `b` for greater-than. 
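+///
+/// The comparison is signed and produces a lane mask: all ones where the
+/// comparison holds, all zeros otherwise. A minimal sketch (assumes SSE2
+/// support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set_epi32(5, -1, 7, 0);
+///     let b = _mm_set_epi32(4, 0, 7, -3);
+///     // Lanes where a > b become -1 (all bits set), the others become 0.
+///     let mask = _mm_cmpgt_epi32(a, b);
+/// }
+/// ```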
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i32x4(), b.as_i32x4())) } /// Compare packed 8-bit integers in `a` and `b` for less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i8x16(), b.as_i8x16())) } /// Compare packed 16-bit integers in `a` and `b` for less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 32-bit integers in `a` and `b` for less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i32x4(), b.as_i32x4())) } /// Convert the lower two packed 32-bit integers in `a` to packed /// double-precision (64-bit) floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtdq2pd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { let a = a.as_i32x4(); simd_cast::(simd_shuffle2(a, a, [0, 1])) @@ -724,69 +922,93 @@ pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { /// Return `a` with its lower element replaced by `b` after converting it to /// an `f64`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { simd_insert(a, 0, b as f64) } /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtdq2ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { cvtdq2ps(a.as_i32x4()) } /// Convert packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. 
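+///
+/// Conversion uses the rounding mode currently set in `MXCSR`, which is
+/// round-to-nearest-even by default. A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let x = _mm_set_ps(3.7, 2.5, 1.5, 0.4);
+///     // With the default rounding mode the lanes become, low to high: 0, 2, 2, 4
+///     // (both 1.5 and 2.5 round to the nearest even integer, 2).
+///     let i = _mm_cvtps_epi32(x);
+/// }
+/// ```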
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { mem::transmute(cvtps2dq(a)) } /// Return a vector whose lowest element is `a` and all higher elements are /// `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { mem::transmute(i32x4::new(a, 0, 0, 0)) } /// Return the lowest element of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { simd_extract(a.as_i32x4(), 0) } /// Set packed 64-bit integers with the supplied values, from highest to /// lowest. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { mem::transmute(i64x2::new(e0, e1)) } /// Set packed 32-bit integers with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { mem::transmute(i32x4::new(e0, e1, e2, e3)) } /// Set packed 16-bit integers with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi16( e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16 ) -> __m128i { @@ -794,9 +1016,12 @@ pub unsafe fn _mm_set_epi16( } /// Set packed 8-bit integers with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi8( e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, @@ -808,49 +1033,67 @@ pub unsafe fn _mm_set_epi8( } /// Broadcast 64-bit integer `a` to all elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { _mm_set_epi64x(a, a) } /// Broadcast 32-bit integer `a` to all elements. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { _mm_set_epi32(a, a, a, a) } /// Broadcast 16-bit integer `a` to all elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { _mm_set_epi16(a, a, a, a, a, a, a, a) } /// Broadcast 8-bit integer `a` to all elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } /// Set packed 32-bit integers with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { _mm_set_epi32(e0, e1, e2, e3) } /// Set packed 16-bit integers with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_epi16( e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16 ) -> __m128i { @@ -858,9 +1101,12 @@ pub unsafe fn _mm_setr_epi16( } /// Set packed 8-bit integers with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_epi8( e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, @@ -872,14 +1118,19 @@ pub unsafe fn _mm_setr_epi8( } /// Returns a vector with all elements set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_si128() -> __m128i { _mm_set1_epi64x(0) } /// Load 64-bit integer from memory into first element of returned vector. 
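+///
+/// Only the low 64 bits of the source are used; the upper 64 bits of the
+/// result are zeroed. A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let src = _mm_set_epi64x(-1, 0x1122_3344_5566_7788);
+///     // The low lane of `src` is copied; the high lane of the result is 0.
+///     let v = _mm_loadl_epi64(&src);
+/// }
+/// ```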
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64) #[inline] #[target_feature(enable = "sse2")] // FIXME movsd on windows @@ -887,6 +1138,7 @@ pub unsafe fn _mm_setzero_si128() -> __m128i { not(all(target_os = "linux", target_arch = "x86_64")), target_arch = "x86_64"), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { _mm_set_epi64x(0, simd_extract((*mem_addr).as_i64x2(), 0)) } @@ -894,9 +1146,12 @@ pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { /// Load 128-bits of integer data from memory into a new vector. /// /// `mem_addr` must be aligned on a 16-byte boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { *mem_addr } @@ -904,9 +1159,12 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { /// Load 128-bits of integer data from memory into a new vector. /// /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { let mut dst: __m128i = _mm_undefined_si128(); ptr::copy_nonoverlapping( @@ -925,9 +1183,12 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { /// /// `mem_addr` should correspond to a 128-bit memory location and does not need /// to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maskmovdqu))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskmoveu_si128( a: __m128i, mask: __m128i, mem_addr: *mut i8 ) { @@ -937,9 +1198,12 @@ pub unsafe fn _mm_maskmoveu_si128( /// Store 128-bits of integer data from `a` into memory. /// /// `mem_addr` must be aligned on a 16-byte boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { *mem_addr = a; } @@ -947,9 +1211,12 @@ pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { /// Store 128-bits of integer data from `a` into memory. /// /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { storeudq(mem_addr as *mut i8, a); } @@ -957,6 +1224,8 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { /// Store the lower 64-bit integer `a` to a memory location. 
/// /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64) #[inline] #[target_feature(enable = "sse2")] // FIXME mov on windows, movlps on i686 @@ -964,6 +1233,7 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { not(all(target_os = "linux", target_arch = "x86_64")), target_arch = "x86_64"), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { ptr::copy_nonoverlapping( &a as *const _ as *const u8, @@ -975,9 +1245,12 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { /// Stores a 128-bit integer vector to a 128-bit aligned memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { ::intrinsics::nontemporal_store(mem_addr, a); } @@ -985,20 +1258,26 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { /// Stores a 32-bit integer value in the specified memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movnti))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { ::intrinsics::nontemporal_store(mem_addr, a); } /// Return a vector where the low element is extracted from `a` and its upper /// element is zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64) #[inline] #[target_feature(enable = "sse2")] // FIXME movd on windows, movd on i686 #[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let r: i64x2 = simd_shuffle2(a.as_i64x2(), zero.as_i64x2(), [0, 2]); @@ -1007,45 +1286,60 @@ pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packsswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. 
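+///
+/// Values outside the `i16` range saturate to `i16::MAX` or `i16::MIN`; the
+/// four lanes from `a` form the low half of the result and the four lanes
+/// from `b` form the high half. A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi32(100_000);   // above i16::MAX
+///     let b = _mm_set1_epi32(-100_000);  // below i16::MIN
+///     // Low four i16 lanes: 32767; high four i16 lanes: -32768.
+///     let packed = _mm_packs_epi32(a, b);
+/// }
+/// ```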
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packssdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packuswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } /// Return the `imm8` element of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pextrw, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 } /// Return a new vector where the `imm8` element of `a` is replaced with `i`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert( a.as_i16x8(), @@ -1055,18 +1349,24 @@ pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i { } /// Return a mask of the most significant bit of each element in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmovmskb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { pmovmskb(a.as_i8x16()) } /// Shuffle 32-bit integers in `a` using the control in `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { // simd_shuffleX requires that its selector parameter be made up of // constant values, but we can't enforce that here. In spirit, we need @@ -1127,10 +1427,13 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { /// /// Put the results in the high 64 bits of the returned vector, with the low 64 /// bits being copied from from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { // See _mm_shuffle_epi32. 
let imm8 = (imm8 & 0xFF) as u8; @@ -1197,10 +1500,13 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { /// /// Put the results in the low 64 bits of the returned vector, with the high 64 /// bits being copied from from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; @@ -1251,9 +1557,12 @@ pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { } /// Unpack and interleave 8-bit integers from the high half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle16( a.as_i8x16(), @@ -1265,9 +1574,12 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 16-bit integers from the high half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { let x = simd_shuffle8( a.as_i16x8(), @@ -1278,9 +1590,12 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 32-bit integers from the high half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle4( a.as_i32x4(), @@ -1290,9 +1605,12 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 64-bit integers from the high half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle2( a.as_i64x2(), @@ -1302,9 +1620,12 @@ pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 8-bit integers from the low half of `a` and `b`. 
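+///
+/// The result alternates bytes from the low halves of the two inputs:
+/// `a0, b0, a1, b1, ..., a7, b7`. A minimal sketch (assumes SSE2 support):
+///
+/// ```
+/// use std::arch::x86_64::*;
+///
+/// unsafe {
+///     let a = _mm_set1_epi8(1);
+///     let b = _mm_set1_epi8(2);
+///     let lo = _mm_unpacklo_epi8(a, b); // bytes: 1, 2, 1, 2, ... (sixteen in total)
+/// }
+/// ```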
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle16( a.as_i8x16(), @@ -1316,9 +1637,12 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 16-bit integers from the low half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { let x = simd_shuffle8( a.as_i16x8(), @@ -1329,9 +1653,12 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 32-bit integers from the low half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle4( a.as_i32x4(), @@ -1341,9 +1668,12 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { } /// Unpack and interleave 64-bit integers from the low half of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movlhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle2( a.as_i64x2(), @@ -1354,134 +1684,179 @@ pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { /// Return a new vector with the low element of `a` replaced by the sum of the /// low elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(addsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } /// Add packed double-precision (64-bit) floating-point elements in `a` and /// `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(addpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { simd_add(a, b) } /// Return a new vector with the low element of `a` replaced by the result of /// dividing the lower element of `a` by the lower element of `b`.
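// Illustrative usage sketch, not part of the patch above: exercising the
// stabilized SSE2 pack/unpack intrinsics. `_mm_set_epi32`, `_mm_setr_epi8`,
// `_mm_storeu_si128` and `is_x86_feature_detected!` are assumed from the same
// stable `std::arch`/`std` surface but sit outside this hunk.
#[cfg(target_arch = "x86_64")]
pub fn pack_and_interleave() {
    use std::arch::x86_64::*;

    if !is_x86_feature_detected!("sse2") {
        return;
    }
    unsafe {
        // Saturate eight 32-bit integers (four from each input) down to
        // eight 16-bit integers; out-of-range lanes clamp to i16 bounds.
        let a = _mm_set_epi32(70_000, 2, -70_000, -3);
        let b = _mm_set_epi32(4, 5, 6, 7);
        let packed = _mm_packs_epi32(a, b);
        let mut words = [0i16; 8];
        _mm_storeu_si128(words.as_mut_ptr() as *mut __m128i, packed);
        assert_eq!(words[1], -32768); // -70_000 saturated
        assert_eq!(words[3], 32767); // 70_000 saturated

        // Interleave the low eight bytes of two byte vectors.
        let x = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let y = _mm_setr_epi8(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let lo = _mm_unpacklo_epi8(x, y);
        let mut bytes = [0i8; 16];
        _mm_storeu_si128(bytes.as_mut_ptr() as *mut __m128i, lo);
        assert_eq!(bytes[0], 0);
        assert_eq!(bytes[1], 16); // lanes alternate x0, y0, x1, y1, ...
    }
}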
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(divsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } /// Divide packed double-precision (64-bit) floating-point elements in `a` by /// packed elements in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(divpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { simd_div(a, b) } /// Return a new vector with the low element of `a` replaced by the maximum /// of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maxsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { maxsd(a, b) } /// Return a new vector with the maximum values from corresponding elements in /// `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maxpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { maxpd(a, b) } /// Return a new vector with the low element of `a` replaced by the minimum /// of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(minsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { minsd(a, b) } /// Return a new vector with the minimum values from corresponding elements in /// `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(minpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { minpd(a, b) } /// Return a new vector with the low element of `a` replaced by multiplying the /// low elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mulsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } /// Multiply packed double-precision (64-bit) floating-point elements in `a` /// and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mulpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { simd_mul(a, b) } /// Return a new vector with the low element of `a` replaced by the square /// root of the lower element of `b`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(sqrtsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) } /// Return a new vector with the square root of each of the values in `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(sqrtpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { sqrtpd(a) } /// Return a new vector with the low element of `a` replaced by subtracting the /// low element of `b` from the low element of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(subsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } /// Subtract packed double-precision (64-bit) floating-point elements in `b` /// from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(subpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { simd_sub(a, b) } /// Compute the bitwise AND of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -1489,9 +1864,12 @@ pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { } /// Compute the bitwise NOT of `a` and then AND with `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -1499,9 +1877,12 @@ pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { } /// Compute the bitwise OR of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(orps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -1509,9 +1890,12 @@ pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { } /// Compute the bitwise XOR of `a` and `b`.
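// Illustrative usage sketch, not part of the patch above: the stabilized
// `__m128d` arithmetic intrinsics, guarded by runtime detection.
// `_mm_set_pd` and `_mm_storeu_pd` (both stabilized further below in this
// file) and `is_x86_feature_detected!` are assumed to be available.
#[cfg(target_arch = "x86_64")]
pub fn double_math() {
    use std::arch::x86_64::*;

    if !is_x86_feature_detected!("sse2") {
        return;
    }
    unsafe {
        // `_mm_set_pd(high, low)`: the second argument is the low lane.
        let a = _mm_set_pd(3.0, 4.0);
        let b = _mm_set_pd(1.5, 0.5);
        let sum = _mm_add_pd(a, b);  // low 4.5, high 4.5
        let prod = _mm_mul_pd(a, b); // low 2.0, high 4.5
        let root = _mm_sqrt_pd(sum); // element-wise square root
        let mut out = [0.0f64; 2];
        _mm_storeu_pd(out.as_mut_ptr(), root);
        assert_eq!(out, [4.5f64.sqrt(); 2]);
        let _ = prod; // kept only to show the packed multiply
    }
}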
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); @@ -1520,36 +1904,48 @@ pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { /// Return a new vector with the low element of `a` replaced by the equality /// comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpeqsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 0) } /// Return a new vector with the low element of `a` replaced by the less-than /// comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 1) } /// Return a new vector with the low element of `a` replaced by the /// less-than-or-equal comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 2) } /// Return a new vector with the low element of `a` replaced by the /// greater-than comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert( _mm_cmplt_sd(b, a), @@ -1560,9 +1956,12 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { /// Return a new vector with the low element of `a` replaced by the /// greater-than-or-equal comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert( _mm_cmple_sd(b, a), @@ -1575,9 +1974,12 @@ pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { /// of comparing both of the lower elements of `a` and `b` to `NaN`. If /// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` /// otherwise. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpordsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 7) } @@ -1585,45 +1987,60 @@ pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { /// Return a new vector with the low element of `a` replaced by the result of /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpunordsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 3) } /// Return a new vector with the low element of `a` replaced by the not-equal /// comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpneqsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 4) } /// Return a new vector with the low element of `a` replaced by the /// not-less-than comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 5) } /// Return a new vector with the low element of `a` replaced by the /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 6) } /// Return a new vector with the low element of `a` replaced by the /// not-greater-than comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert( _mm_cmpnlt_sd(b, a), @@ -1634,9 +2051,12 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { /// Return a new vector with the low element of `a` replaced by the /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert( _mm_cmpnle_sd(b, a), @@ -1646,203 +2066,278 @@ pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { } /// Compare corresponding elements in `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpeqpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 0) } /// Compare corresponding elements in `a` and `b` for less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 1) } /// Compare corresponding elements in `a` and `b` for less-than-or-equal +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 2) } /// Compare corresponding elements in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmplt_pd(b, a) } /// Compare corresponding elements in `a` and `b` for greater-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmple_pd(b, a) } /// Compare corresponding elements in `a` and `b` to see if neither is `NaN`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpordpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 7) } /// Compare corresponding elements in `a` and `b` to see if either is `NaN`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpunordpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 3) } /// Compare corresponding elements in `a` and `b` for not-equal. 
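// Illustrative usage sketch, not part of the patch above: packed comparisons
// yield all-ones/all-zeros lanes, and `_mm_movemask_pd` (stabilized further
// below) condenses those lanes into the low two bits of an `i32`.
// `_mm_set_pd` and `is_x86_feature_detected!` are assumed.
#[cfg(target_arch = "x86_64")]
pub fn compare_doubles() {
    use std::arch::x86_64::*;

    if !is_x86_feature_detected!("sse2") {
        return;
    }
    unsafe {
        let a = _mm_set_pd(1.0, 5.0); // high 1.0, low 5.0
        let b = _mm_set_pd(2.0, 3.0); // high 2.0, low 3.0
        let lt = _mm_cmplt_pd(a, b);  // low: 5.0 < 3.0 is false; high: 1.0 < 2.0 is true
        let mask = _mm_movemask_pd(lt);
        assert_eq!(mask, 0b10); // only the high lane compared true
    }
}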
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpneqpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 4) } /// Compare corresponding elements in `a` and `b` for not-less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 5) } /// Compare corresponding elements in `a` and `b` for not-less-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 6) } /// Compare corresponding elements in `a` and `b` for not-greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnlt_pd(b, a) } /// Compare corresponding elements in `a` and `b` for /// not-greater-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnle_pd(b, a) } /// Compare the lower element of `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { comieqsd(a, b) } /// Compare the lower element of `a` and `b` for less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { comiltsd(a, b) } /// Compare the lower element of `a` and `b` for less-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { comilesd(a, b) } /// Compare the lower element of `a` and `b` for greater-than. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { comigtsd(a, b) } /// Compare the lower element of `a` and `b` for greater-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { comigesd(a, b) } /// Compare the lower element of `a` and `b` for not-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { comineqsd(a, b) } /// Compare the lower element of `a` and `b` for equality. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { ucomieqsd(a, b) } /// Compare the lower element of `a` and `b` for less-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { ucomiltsd(a, b) } /// Compare the lower element of `a` and `b` for less-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { ucomilesd(a, b) } /// Compare the lower element of `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { ucomigtsd(a, b) } /// Compare the lower element of `a` and `b` for greater-than-or-equal. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { ucomigesd(a, b) } /// Compare the lower element of `a` and `b` for not-equal. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { ucomineqsd(a, b) } /// Convert packed double-precision (64-bit) floating-point elements in "a" to /// packed single-precision (32-bit) floating-point elements +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtpd2ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { cvtpd2ps(a) } @@ -1850,27 +2345,36 @@ pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { /// Convert packed single-precision (32-bit) floating-point elements in `a` to /// packed /// double-precision (64-bit) floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtps2pd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { cvtps2pd(a) } /// Convert packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { mem::transmute(cvtpd2dq(a)) } /// Convert the lower double-precision (64-bit) floating-point element in a to /// a 32-bit integer. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { cvtsd2si(a) } @@ -1879,16 +2383,22 @@ pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { /// to a single-precision (32-bit) floating-point element, store the result in /// the lower element of the return value, and copy the upper element from `a` /// to the upper element the return value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { cvtsd2ss(a, b) } /// Return the lower double-precision (64-bit) floating-point element of "a". +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { simd_extract(a, 0) } @@ -1897,85 +2407,115 @@ pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { /// to a double-precision (64-bit) floating-point element, store the result in /// the lower element of the return value, and copy the upper element from `a` /// to the upper element the return value. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtss2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { cvtss2sd(a, b) } /// Convert packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { mem::transmute(cvttpd2dq(a)) } /// Convert the lower double-precision (64-bit) floating-point element in `a` /// to a 32-bit integer with truncation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { cvttsd2si(a) } /// Convert packed single-precision (32-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { mem::transmute(cvttps2dq(a)) } /// Copy double-precision (64-bit) floating-point element `a` to the lower /// element of the packed 64-bit return value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_sd(a: f64) -> __m128d { _mm_set_pd(0.0, a) } /// Broadcast double-precision (64-bit) floating-point value a to all elements /// of the return value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { _mm_set_pd(a, a) } /// Broadcast double-precision (64-bit) floating-point value a to all elements /// of the return value. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { _mm_set_pd(a, a) } /// Set packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { __m128d(b, a) } /// Set packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values in reverse order. 
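// Illustrative usage sketch, not part of the patch above: the conversion
// intrinsics stabilized here. `_mm_cvtsi128_si32` (an SSE2 intrinsic outside
// this hunk) is used only to read back the low integer lane;
// `is_x86_feature_detected!` is assumed.
#[cfg(target_arch = "x86_64")]
pub fn convert_doubles() {
    use std::arch::x86_64::*;

    if !is_x86_feature_detected!("sse2") {
        return;
    }
    unsafe {
        let a = _mm_set_pd(2.75, -1.25); // high 2.75, low -1.25

        // Truncating conversion: -1.25 -> -1 (the upper two i32 lanes are zeroed).
        let ints = _mm_cvttpd_epi32(a);
        assert_eq!(_mm_cvtsi128_si32(ints), -1);

        // The low f64 lane can be read back directly.
        assert_eq!(_mm_cvtsd_f64(a), -1.25);

        // Narrow to f32 and widen again; both values are exactly representable.
        let narrow = _mm_cvtpd_ps(a);
        let wide = _mm_cvtps_pd(narrow);
        assert_eq!(_mm_cvtsd_f64(wide), -1.25);
    }
}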
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { _mm_set_pd(b, a) } /// Returns packed double-precision (64-bit) floating-point elements with all /// zeros. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_pd() -> __m128d { _mm_set_pd(0.0, 0.0) } @@ -1984,9 +2524,12 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { /// /// The mask is stored in the 2 least significant bits of the return value. /// All other bits are set to `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movmskpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { movmskpd(a) } @@ -1995,18 +2538,24 @@ pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { /// floating-point elements) from memory into the returned vector. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { *(mem_addr as *const __m128d) } /// Loads a 64-bit double-precision value to the low element of a /// 128-bit integer vector and clears the upper element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { _mm_setr_pd(*mem_addr, 0.) } @@ -2014,9 +2563,12 @@ pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { /// Loads a double-precision value into the high-order bits of a 128-bit /// vector of [2 x double]. The low-order bits are copied from the low-order /// bits of the first operand. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { _mm_setr_pd(simd_extract(a, 0), *mem_addr) } @@ -2024,9 +2576,12 @@ pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { /// Loads a double-precision value into the low-order bits of a 128-bit /// vector of [2 x double]. The high-order bits are copied from the /// high-order bits of the first operand. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movlpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { _mm_setr_pd(*mem_addr, simd_extract(a, 1)) } @@ -2035,18 +2590,24 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { /// aligned memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movlps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0) } @@ -2054,9 +2615,12 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { /// Store 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. `mem_addr` must be aligned /// on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = a; } @@ -2064,9 +2628,12 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { /// Store 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { storeupd(mem_addr as *mut i8, a); } @@ -2074,8 +2641,11 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { /// Store the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; @@ -2084,8 +2654,11 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { /// Store the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; @@ -2095,8 +2668,11 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { /// memory in reverse order. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [1, 0]); *(mem_addr as *mut __m128d) = b; @@ -2104,27 +2680,36 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a /// memory location. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 1); } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movlps))] // FIXME movlpd +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0); } /// Load a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd) #[inline] #[target_feature(enable = "sse2")] // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { let d = *mem_addr; _mm_setr_pd(d, d) @@ -2132,9 +2717,12 @@ pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { /// Load a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. 
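// Illustrative usage sketch, not part of the patch above: an unaligned load
// followed by per-lane stores of a `__m128d`. The aligned variants
// (`_mm_load_pd`, `_mm_store_pd`) have the same shape but require
// 16-byte-aligned pointers; `is_x86_feature_detected!` is assumed.
#[cfg(target_arch = "x86_64")]
pub fn split_lanes(src: &[f64; 2]) -> (f64, f64) {
    use std::arch::x86_64::*;

    if !is_x86_feature_detected!("sse2") {
        return (src[0], src[1]);
    }
    unsafe {
        let v = _mm_loadu_pd(src.as_ptr()); // tolerates any alignment
        let (mut lo, mut hi) = (0.0f64, 0.0f64);
        _mm_storel_pd(&mut lo, v); // low lane
        _mm_storeh_pd(&mut hi, v); // high lane
        (lo, hi)
    }
}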
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1) #[inline] #[target_feature(enable = "sse2")] // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { _mm_load1_pd(mem_addr) } @@ -2142,9 +2730,12 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { /// Load 2 double-precision (64-bit) floating-point elements from memory into /// the returned vector in reverse order. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movapd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { let a = _mm_load_pd(mem_addr); simd_shuffle2(a, a, [1, 0]) @@ -2153,9 +2744,12 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { /// Load 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { let mut dst = _mm_undefined_pd(); ptr::copy_nonoverlapping( @@ -2169,10 +2763,13 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { /// Constructs a 128-bit floating-point vector of [2 x double] from two /// 128-bit vector parameters of [2 x double], using the immediate-value /// parameter as a specifier. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(shufpd, imm8 = 1))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { match imm8 & 0b11 { 0b00 => simd_shuffle2(a, b, [0, 2]), @@ -2185,71 +2782,98 @@ pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { /// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits are set to the lower 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) } /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { mem::transmute(a) } /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// integer vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { mem::transmute::(simd_cast(a)) } /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// floating-point vector of [2 x double]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { mem::transmute(a) } /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// integer vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { mem::transmute(a) } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [2 x double]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { simd_cast(a.as_i64x2()) } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [4 x float]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { mem::transmute(a) } /// Return vector of type __m128d with undefined elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_pd() -> __m128d { _mm_set1_pd(mem::uninitialized()) } /// Return vector of type __m128i with undefined elements. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128) #[inline] #[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_si128() -> __m128i { _mm_set1_epi8(mem::uninitialized()) } @@ -2259,9 +2883,12 @@ pub unsafe fn _mm_undefined_si128() -> __m128i { /// /// * The [127:64] bits are copied from the [127:64] bits of the second input /// * The [63:0] bits are copied from the [127:64] bits of the first input +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [1, 3]) } @@ -2271,9 +2898,12 @@ pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { /// /// * The [127:64] bits are copied from the [63:0] bits of the second input /// * The [63:0] bits are copied from the [63:0] bits of the first input +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movlhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [0, 2]) } diff --git a/coresimd/x86/sse3.rs b/coresimd/x86/sse3.rs index ea41164b24..edcbf1deac 100644 --- a/coresimd/x86/sse3.rs +++ b/coresimd/x86/sse3.rs @@ -9,54 +9,72 @@ use stdsimd_test::assert_instr; /// Alternatively add and subtract packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(addsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 { addsubps(a, b) } /// Alternatively add and subtract packed double-precision (64-bit) /// floating-point elements in `a` to/from packed elements in `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(addsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { addsubpd(a, b) } /// Horizontally add adjacent pairs of double-precision (64-bit) /// floating-point elements in `a` and `b`, and pack the results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(haddpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { haddpd(a, b) } /// Horizontally add adjacent pairs of single-precision (32-bit) /// floating-point elements in `a` and `b`, and pack the results. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(haddps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { haddps(a, b) } /// Horizontally subtract adjacent pairs of double-precision (64-bit) /// floating-point elements in `a` and `b`, and pack the results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(hsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { hsubpd(a, b) } /// Horizontally subtract adjacent pairs of single-precision (32-bit) /// floating-point elements in `a` and `b`, and pack the results. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(hsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { hsubps(a, b) } @@ -64,45 +82,60 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { /// Load 128-bits of integer data from unaligned memory. /// This intrinsic may perform better than `_mm_loadu_si128` /// when the data crosses a cache line boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(lddqu))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i { mem::transmute(lddqu(mem_addr as *const _)) } /// Duplicate the low double-precision (64-bit) floating-point element /// from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d { simd_shuffle2(a, a, [0, 0]) } /// Load a double-precision (64-bit) floating-point element from memory /// into both elements of the return vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d { _mm_load1_pd(mem_addr) } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements /// from `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movshdup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 { simd_shuffle4(a, a, [1, 1, 3, 3]) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements /// from `a`.
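// Illustrative usage sketch, not part of the patch above: two rounds of SSE3
// horizontal addition reduce four f32 lanes to a single sum. `_mm_setr_ps`
// and `_mm_cvtss_f32` are SSE intrinsics outside this hunk;
// `is_x86_feature_detected!` is assumed.
#[cfg(target_arch = "x86_64")]
pub fn horizontal_sum() {
    use std::arch::x86_64::*;

    if !is_x86_feature_detected!("sse3") {
        return;
    }
    unsafe {
        let v = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let pairs = _mm_hadd_ps(v, v);         // [1+2, 3+4, 1+2, 3+4]
        let total = _mm_hadd_ps(pairs, pairs); // every lane holds 10.0
        assert_eq!(_mm_cvtss_f32(total), 10.0);
    }
}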
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movsldup))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 { simd_shuffle4(a, a, [0, 0, 2, 2]) } diff --git a/coresimd/x86/sse41.rs b/coresimd/x86/sse41.rs index 5e03bcc8b5..81819f7b34 100644 --- a/coresimd/x86/sse41.rs +++ b/coresimd/x86/sse41.rs @@ -10,34 +10,47 @@ use stdsimd_test::assert_instr; // SSE4 rounding constants /// round to nearest +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00; /// round down +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01; /// round up +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_TO_POS_INF: i32 = 0x02; /// truncate +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_TO_ZERO: i32 = 0x03; /// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE` +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04; /// do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_RAISE_EXC: i32 = 0x00; /// suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_NO_EXC: i32 = 0x08; /// round to nearest and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_NINT: i32 = 0x00; /// round down and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_FLOOR: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF); /// round up and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_CEIL: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF); /// truncate and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_TRUNC: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO); /// use MXCSR.RC and do not suppress exceptions; see /// `vendor::_MM_SET_ROUNDING_MODE` +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_RINT: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION); /// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE` +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FROUND_NEARBYINT: i32 = (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION); @@ -46,9 +59,12 @@ pub const _MM_FROUND_NEARBYINT: i32 = /// The high bit of each corresponding mask byte determines the selection. /// If the high bit is set the element of `a` is selected. The element /// of `b` is selected otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pblendvb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_epi8( a: __m128i, b: __m128i, mask: __m128i ) -> __m128i { @@ -64,10 +80,13 @@ pub unsafe fn _mm_blendv_epi8( /// The mask bits determine the selection. A clear bit selects the /// corresponding element of `a`, and a set bit the corresponding /// element of `b`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let a = a.as_i16x8(); let b = b.as_i16x8(); @@ -81,28 +100,37 @@ pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// Blend packed double-precision (64-bit) floating-point elements from `a` /// and `b` using `mask` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendvpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { blendvpd(a, b, mask) } /// Blend packed single-precision (32-bit) floating-point elements from `a` /// and `b` using `mask` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendvps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { blendvps(a, b, mask) } /// Blend packed double-precision (64-bit) floating-point elements from `a` /// and `b` using control mask `imm2` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d { macro_rules! call { ($imm2:expr) => { @@ -114,10 +142,13 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d { /// Blend packed single-precision (32-bit) floating-point elements from `a` /// and `b` using mask `imm4` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 { macro_rules! call { ($imm4:expr) => { @@ -129,11 +160,14 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 { /// Extract a single-precision (32-bit) floating-point element from `a`, /// selected with `imm8` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps) #[inline] #[target_feature(enable = "sse4.1")] // TODO: Add test for Windows #[cfg_attr(test, assert_instr(extractps, imm8 = 0))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 { mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11)) } @@ -142,21 +176,27 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 { /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. 
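The variable blends above make branch-free selects straightforward. A small sketch (the helper name is hypothetical) computes a per-lane minimum by pairing an SSE compare with `_mm_blendv_ps`:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.1")]
unsafe fn select_smaller(a: __m128, b: __m128) -> __m128 {
    // Lanes where `b < a` have every mask bit set, so the blend takes `b`
    // there and `a` everywhere else.
    let mask = _mm_cmplt_ps(b, a);
    _mm_blendv_ps(a, b, mask)
}
```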
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pextrb, imm8 = 0))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 { let imm8 = (imm8 & 15) as u32; simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32 } /// Extract an 32-bit integer from `a` selected with `imm8` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32) #[inline] #[target_feature(enable = "sse4.1")] // TODO: Add test for Windows #[cfg_attr(test, assert_instr(extractps, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 { let imm8 = (imm8 & 3) as u32; simd_extract::<_, i32>(a.as_i32x4(), imm8) @@ -184,10 +224,13 @@ pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 { /// /// * Bits `[3:0]`: If any of these bits are set, the corresponding result /// element is cleared. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { @@ -199,10 +242,13 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { /// Return a copy of `a` with the 8-bit integer from `i` inserted at a /// location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert( a.as_i8x16(), @@ -213,10 +259,13 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i { /// Return a copy of `a` with the 32-bit integer from `i` inserted at a /// location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert( a.as_i32x4(), @@ -227,97 +276,130 @@ pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i { /// Compare packed 8-bit integers in `a` and `b` and return packed maximum /// values in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsb(a.as_i8x16(), b.as_i8x16())) } /// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed /// maximum. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxuw(a.as_u16x8(), b.as_u16x8())) } /// Compare packed 32-bit integers in `a` and `b`, and return packed maximum /// values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsd(a.as_i32x4(), b.as_i32x4())) } /// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed /// maximum values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxud))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxud(a.as_u32x4(), b.as_u32x4())) } /// Compare packed 8-bit integers in `a` and `b` and return packed minimum /// values in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsb(a.as_i8x16(), b.as_i8x16())) } /// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed /// minimum. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminuw(a.as_u16x8(), b.as_u16x8())) } /// Compare packed 32-bit integers in `a` and `b`, and return packed minimum /// values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsd(a.as_i32x4(), b.as_i32x4())) } /// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed /// minimum values. 
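One common way the packed min/max pair above is combined, shown as a sketch with an illustrative helper name, is a per-lane clamp:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.1")]
unsafe fn clamp_epi32(v: __m128i, lo: __m128i, hi: __m128i) -> __m128i {
    // max(v, lo) raises lanes below the floor, min(.., hi) lowers lanes
    // above the ceiling, clamping each 32-bit lane into [lo, hi].
    _mm_min_epi32(_mm_max_epi32(v, lo), hi)
}
```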
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminud))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminud(a.as_u32x4(), b.as_u32x4())) } /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(packusdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } /// Compare packed 64-bit integers in `a` and `b` for equality +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pcmpeqq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { let a = a.as_i8x16(); let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); @@ -325,9 +407,12 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { } /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { let a = a.as_i8x16(); let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]); @@ -336,9 +421,12 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed /// 64-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { let a = a.as_i8x16(); let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]); @@ -346,9 +434,12 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { } /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { let a = a.as_i16x8(); let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]); @@ -356,9 +447,12 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { } /// Sign extend packed 16-bit 
integers in `a` to packed 64-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { let a = a.as_i16x8(); let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]); @@ -366,9 +460,12 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { } /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { let a = a.as_i32x4(); let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]); @@ -376,9 +473,12 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { } /// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { let a = a.as_u8x16(); let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); @@ -386,9 +486,12 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { } /// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { let a = a.as_u8x16(); let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]); @@ -396,9 +499,12 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { } /// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { let a = a.as_u8x16(); let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]); @@ -407,9 +513,12 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { /// Zero extend packed unsigned 16-bit integers in `a` /// to packed 32-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { let a = a.as_u16x8(); let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]); @@ -418,9 +527,12 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { /// Zero extend packed unsigned 16-bit integers in `a` /// to packed 64-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, 
assert_instr(pmovzxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { let a = a.as_u16x8(); let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]); @@ -429,9 +541,12 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { /// Zero extend packed unsigned 32-bit integers in `a` /// to packed 64-bit integers +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { let a = a.as_u32x4(); let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]); @@ -445,10 +560,13 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { /// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of /// the dot product will be stored in the return value component. Otherwise if /// the broadcast mask bit is zero then the return component will be zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(dppd, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { @@ -465,10 +583,13 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { /// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of /// the dot product will be stored in the return value component. Otherwise if /// the broadcast mask bit is zero then the return component will be zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(dpps, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { @@ -481,9 +602,12 @@ pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { /// Round the packed double-precision (64-bit) floating-point elements in `a` /// down to an integer value, and store the results as packed double-precision /// floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { roundpd(a, _MM_FROUND_FLOOR) } @@ -491,9 +615,12 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { /// Round the packed single-precision (32-bit) floating-point elements in `a` /// down to an integer value, and store the results as packed single-precision /// floating-point elements. 
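The conditional dot product described above is easiest to see in scalar form. The following model (not the intrinsic itself) mirrors how the two halves of `imm8` are used in the four-lane `dpps` case:

```
/// Scalar model of the conditional dot product: bits [7:4] of `imm8` gate
/// which products enter the sum, bits [3:0] gate which result lanes
/// receive the broadcast sum.
fn dp_ps_model(a: [f32; 4], b: [f32; 4], imm8: u8) -> [f32; 4] {
    let mut sum = 0.0;
    for i in 0..4 {
        if imm8 & (1 << (i + 4)) != 0 {
            sum += a[i] * b[i];
        }
    }
    let mut r = [0.0f32; 4];
    for i in 0..4 {
        if imm8 & (1 << i) != 0 {
            r[i] = sum;
        }
    }
    r
}
```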
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { roundps(a, _MM_FROUND_FLOOR) } @@ -503,9 +630,12 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { /// floating-point element in the lower element of the intrinsic result, /// and copy the upper element from `a` to the upper element of the intrinsic /// result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { roundsd(a, b, _MM_FROUND_FLOOR) } @@ -515,9 +645,12 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { /// floating-point element in the lower element of the intrinsic result, /// and copy the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { roundss(a, b, _MM_FROUND_FLOOR) } @@ -525,9 +658,12 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { /// Round the packed double-precision (64-bit) floating-point elements in `a` /// up to an integer value, and store the results as packed double-precision /// floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { roundpd(a, _MM_FROUND_CEIL) } @@ -535,9 +671,12 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { /// Round the packed single-precision (32-bit) floating-point elements in `a` /// up to an integer value, and store the results as packed single-precision /// floating-point elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { roundps(a, _MM_FROUND_CEIL) } @@ -547,9 +686,12 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { /// floating-point element in the lower element of the intrisic result, /// and copy the upper element from `a` to the upper element /// of the intrinsic result. 
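A small sketch of the packed rounding helpers above; the input values are arbitrary:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.1")]
unsafe fn floor_ceil_demo() {
    let v = _mm_set_ps(2.5, 0.5, 1.1, -1.9); // lanes, low to high: -1.9, 1.1, 0.5, 2.5
    let mut lo = [0.0f32; 4];
    let mut hi = [0.0f32; 4];
    _mm_storeu_ps(lo.as_mut_ptr(), _mm_floor_ps(v));
    _mm_storeu_ps(hi.as_mut_ptr(), _mm_ceil_ps(v));
    assert_eq!(lo, [-2.0, 1.0, 0.0, 2.0]);
    assert_eq!(hi, [-1.0, 2.0, 1.0, 3.0]);
}
```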
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { roundsd(a, b, _MM_FROUND_CEIL) } @@ -559,9 +701,12 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { /// floating-point element in the lower element of the intrinsic result, /// and copy the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { roundss(a, b, _MM_FROUND_CEIL) } @@ -602,10 +747,13 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd, rounding = 0))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d { macro_rules! call { ($imm4:expr) => { @@ -652,10 +800,13 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d { /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps, rounding = 0))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 { macro_rules! call { ($imm4:expr) => { @@ -703,10 +854,13 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 { /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd, rounding = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { macro_rules! call { ($imm4:expr) => { @@ -754,10 +908,13 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss, rounding = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { macro_rules! call { ($imm4:expr) => { @@ -786,18 +943,24 @@ pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// * bits `[15:0]` - contain the minimum value found in parameter `a`, /// * bits `[18:16]` - contain the index of the minimum value /// * remaining bits are set to `0`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(phminposuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { mem::transmute(phminposuw(a.as_u16x8())) } /// Multiply the low 32-bit integers from each packed 64-bit /// element in `a` and `b`, and return the signed 64-bit result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmuldq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmuldq(a.as_i32x4(), b.as_i32x4())) } @@ -808,9 +971,12 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping /// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would /// return a negative number. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmulld))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } @@ -846,10 +1012,13 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { /// /// * A `__m128i` vector containing the sums of the sets of /// absolute differences between both operands. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let a = a.as_u8x16(); let b = b.as_u8x16(); @@ -874,9 +1043,12 @@ pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// /// * `1` - if the specified bits are all zeros, /// * `0` - otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { ptestz(a.as_i64x2(), mask.as_i64x2()) } @@ -894,9 +1066,12 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { /// /// * `1` - if the specified bits are all ones, /// * `0` - otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { ptestc(a.as_i64x2(), mask.as_i64x2()) } @@ -914,9 +1089,12 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { /// /// * `1` - if the specified bits are neither all zeros nor all ones, /// * `0` - otherwise. 
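A typical use of the `ptest`-based predicates above is an all-zero check; a minimal sketch with an illustrative helper name:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.1")]
unsafe fn is_all_zero(v: __m128i) -> bool {
    // The zero flag is set (nonzero return) exactly when `v & v` has no bits set.
    _mm_testz_si128(v, v) != 0
}
```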
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { ptestnzc(a.as_i64x2(), mask.as_i64x2()) } @@ -934,9 +1112,12 @@ pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { /// /// * `1` - if the specified bits are all zeros, /// * `0` - otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { _mm_testz_si128(a, mask) } @@ -952,10 +1133,13 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { /// /// * `1` - if the bits specified in the operand are all set to 1, /// * `0` - otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pcmpeqd))] #[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) } @@ -973,9 +1157,12 @@ pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { /// /// * `1` - if the specified bits are neither all zeros nor all ones, /// * `0` - otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { _mm_testnzc_si128(a, mask) } diff --git a/coresimd/x86/sse42.rs b/coresimd/x86/sse42.rs index 21284f1cde..efbfe79d3e 100644 --- a/coresimd/x86/sse42.rs +++ b/coresimd/x86/sse42.rs @@ -10,49 +10,68 @@ use coresimd::simd_llvm::*; use coresimd::x86::*; /// String contains unsigned 8-bit characters *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000; /// String contains unsigned 16-bit characters +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001; /// String contains signed 8-bit characters +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010; /// String contains unsigned 16-bit characters +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011; /// For each character in `a`, find if it is in `b` *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000; /// For each character in `a`, determine if /// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...` +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100; /// The strings defined by `a` and `b` are equal +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000; /// Search for the defined substring in the target +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; /// Do not negate results *(Default)* +#[stable(feature = "simd_x86", since = 
"1.27.0")] pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; /// Negate results +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; /// Do not negate results before the end of the string +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; /// Negate results only before the end of the string +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; /// **Index only**: return the least significant bit *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000; /// **Index only**: return the most significant bit +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000; /// **Mask only**: return the bit mask +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; /// **Mask only**: return the byte mask +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; /// Compare packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return the generated mask. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -296,10 +315,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html /// [`_mm_cmpestri`]: fn._mm_cmpestri.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -314,10 +336,13 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 { /// Compare packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if any character in `b` was null. /// and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -332,10 +357,13 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 { /// Compare packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if the resulting mask was non-zero, /// and `0` otherwise. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -350,10 +378,13 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 { /// Compare packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and returns `1` if any character in `a` was null, /// and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -367,10 +398,13 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 { /// Compare packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return bit `0` of the resulting bit mask. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -385,10 +419,13 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 { /// Compare packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if `b` did not contain a null /// character and the resulting mask was zero, and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); @@ -402,10 +439,13 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 { /// Compare packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return the generated mask. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrm( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> __m128i { @@ -506,10 +546,13 @@ pub unsafe fn _mm_cmpestrm( /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html /// [`_mm_cmpistri`]: fn._mm_cmpistri.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestri( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> i32 { @@ -526,10 +569,13 @@ pub unsafe fn _mm_cmpestri( /// Compare packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// `b` was null, and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrz( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> i32 { @@ -546,10 +592,13 @@ pub unsafe fn _mm_cmpestrz( /// Compare packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if the resulting mask /// was non-zero, and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrc( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> i32 { @@ -566,10 +615,13 @@ pub unsafe fn _mm_cmpestrc( /// Compare packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// a was null, and `0` otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrs( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> i32 { @@ -586,10 +638,13 @@ pub unsafe fn _mm_cmpestrs( /// Compare packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return bit `0` of the resulting /// bit mask. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestro( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> i32 { @@ -607,10 +662,13 @@ pub unsafe fn _mm_cmpestro( /// using the control in `imm8`, and return `1` if `b` did not /// contain a null character and the resulting mask was zero, and `0` /// otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestra( a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32 ) -> i32 { @@ -626,36 +684,48 @@ pub unsafe fn _mm_cmpestra( /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 8-bit integer `v`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { crc32_32_8(crc, v) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 16-bit integer `v`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { crc32_32_16(crc, v) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 32-bit integer `v`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { crc32_32_32(crc, v) } /// Compare packed 64-bit integers in `a` and `b` for greater-than, /// return the results. 
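A sketch of how the byte-granularity CRC32 intrinsic above is typically folded over a buffer, using the customary all-ones seed and final inversion of the CRC-32C convention; the helper name and that convention are assumptions, not part of this patch:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32c(bytes: &[u8]) -> u32 {
    let mut crc = !0u32;
    for &b in bytes {
        // Each call folds one more byte into the running checksum.
        crc = _mm_crc32_u8(crc, b);
    }
    !crc
}
```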
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpgtq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } diff --git a/coresimd/x86/sse4a.rs b/coresimd/x86/sse4a.rs index 370034c38a..fff6043564 100644 --- a/coresimd/x86/sse4a.rs +++ b/coresimd/x86/sse4a.rs @@ -36,6 +36,7 @@ extern "C" { #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(extrq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { mem::transmute(extrq(x.as_i64x2(), y.as_i8x16())) } @@ -52,6 +53,7 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(insertq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { mem::transmute(insertq(x.as_i64x2(), y.as_i64x2())) } @@ -60,6 +62,7 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(movntsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { movntsd(p, a); } @@ -68,6 +71,7 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(movntss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { movntss(p, a); } diff --git a/coresimd/x86/ssse3.rs b/coresimd/x86/ssse3.rs index 4bee30e776..ec0477a611 100644 --- a/coresimd/x86/ssse3.rs +++ b/coresimd/x86/ssse3.rs @@ -10,9 +10,12 @@ use stdsimd_test::assert_instr; /// Compute the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. 
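A short sketch of the 64-bit greater-than compare above; each lane of the result is either all ones or all zeros:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn cmpgt_epi64_demo() {
    let a = _mm_set_epi64x(10, -1); // lanes, low to high: -1, 10
    let b = _mm_set_epi64x(3, 0);   // lanes, low to high:  0, 3
    let r = _mm_cmpgt_epi64(a, b);
    let mut out = [0i64; 2];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
    assert_eq!(out, [0, -1]); // -1 > 0 is false; 10 > 3 is true
}
```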
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pabsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { mem::transmute(pabsb128(a.as_i8x16())) } @@ -20,9 +23,12 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { /// Compute the absolute value of each of the packed 16-bit signed integers in /// `a` and /// return the 16-bit unsigned integer +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pabsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { mem::transmute(pabsw128(a.as_i16x8())) } @@ -30,9 +36,12 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { /// Compute the absolute value of each of the packed 32-bit signed integers in /// `a` and /// return the 32-bit unsigned integer +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pabsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i { mem::transmute(pabsd128(a.as_i32x4())) } @@ -61,19 +70,25 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i { /// r /// } /// ``` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pshufb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) } /// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result, /// shift the result right by `n` bytes, and return the low 16 bytes. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(palignr, n = 15))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i { let n = n as u32; // If palignr is shifting the pair of vectors more than the size of two @@ -141,9 +156,12 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i { /// Horizontally add the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } @@ -151,27 +169,36 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { /// Horizontally add the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 
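Since `_mm_shuffle_epi8` above treats each control byte as a source-lane index, a classic use is reversing the sixteen bytes of a vector; a minimal sketch:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn reverse_bytes(v: __m128i) -> __m128i {
    // Control byte `i` selects source byte `15 - i`, so the result is `v` reversed.
    let idx = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                            7, 6, 5, 4, 3, 2, 1, 0);
    _mm_shuffle_epi8(v, idx)
}
```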
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) } /// Horizontally add the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [4 x i32]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) } /// Horizontally subtract the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [8 x i16]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) } @@ -180,18 +207,24 @@ pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { /// packed 128-bit vectors of [8 x i16]. Positive differences greater than /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are /// saturated to 8000h. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) } /// Horizontally subtract the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [4 x i32]. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } @@ -201,9 +234,12 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { /// integer values contained in the second source operand, add pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pmaddubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) } @@ -211,9 +247,12 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { /// Multiply packed 16-bit signed integer values, truncate the 32-bit /// product to the 18 most significant bits by right-shifting, round the /// truncated value by adding 1, and write bits [16:1] to the destination. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pmulhrsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) } @@ -222,9 +261,12 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { /// integer in `b` is negative, and return the result. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignb))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16())) } @@ -233,9 +275,12 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { /// integer in `b` is negative, and return the results. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignw))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8())) } @@ -244,9 +289,12 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { /// integer in `b` is negative, and return the results. /// Element in result are zeroed out when the corresponding element in `b` /// is zero. 
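A small sketch of the `psign` behaviour described above, with arbitrary inputs:

```
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn sign_demo() {
    let a = _mm_setr_epi32(5, 6, 7, 8);
    let b = _mm_setr_epi32(-1, 0, 2, -9);
    // Negative lanes of `b` negate, zero lanes clear, positive lanes pass through.
    let r = _mm_sign_epi32(a, b);
    let mut out = [0i32; 4];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
    assert_eq!(out, [-5, 0, 7, -8]);
}
```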
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } diff --git a/coresimd/x86/tbm.rs b/coresimd/x86/tbm.rs index fa5b08d197..9793edfede 100644 --- a/coresimd/x86/tbm.rs +++ b/coresimd/x86/tbm.rs @@ -70,6 +70,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcfill_u32(x: u32) -> u32 { x & (x.wrapping_add(1)) } @@ -81,6 +82,7 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcfill))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcfill_u64(x: u64) -> u64 { x & (x.wrapping_add(1)) } @@ -91,6 +93,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blci))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blci_u32(x: u32) -> u32 { x | !(x.wrapping_add(1)) } @@ -102,6 +105,7 @@ pub unsafe fn _blci_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blci))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blci_u64(x: u64) -> u64 { x | !(x.wrapping_add(1)) } @@ -112,6 +116,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcic))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcic_u32(x: u32) -> u32 { !x & (x.wrapping_add(1)) } @@ -123,6 +128,7 @@ pub unsafe fn _blcic_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcic))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcic_u64(x: u64) -> u64 { !x & (x.wrapping_add(1)) } @@ -134,6 +140,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_add(1)) } @@ -146,6 +153,7 @@ pub unsafe fn _blcmsk_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_add(1)) } @@ -156,6 +164,7 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcs))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcs_u32(x: u32) -> u32 { x | (x.wrapping_add(1)) } @@ -167,6 +176,7 @@ pub unsafe fn _blcs_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcs))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcs_u64(x: u64) -> u64 { x | x.wrapping_add(1) } @@ -177,6 +187,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = 
"tbm")] #[cfg_attr(test, assert_instr(blsfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsfill_u32(x: u32) -> u32 { x | (x.wrapping_sub(1)) } @@ -188,6 +199,7 @@ pub unsafe fn _blsfill_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsfill))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsfill_u64(x: u64) -> u64 { x | (x.wrapping_sub(1)) } @@ -198,6 +210,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsic))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsic_u32(x: u32) -> u32 { !x | (x.wrapping_sub(1)) } @@ -209,6 +222,7 @@ pub unsafe fn _blsic_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsic))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsic_u64(x: u64) -> u64 { !x | (x.wrapping_sub(1)) } @@ -220,6 +234,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _t1mskc_u32(x: u32) -> u32 { !x | (x.wrapping_add(1)) } @@ -232,6 +247,7 @@ pub unsafe fn _t1mskc_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _t1mskc_u64(x: u64) -> u64 { !x | (x.wrapping_add(1)) } @@ -243,6 +259,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 { #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(tzmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzmsk_u32(x: u32) -> u32 { !x & (x.wrapping_sub(1)) } @@ -255,6 +272,7 @@ pub unsafe fn _tzmsk_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(tzmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzmsk_u64(x: u64) -> u64 { !x & (x.wrapping_sub(1)) } diff --git a/coresimd/x86/xsave.rs b/coresimd/x86/xsave.rs index 06f5e9a215..6f59c384ea 100644 --- a/coresimd/x86/xsave.rs +++ b/coresimd/x86/xsave.rs @@ -31,9 +31,12 @@ extern "C" { /// /// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of /// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xsave))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) { xsave( mem_addr, @@ -48,9 +51,12 @@ pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) { /// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xrstor))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } @@ -58,24 +64,31 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { /// `XFEATURE_ENABLED_MASK` for `XCR` /// /// This intrinsic maps to `XSETBV` instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; /// Copy 64-bits from `val` to the extended control register (`XCR`) specified /// by `a`. /// /// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsetbv) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xsetbv))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsetbv(a: u32, val: u64) { xsetbv(a, (val >> 32) as u32, val as u32); } /// Reads the contents of the extended control register `XCR` /// specified in `xcr_no`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xgetbv) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xgetbv))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { let eax: u32; let edx: u32; @@ -90,9 +103,12 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { /// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize /// the manner in which data is saved. The performance of this instruction will /// be equal to or better than using the `XSAVE` instruction. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt) #[inline] #[target_feature(enable = "xsave,xsaveopt")] #[cfg_attr(test, assert_instr(xsaveopt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) { xsaveopt( mem_addr, @@ -107,9 +123,12 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) { /// `xsavec` differs from `xsave` in that it uses compaction and that it may /// use init optimization. State is saved based on bits [62:0] in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec) #[inline] #[target_feature(enable = "xsave,xsavec")] #[cfg_attr(test, assert_instr(xsavec))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) { xsavec( mem_addr, @@ -125,9 +144,12 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) { /// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the /// modified optimization. State is saved based on bits [62:0] in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xsaves))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) { xsaves( mem_addr, @@ -145,9 +167,12 @@ pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) { /// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xrstors))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) { xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } diff --git a/coresimd/x86_64/abm.rs b/coresimd/x86_64/abm.rs index 43fbee28ef..8175c10f99 100644 --- a/coresimd/x86_64/abm.rs +++ b/coresimd/x86_64/abm.rs @@ -23,17 +23,23 @@ use stdsimd_test::assert_instr; /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64) #[inline] #[target_feature(enable = "lzcnt")] #[cfg_attr(test, assert_instr(lzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } /// Counts the bits that are set. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64) #[inline] #[target_feature(enable = "popcnt")] #[cfg_attr(test, assert_instr(popcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _popcnt64(x: i64) -> i32 { x.count_ones() as i32 } diff --git a/coresimd/x86_64/avx.rs b/coresimd/x86_64/avx.rs index df534d0aa6..510949ad4a 100644 --- a/coresimd/x86_64/avx.rs +++ b/coresimd/x86_64/avx.rs @@ -19,10 +19,13 @@ use mem; /// Copy `a` to result, and insert the 64-bit integer `i` into result /// at the location specified by `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64) #[inline] #[rustc_args_required_const(2)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i64x4(), (index as u32) & 3, i)) } diff --git a/coresimd/x86_64/avx2.rs b/coresimd/x86_64/avx2.rs index 86d2863739..3e15e2595e 100644 --- a/coresimd/x86_64/avx2.rs +++ b/coresimd/x86_64/avx2.rs @@ -22,10 +22,13 @@ use coresimd::simd_llvm::*; use coresimd::x86::*; /// Extract a 64-bit integer from `a`, selected with `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64) #[inline] #[target_feature(enable = "avx2")] #[rustc_args_required_const(1)] // This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 { let imm8 = (imm8 & 3) as u32; simd_extract(a.as_i64x4(), imm8) } diff --git a/coresimd/x86_64/bmi.rs b/coresimd/x86_64/bmi.rs index 2f55b7109c..e9841c36ef 100644 --- a/coresimd/x86_64/bmi.rs +++ b/coresimd/x86_64/bmi.rs @@ -14,10 +14,13 @@ use stdsimd_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64) } @@ -27,36 +30,48 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { /// /// Bits [7,0] of `control` specify the index to the first bit in the range to /// be extracted, and bits [15,8] specify the length of the range. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 { x86_bmi_bextr_64(a, control) } /// Bitwise logical `AND` of inverted `a` with `b`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(andn))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } /// Extract lowest set isolated bit. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsi))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } /// Get mask up to lowest set bit. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_sub(1_u64)) } @@ -64,10 +79,13 @@ pub unsafe fn _blsmsk_u64(x: u64) -> u64 { /// Resets the lowest set bit of `x`. /// /// If `x` is zero, sets CF. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsr))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsr_u64(x: u64) -> u64 { x & (x.wrapping_sub(1)) } @@ -75,9 +93,12 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 { /// Counts the number of trailing least significant zero bits. /// /// When the source operand is 0, it returns its size in bits.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } @@ -85,9 +106,12 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 { /// Counts the number of trailing least significant zero bits. /// /// When the source operand is 0, it returns its size in bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 { x.trailing_zeros() as i64 } diff --git a/coresimd/x86_64/bmi2.rs b/coresimd/x86_64/bmi2.rs index d97d371b3e..29c5ee0c77 100644 --- a/coresimd/x86_64/bmi2.rs +++ b/coresimd/x86_64/bmi2.rs @@ -17,10 +17,13 @@ use stdsimd_test::assert_instr; /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64) #[inline] #[cfg_attr(test, assert_instr(mulx))] #[target_feature(enable = "bmi2")] #[cfg(not(target_arch = "x86"))] // calls an intrinsic +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { let result: u128 = (a as u128) * (b as u128); *hi = (result >> 64) as u64; @@ -28,30 +31,39 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { } /// Zero higher bits of `a` >= `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(bzhi))] #[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 { x86_bmi2_bzhi_64(a, index as u64) } /// Scatter contiguous low order bits of `a` to the result at the positions /// specified by the `mask`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pdep))] #[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 { x86_bmi2_pdep_64(a, mask) } /// Gathers the bits of `x` specified by the `mask` into the contiguous low /// order bit positions of the result. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pext))] #[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 { x86_bmi2_pext_64(a, mask) } diff --git a/coresimd/x86_64/bswap.rs b/coresimd/x86_64/bswap.rs index 340780f7f5..bcd3c4bbc7 100644 --- a/coresimd/x86_64/bswap.rs +++ b/coresimd/x86_64/bswap.rs @@ -6,8 +6,11 @@ use stdsimd_test::assert_instr; /// Return an integer with the reversed byte order of x +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64) #[inline] #[cfg_attr(test, assert_instr(bswap))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bswap64(x: i64) -> i64 { bswap_i64(x) } diff --git a/coresimd/x86_64/fxsr.rs b/coresimd/x86_64/fxsr.rs index 42280b4b3a..1bc5441647 100644 --- a/coresimd/x86_64/fxsr.rs +++ b/coresimd/x86_64/fxsr.rs @@ -21,9 +21,12 @@ extern "C" { /// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxsave64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxsave64(mem_addr: *mut u8) { fxsave64(mem_addr) } @@ -42,9 +45,12 @@ pub unsafe fn _fxsave64(mem_addr: *mut u8) { /// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxrstor64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxrstor64(mem_addr: *const u8) { fxrstor64(mem_addr) } diff --git a/coresimd/x86_64/rdrand.rs b/coresimd/x86_64/rdrand.rs index 917e900fef..7a64697591 100644 --- a/coresimd/x86_64/rdrand.rs +++ b/coresimd/x86_64/rdrand.rs @@ -12,10 +12,13 @@ use stdsimd_test::assert_instr; /// Read a hardware generated 64-bit random value and store the result in val. /// Return 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step) #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[cfg_attr(feature = "cargo-clippy", allow(stutter))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { let (v, flag) = x86_rdrand64_step(); *val = v; @@ -24,9 +27,12 @@ pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { /// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store /// in val. Return 1 if a random value was generated, and 0 otherwise. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step) #[inline] #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 { let (v, flag) = x86_rdseed64_step(); *val = v; diff --git a/coresimd/x86_64/sse.rs b/coresimd/x86_64/sse.rs index 808470c17f..a06a515bc4 100644 --- a/coresimd/x86_64/sse.rs +++ b/coresimd/x86_64/sse.rs @@ -24,9 +24,12 @@ extern "C" { /// [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { cvtss2si64(a) } @@ -40,9 +43,12 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { /// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { cvttss2si64(a) } @@ -52,9 +58,12 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { /// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit /// input). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { cvtsi642ss(a, b) } diff --git a/coresimd/x86_64/sse2.rs b/coresimd/x86_64/sse2.rs index e48708ee59..30089da6f8 100644 --- a/coresimd/x86_64/sse2.rs +++ b/coresimd/x86_64/sse2.rs @@ -17,34 +17,46 @@ extern "C" { /// Convert the lower double-precision (64-bit) floating-point element in a to /// a 64-bit integer. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 { cvtsd2si64(a) } /// Alias for `_mm_cvtsd_si64` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { _mm_cvtsd_si64(a) } /// Convert the lower double-precision (64-bit) floating-point element in `a` /// to a 64-bit integer with truncation. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 { cvttsd2si64(a) } /// Alias for `_mm_cvttsd_si64` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { _mm_cvttsd_si64(a) } @@ -52,61 +64,82 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { /// Stores a 64-bit integer value in the specified memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movnti))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { intrinsics::nontemporal_store(mem_addr, a); } /// Return a vector whose lowest element is `a` and all higher elements are /// `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { _mm_set_epi64x(0, a) } /// Return a vector whose lowest element is `a` and all higher elements are /// `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { _mm_cvtsi64_si128(a) } /// Return the lowest element of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { simd_extract(a.as_i64x2(), 0) } /// Return the lowest element of `a`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { _mm_cvtsi128_si64(a) } /// Return `a` with its lower element replaced by `b` after converting it to /// an `f64`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { simd_insert(a, 0, b as f64) } /// Return `a` with its lower element replaced by `b` after converting it to /// an `f64`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { _mm_cvtsi64_sd(a, b) } diff --git a/coresimd/x86_64/sse41.rs b/coresimd/x86_64/sse41.rs index 8a0bcdb05f..63b4a6c4ca 100644 --- a/coresimd/x86_64/sse41.rs +++ b/coresimd/x86_64/sse41.rs @@ -8,11 +8,14 @@ use mem; use stdsimd_test::assert_instr; /// Extract a 64-bit integer from `a` selected with `imm8` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64) #[inline] #[target_feature(enable = "sse4.1")] // TODO: Add test for Windows #[cfg_attr(test, assert_instr(pextrq, imm8 = 1))] #[rustc_args_required_const(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 { let imm8 = (imm8 & 1) as u32; simd_extract(a.as_i64x2(), imm8) @@ -20,10 +23,13 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 { /// Return a copy of `a` with the 64-bit integer from `i` inserted at a /// location specified by `imm8`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))] #[rustc_args_required_const(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i64x2(), (imm8 & 1) as u32, i)) } diff --git a/coresimd/x86_64/sse42.rs b/coresimd/x86_64/sse42.rs index 1cbd04d6a8..bf42fd2c59 100644 --- a/coresimd/x86_64/sse42.rs +++ b/coresimd/x86_64/sse42.rs @@ -11,9 +11,12 @@ extern "C" { /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 64-bit integer `v`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { crc32_64_64(crc, v) } diff --git a/coresimd/x86_64/xsave.rs b/coresimd/x86_64/xsave.rs index 0faaef1864..8fc721ada8 100644 --- a/coresimd/x86_64/xsave.rs +++ b/coresimd/x86_64/xsave.rs @@ -29,9 +29,12 @@ extern "C" { /// /// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of /// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xsave64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { xsave64( mem_addr, @@ -46,9 +49,12 @@ pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { /// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xrstor64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } @@ -60,9 +66,12 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { /// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize /// the manner in which data is saved. The performance of this instruction will /// be equal to or better than using the `XSAVE64` instruction. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64) #[inline] #[target_feature(enable = "xsave,xsaveopt")] #[cfg_attr(test, assert_instr(xsaveopt64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) { xsaveopt64( mem_addr, @@ -77,9 +86,12 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) { /// `xsavec` differs from `xsave` in that it uses compaction and that it may /// use init optimization. State is saved based on bits [62:0] in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64) #[inline] #[target_feature(enable = "xsave,xsavec")] #[cfg_attr(test, assert_instr(xsavec64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) { xsavec64( mem_addr, @@ -95,9 +107,12 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) { /// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the /// modified optimization. State is saved based on bits [62:0] in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xsaves64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) { xsaves64( mem_addr, @@ -115,9 +130,12 @@ pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) { /// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xrstors64))] +#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) { xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } diff --git a/crates/stdsimd/src/lib.rs b/crates/stdsimd/src/lib.rs index c77b7afd9f..e797f66d6f 100644 --- a/crates/stdsimd/src/lib.rs +++ b/crates/stdsimd/src/lib.rs @@ -8,7 +8,7 @@ //! 
[stdsimd]: https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/ #![feature(const_fn, integer_atomics, staged_api, stdsimd)] -#![feature(cfg_target_feature, doc_cfg)] +#![feature(cfg_target_feature, doc_cfg, allow_internal_unstable)] #![cfg_attr(feature = "cargo-clippy", allow(shadow_reuse))] #![cfg_attr(target_os = "linux", feature(linkage))] #![no_std] diff --git a/stdsimd/arch/detect/arch/x86.rs b/stdsimd/arch/detect/arch/x86.rs index eb43021375..ed08bea3c6 100644 --- a/stdsimd/arch/detect/arch/x86.rs +++ b/stdsimd/arch/detect/arch/x86.rs @@ -16,8 +16,70 @@ //! in a global `AtomicUsize` variable. The query is performed by just checking //! whether the feature bit in this global variable is set or cleared. +/// A macro to test at *runtime* whether a CPU feature is available on +/// x86/x86-64 platforms. +/// +/// This macro is provided in the standard library and will detect at runtime +/// whether the specified CPU feature is detected. This does *not* resolve at +/// compile time unless the specified feature is already enabled for the entire +/// crate. Runtime detection currently relies mostly on the `cpuid` instruction. +/// +/// This macro only takes one argument which is a string literal of the feature +/// being tested for. The feature names supported are the lowercase versions of +/// the ones defined by Intel in [their documentation][docs]. +/// +/// ## Supported arguments +/// +/// This macro supports the same names that `#[target_feature]` supports. Unlike +/// `#[target_feature]`, however, this macro does not support names separated +/// with a comma. Instead testing for multiple features must be done through +/// separate macro invocations for now. +/// +/// Supported arguments are: +/// +/// * `"aes"` +/// * `"pclmulqdq"` +/// * `"rdrand"` +/// * `"rdseed"` +/// * `"tsc"` +/// * `"mmx"` +/// * `"sse"` +/// * `"sse2"` +/// * `"sse3"` +/// * `"ssse3"` +/// * `"sse4.1"` +/// * `"sse4.2"` +/// * `"sse4a"` +/// * `"sha"` +/// * `"avx"` +/// * `"avx2"` +/// * `"avx512f"` +/// * `"avx512cd"` +/// * `"avx512er"` +/// * `"avx512pf"` +/// * `"avx512bw"` +/// * `"avx512dq"` +/// * `"avx512vl"` +/// * `"avx512ifma"` +/// * `"avx512vbmi"` +/// * `"avx512vpopcntdq"` +/// * `"fma"` +/// * `"bmi1"` +/// * `"bmi2"` +/// * `"abm"` +/// * `"lzcnt"` +/// * `"tbm"` +/// * `"popcnt"` +/// * `"fxsr"` +/// * `"xsave"` +/// * `"xsaveopt"` +/// * `"xsaves"` +/// * `"xsavec"` +/// +/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] -#[unstable(feature = "stdsimd", issue = "0")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow_internal_unstable] macro_rules! 
is_x86_feature_detected { ("aes") => { cfg!(target_feature = "aes") || $crate::arch::detect::check_for( diff --git a/stdsimd/mod.rs b/stdsimd/mod.rs index bec4884054..30b5beced1 100644 --- a/stdsimd/mod.rs +++ b/stdsimd/mod.rs @@ -343,30 +343,38 @@ /// } /// } /// ``` -#[unstable(feature = "stdsimd", issue = "0")] +#[stable(feature = "simd_arch", since = "1.27.0")] pub mod arch { #[cfg(all(not(dox), target_arch = "x86"))] + #[stable(feature = "simd_x86", since = "1.27.0")] pub use coresimd::arch::x86; #[cfg(all(not(dox), target_arch = "x86_64"))] + #[stable(feature = "simd_x86", since = "1.27.0")] pub use coresimd::arch::x86_64; #[cfg(all(not(dox), target_arch = "arm"))] + #[unstable(feature = "stdsimd", issue = "0")] pub use coresimd::arch::arm; #[cfg(all(not(dox), target_arch = "aarch64"))] + #[unstable(feature = "stdsimd", issue = "0")] pub use coresimd::arch::aarch64; #[cfg(target_arch = "wasm32")] + #[unstable(feature = "stdsimd", issue = "0")] pub use coresimd::arch::wasm32; #[cfg(all(not(dox), target_arch = "mips"))] + #[unstable(feature = "stdsimd", issue = "0")] pub use coresimd::arch::mips; #[cfg(all(not(dox), target_arch = "mips64"))] + #[unstable(feature = "stdsimd", issue = "0")] pub use coresimd::arch::mips64; #[doc(hidden)] // unstable implementation detail + #[unstable(feature = "stdsimd", issue = "0")] pub mod detect; /// Platform-specific intrinsics for the `x86` platform. @@ -378,6 +386,7 @@ pub mod arch { /// [libcore]: ../../../core/arch/x86/index.html #[cfg(dox)] #[doc(cfg(target_arch = "x86"))] + #[stable(feature = "simd_x86", since = "1.27.0")] pub mod x86 {} /// Platform-specific intrinsics for the `x86_64` platform. @@ -389,6 +398,7 @@ pub mod arch { /// [libcore]: ../../../core/arch/x86_64/index.html #[cfg(dox)] #[doc(cfg(target_arch = "x86_64"))] + #[stable(feature = "simd_x86", since = "1.27.0")] pub mod x86_64 {} /// Platform-specific intrinsics for the `arm` platform. @@ -400,6 +410,7 @@ pub mod arch { /// [libcore]: ../../../core/arch/arm/index.html #[cfg(dox)] #[doc(cfg(target_arch = "arm"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod arm {} /// Platform-specific intrinsics for the `aarch64` platform. @@ -411,6 +422,7 @@ pub mod arch { /// [libcore]: ../../../core/arch/aarch64/index.html #[cfg(dox)] #[doc(cfg(target_arch = "aarch64"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod aarch64 {} /// Platform-specific intrinsics for the `mips` platform. @@ -422,6 +434,7 @@ pub mod arch { /// [libcore]: ../../../core/arch/mips/index.html #[cfg(dox)] #[doc(cfg(target_arch = "mips"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod mips {} /// Platform-specific intrinsics for the `mips64` platform. @@ -433,6 +446,7 @@ pub mod arch { /// [libcore]: ../../../core/arch/mips64/index.html #[cfg(dox)] #[doc(cfg(target_arch = "mips64"))] + #[unstable(feature = "stdsimd", issue = "0")] pub mod mips64 {} }
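
The stabilizations above pair naturally with the `is_x86_feature_detected!` macro stabilized in this patch. As a minimal sketch (assuming the intrinsics are reached through `std::arch::x86_64` once this lands; the helper name `crc32c_u64` and the fallback arm are illustrative only), stable code can dispatch on the detected feature at runtime:

    #[cfg(target_arch = "x86_64")]
    fn crc32c_u64(crc: u64, value: u64) -> u64 {
        if is_x86_feature_detected!("sse4.2") {
            // Sound only because `sse4.2` was just detected at runtime.
            unsafe { std::arch::x86_64::_mm_crc32_u64(crc, value) }
        } else {
            // A software CRC32-C implementation would serve as the fallback.
            unimplemented!("software fallback")
        }
    }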
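
The BMI1 wrappers stabilized here are thin shims over the scalar expressions visible in the diff, so their behavior can be shown directly. A minimal sketch, again assuming the `std::arch::x86_64` paths; the function carries `#[target_feature(enable = "bmi1")]` and should only be called after `is_x86_feature_detected!("bmi1")` returns true:

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "bmi1")]
    unsafe fn bmi1_demo() {
        use std::arch::x86_64::{_blsr_u64, _tzcnt_u64};
        let x: u64 = 0b1011_0000;
        // `_blsr_u64` clears the lowest set bit: x & (x - 1).
        assert_eq!(_blsr_u64(x), 0b1010_0000);
        // `_tzcnt_u64` counts trailing zero bits (and returns 64 for a zero input).
        assert_eq!(_tzcnt_u64(x), 4);
    }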
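
Similarly, the newly stable `_xgetbv` can read `XCR0` via `_XCR_XFEATURE_ENABLED_MASK`; a common use is confirming that the OS saves SSE and AVX register state before trusting an `avx` detection result. A sketch under the same path assumptions (the helper name is hypothetical; the bit positions follow the Intel SDM, where bit 1 covers SSE/XMM state and bit 2 covers AVX/YMM state):

    #[cfg(target_arch = "x86_64")]
    fn os_saves_avx_state() -> bool {
        use std::arch::x86_64::{_xgetbv, _XCR_XFEATURE_ENABLED_MASK};
        if !is_x86_feature_detected!("xsave") {
            return false;
        }
        // Sound only because `xsave` support was detected above.
        let xcr0 = unsafe { _xgetbv(_XCR_XFEATURE_ENABLED_MASK) };
        (xcr0 & 0b110) == 0b110
    }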