From b5c36607cd8d3c170969953cdcec1f998a7fc8e2 Mon Sep 17 00:00:00 2001
From: Fabian Penezic
Date: Fri, 17 Jan 2025 17:21:42 +0100
Subject: [PATCH] x86 AVX512: add support for the _mm512_alignr_epi32 instruction

---
 meson.build              |  1 +
 simde/x86/avx512/align.h | 78 ++++++++++++++++++++++++++++++++++++
 test/x86/avx512/align.c  | 85 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 164 insertions(+)
 create mode 100644 simde/x86/avx512/align.h
 create mode 100644 test/x86/avx512/align.c

diff --git a/meson.build b/meson.build
index 5b211ba07..1a71a5ddf 100644
--- a/meson.build
+++ b/meson.build
@@ -322,6 +322,7 @@ simde_avx512_families = [
   'abs',
   'add',
   'adds',
+  'align',
   'and',
   'andnot',
   'avg',
diff --git a/simde/x86/avx512/align.h b/simde/x86/avx512/align.h
new file mode 100644
index 000000000..095a8a2b3
--- /dev/null
+++ b/simde/x86/avx512/align.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2020      Evan Nemerson
+ *   2020      Christopher Moore
+ *   2023      Michael R. Crusoe
+ */
+
+#if !defined(SIMDE_X86_AVX512_ALIGN_H)
+#define SIMDE_X86_AVX512_ALIGN_H
+
+#include "types.h"
+#include "../avx2.h"
+#include "mov.h"
+#include "extract.h"
+
+#include
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Portable version of _mm512_alignr_epi32.
+ *
+ * The 32-bit lanes of b followed by the 32-bit lanes of a are treated as
+ * one long sequence, which is shifted down by (imm8 & 0xF) lanes.  Result
+ * lane i is b[i + shift] while that index is still inside b, and
+ * a[i + shift - lanes] once it runs past the end of b. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_alignr_epi32 (simde__m512i a, simde__m512i b, const int imm8) {
+  simde__m512i_private
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b),
+    r_;
+  const size_t lanes = sizeof(a_) / sizeof(a_.i32[0]);
+  /* Only the low four bits of imm8 take part in the shift count. */
+  const size_t shift = HEDLEY_STATIC_CAST(size_t, imm8 & 0xF);
+
+  for (size_t i = 0; i < lanes; i++) {
+    const size_t src = i + shift;
+    r_.i32[i] = (src < lanes) ? b_.i32[src] : a_.i32[src - lanes];
+  }
+
+  return simde__m512i_from_private(r_);
+}
+
+/* With native AVX-512F the call goes straight to the intrinsic.  The
+ * expansion has no trailing semicolon so that the macro can be used
+ * inside a larger expression. */
+#if defined(SIMDE_X86_AVX512F_NATIVE)
+  #define simde_mm512_alignr_epi32(a, b, imm8) _mm512_alignr_epi32(a, b, imm8)
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_X86_AVX512_ALIGN_H) */
diff --git a/test/x86/avx512/align.c b/test/x86/avx512/align.c
new file mode 100644
index 000000000..25d03a32c
--- /dev/null
+++ b/test/x86/avx512/align.c
@@ -0,0 +1,85 @@
+#define SIMDE_TEST_X86_AVX512_INSN align
+
+#include
+#include
+#include
+#include
+#include
+
+static int
+test_simde_mm512_alignr_epi32(SIMDE_MUNIT_TEST_ARGS) {
+  /* a and b are the inputs, imm8 is the shift count and r is the value
+   * simde_mm512_alignr_epi32(a, b, imm8) is expected to produce. */
+  const struct {
+    simde__m512i a;
+    simde__m512i b;
+    const int imm8;
+    simde__m512i r;
+  } test_vec[] = {
+    /* Shift by one: the last value of a leads r, then the first fifteen of b. */
+    { simde_mm512_set_epi32(INT32_C( 168), INT32_C(  54), INT32_C(  25), INT32_C(  16),
+                            INT32_C(  42), INT32_C(  64), INT32_C( 892), INT32_C(  79),
+                            INT32_C(  35), INT32_C(  14), INT32_C( 522), INT32_C(  49),
+                            INT32_C(  42), INT32_C(  64), INT32_C(   7), INT32_C(  19)),
+      simde_mm512_set_epi32(INT32_C(   1), INT32_C(   2), INT32_C(   3), INT32_C(   4),
+                            INT32_C(   5), INT32_C(   6), INT32_C(   7), INT32_C(   8),
+                            INT32_C(   9), INT32_C(  10), INT32_C(  11), INT32_C(  12),
+                            INT32_C(  13), INT32_C(  14), INT32_C(  15), INT32_C(  16)),
+      1,
+      simde_mm512_set_epi32(INT32_C(  19), INT32_C(   1), INT32_C(   2), INT32_C(   3),
+                            INT32_C(   4), INT32_C(   5), INT32_C(   6), INT32_C(   7),
+                            INT32_C(   8), INT32_C(   9), INT32_C(  10), INT32_C(  11),
+                            INT32_C(  12), INT32_C(  13), INT32_C(  14), INT32_C(  15)),
+    },
+    /* 17 and 1 share their low four bits, so the expected result is the same. */
+    { simde_mm512_set_epi32(INT32_C( 168), INT32_C(  54), INT32_C(  25), INT32_C(  16),
+                            INT32_C(  42), INT32_C(  64), INT32_C( 892), INT32_C(  79),
+                            INT32_C(  35), INT32_C(  14), INT32_C( 522), INT32_C(  49),
+                            INT32_C(  42), INT32_C(  64), INT32_C(   7), INT32_C(  19)),
+      simde_mm512_set_epi32(INT32_C(   1), INT32_C(   2), INT32_C(   3), INT32_C(   4),
+                            INT32_C(   5), INT32_C(   6), INT32_C(   7), INT32_C(   8),
+                            INT32_C(   9), INT32_C(  10), INT32_C(  11), INT32_C(  12),
+                            INT32_C(  13), INT32_C(  14), INT32_C(  15), INT32_C(  16)),
+      17,
+      simde_mm512_set_epi32(INT32_C(  19), INT32_C(   1), INT32_C(   2), INT32_C(   3),
+                            INT32_C(   4), INT32_C(   5), INT32_C(   6), INT32_C(   7),
+                            INT32_C(   8), INT32_C(   9), INT32_C(  10), INT32_C(  11),
+                            INT32_C(  12), INT32_C(  13), INT32_C(  14), INT32_C(  15)),
+    },
+    /* Shift by four: the last four values of a lead r, then the first twelve of b. */
+    {
+      simde_mm512_set_epi32(INT32_C(8642), INT32_C(7802), INT32_C(1252), INT32_C(1585),
+                            INT32_C(3509), INT32_C(5362), INT32_C(8605), INT32_C(5927),
+                            INT32_C(6701), INT32_C(3014), INT32_C(2816), INT32_C(2818),
+                            INT32_C(6544), INT32_C(9829), INT32_C(7991), INT32_C(1111)),
+      simde_mm512_set_epi32(INT32_C(4513), INT32_C(3854), INT32_C(4402), INT32_C(6551),
+                            INT32_C(5642), INT32_C(3525), INT32_C(3873), INT32_C(5884),
+                            INT32_C(3443), INT32_C(1354), INT32_C(1070), INT32_C(5500),
+                            INT32_C(2349), INT32_C(1754), INT32_C(8819), INT32_C( 716)),
+      4,
+      simde_mm512_set_epi32(INT32_C(6544), INT32_C(9829), INT32_C(7991), INT32_C(1111),
+                            INT32_C(4513), INT32_C(3854), INT32_C(4402), INT32_C(6551),
+                            INT32_C(5642), INT32_C(3525), INT32_C(3873), INT32_C(5884),
+                            INT32_C(3443), INT32_C(1354), INT32_C(1070), INT32_C(5500)),
+    }
+  };
+
+  /* The shift counts are repeated as literals instead of being read from
+   * test_vec; presumably the native intrinsic needs a compile-time constant. */
+  simde__m512i r = simde_mm512_alignr_epi32(test_vec[0].a, test_vec[0].b, 1);
+  simde_assert_m512i_i32(r, ==, test_vec[0].r);
+
+  r = simde_mm512_alignr_epi32(test_vec[1].a, test_vec[1].b, 17);
+  simde_assert_m512i_i32(r, ==, test_vec[1].r);
+
+  r = simde_mm512_alignr_epi32(test_vec[2].a, test_vec[2].b, 4);
+  simde_assert_m512i_i32(r, ==, test_vec[2].r);
+
+  return 0;
+}
+
+SIMDE_TEST_FUNC_LIST_BEGIN
+  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_alignr_epi32)
+SIMDE_TEST_FUNC_LIST_END
+
+#include