diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b025a7681bfac3..666d4b2b65978a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1218,6 +1218,12 @@ def NondetermenisticValue : Builtin {
   let Prototype = "void(...)";
 }
 
+def VectorCompress : Builtin {
+  let Spellings = ["__builtin_experimental_vectorcompress"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwiseAbs : Builtin {
   let Spellings = ["__builtin_elementwise_abs"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 5cdf36660b2a66..1e7de962dc26d4 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3387,6 +3387,8 @@ def err_typecheck_vector_lengths_not_equal : Error<
 def warn_typecheck_vector_element_sizes_not_equal : Warning<
   "vector operands do not have the same elements sizes (%0 and %1)">,
   InGroup<DiagGroup<"vec-elem-size">>, DefaultError;
+def err_typecheck_scalable_fixed_vector_mismatch : Error<
+  "vectors must both be scalable or fixed-sized vectors">;
 def err_ext_vector_component_exceeds_length : Error<
   "vector component access exceeds type %0">;
 def err_ext_vector_component_name_illegal : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 51d1162c6e403c..ab2875f7572f0f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3773,6 +3773,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_experimental_vectorcompress: {
+    QualType VecTy = E->getArg(0)->getType();
+    Value *Vec = EmitScalarExpr(E->getArg(0));
+    Value *Mask = EmitScalarExpr(E->getArg(1));
+    Value *Passthru = E->getNumArgs() == 3
+                          ? EmitScalarExpr(E->getArg(2))
+                          : llvm::UndefValue::get(ConvertType(VecTy));
+
+    // Cast svbool_t to right number of elements.
+    if (VecTy->isSVESizelessBuiltinType())
+      Mask = EmitSVEPredicateCast(
+          Mask, cast<llvm::ScalableVectorType>(Vec->getType()));
+
+    Function *F = CGM.getIntrinsic(Intrinsic::experimental_vector_compress,
+                                   Vec->getType());
+    return RValue::get(Builder.CreateCall(F, {Vec, Mask, Passthru}));
+  }
+
   case Builtin::BI__builtin_elementwise_abs: {
     Value *Result;
     QualType QT = E->getArg(0)->getType();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ee143381cf4f79..ca9f638c960e0a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2804,6 +2804,60 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     TheCall->setType(Magnitude.get()->getType());
     break;
   }
+  case Builtin::BI__builtin_experimental_vectorcompress: {
+    unsigned NumArgs = TheCall->getNumArgs();
+    if (NumArgs < 2)
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_few_args_at_least)
+             << /*function*/ 0 << /*at least*/ 2 << /*got*/ NumArgs
+             << /*is non object*/ 0;
+
+    if (NumArgs > 3)
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_many_args_at_most)
+             << /*function*/ 0 << /*at most*/ 3 << /*got*/ NumArgs
+             << /*is non object*/ 0;
+
+    Expr *VecArg = TheCall->getArg(0);
+    QualType VecTy = VecArg->getType();
+    if (!VecTy->isVectorType() && !VecTy->isSizelessVectorType())
+      return Diag(VecArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+             << 1 << /* vector ty*/ 4 << VecTy;
+
+    Expr *MaskArg = TheCall->getArg(1);
+    QualType MaskTy = MaskArg->getType();
+    if (!MaskTy->isVectorType() && !MaskTy->isSizelessVectorType())
+      return Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+             << 2 << /* vector ty*/ 4 << MaskTy;
+
+    if (VecTy->isVectorType() != MaskTy->isVectorType())
+      return Diag(MaskArg->getBeginLoc(),
+                  diag::err_typecheck_scalable_fixed_vector_mismatch);
+
+    if (VecTy->isVectorType() &&
+        VecTy->getAs<VectorType>()->getNumElements() !=
+            MaskTy->getAs<VectorType>()->getNumElements())
+      return Diag(VecArg->getBeginLoc(),
+                  diag::err_typecheck_vector_lengths_not_equal)
+             << VecTy->getAs<VectorType>()->getNumElements()
+             << MaskTy->getAs<VectorType>()->getNumElements();
+
+    // TODO: find way to compare MinKnownElements for sizeless vectors.
+    // if (VecTy->isSizelessVectorType() &&
+    //     VecTy->getAs<VectorType>()->getNumElements() !=
+    //     MaskTy->getAs<VectorType>()->getNumElements()) {}
+
+    if (NumArgs == 3) {
+      Expr *PassthruArg = TheCall->getArg(2);
+      QualType PassthruTy = PassthruArg->getType();
+      if (PassthruTy != VecTy)
+        return Diag(PassthruArg->getBeginLoc(),
+                    diag::err_typecheck_call_different_arg_types)
+            << VecTy << PassthruTy;
+    }
+    TheCall->setType(VecTy);
+    break;
+  }
   case Builtin::BI__builtin_reduce_max:
   case Builtin::BI__builtin_reduce_min: {
     if (PrepareBuiltinReduceMathOneArgCall(TheCall))
diff --git a/clang/test/CodeGen/builtin_vectorcompress.c b/clang/test/CodeGen/builtin_vectorcompress.c
new file mode 100644
index 00000000000000..1eebb3461241b5
--- /dev/null
+++ b/clang/test/CodeGen/builtin_vectorcompress.c
@@ -0,0 +1,81 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -O1 -triple x86_64 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -o - | FileCheck --check-prefixes=SVE %s
+
+typedef int int4 __attribute__((vector_size(16)));
+typedef float float8 __attribute__((vector_size(32)));
+typedef _Bool bitvec4 __attribute__((ext_vector_type(4)));
+typedef _Bool bitvec8 __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_builtin_vectorcompress_int4(
+// CHECK-SAME: <4 x i32> noundef [[VEC:%.*]], i8 noundef [[MASK_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> [[VEC]], <4 x i1> [[EXTRACTVEC]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+int4 test_builtin_vectorcompress_int4(int4 vec, bitvec4 mask) {
+  return __builtin_experimental_vectorcompress(vec, mask);
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_builtin_vectorcompress_int4_passthru(
+// CHECK-SAME: <4 x i32> noundef [[VEC:%.*]], i8 noundef [[MASK_COERCE:%.*]], <4 x i32> noundef [[PASSTHRU:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> [[VEC]], <4 x i1> [[EXTRACTVEC]], <4 x i32> [[PASSTHRU]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+int4 test_builtin_vectorcompress_int4_passthru(int4 vec, bitvec4 mask, int4 passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru);
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @test_builtin_vectorcompress_float8(
+// CHECK-SAME: ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP0:%.*]], i8 noundef [[MASK_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> [[VEC]], <8 x i1> [[MASK1]], <8 x float> undef)
+// CHECK-NEXT:    ret <8 x float> [[TMP1]]
+float8 test_builtin_vectorcompress_float8(float8 vec, bitvec8 mask) {
+  return __builtin_experimental_vectorcompress(vec, mask);
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @test_builtin_vectorcompress_float8_passthru(
+// CHECK-SAME: ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP0:%.*]], i8 noundef [[MASK_COERCE:%.*]], ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[PASSTHRU:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> [[VEC]], <8 x i1> [[MASK1]], <8 x float> [[PASSTHRU]])
+// CHECK-NEXT:    ret <8 x float> [[TMP2]]
+float8 test_builtin_vectorcompress_float8_passthru(float8 vec, bitvec8 mask, float8 passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru);
+}
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+
+// SVE-LABEL: define dso_local <vscale x 4 x i32> @test_builtin_vectorelements_sve32(
+// SVE-SAME: <vscale x 4 x i32> [[VEC:%.*]], <vscale x 16 x i1> [[MASK:%.*]]) local_unnamed_addr
+// SVE-NEXT:  [[ENTRY:.*:]]
+// SVE-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[MASK]])
+// SVE-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.compress.nxv4i32(<vscale x 4 x i32> [[VEC]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> undef)
+// SVE-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_builtin_vectorelements_sve32(svuint32_t vec, svbool_t mask) {
+  return __builtin_experimental_vectorcompress(vec, mask);
+}
+
+// SVE-LABEL: define dso_local <vscale x 16 x i8> @test_builtin_vectorelements_sve8(
+// SVE-SAME: <vscale x 16 x i8> [[VEC:%.*]], <vscale x 16 x i1> [[MASK:%.*]], <vscale x 16 x i8> [[PASSTHRU:%.*]]) local_unnamed_addr
+// SVE-NEXT:  [[ENTRY:.*:]]
+// SVE-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.experimental.vector.compress.nxv16i8(<vscale x 16 x i8> [[VEC]], <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> [[PASSTHRU]])
+// SVE-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_builtin_vectorelements_sve8(svuint8_t vec, svbool_t mask, svuint8_t passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru);
+}
+#endif
diff --git a/clang/test/Sema/builtin_vectorcompress.c b/clang/test/Sema/builtin_vectorcompress.c
new file mode 100644
index 00000000000000..5b55a4081c1660
--- /dev/null
+++ b/clang/test/Sema/builtin_vectorcompress.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple aarch64 -fsyntax-only -verify -disable-llvm-passes %s
+
+typedef int int4 __attribute__((vector_size(16)));
+typedef float float8 __attribute__((vector_size(32)));
+typedef _Bool bitvec4 __attribute__((ext_vector_type(4)));
+typedef _Bool bitvec8 __attribute__((ext_vector_type(8)));
+
+void test_builtin_vectorelements(int4 vec1, float8 vec2, bitvec4 mask1, bitvec8 mask2, int4 passthru1, float8 passthru2) {
+  // wrong number of arguments
+  __builtin_experimental_vectorcompress(vec1); // expected-error {{too few arguments to function call}}
+  __builtin_experimental_vectorcompress(vec1, mask2, passthru1, passthru1); // expected-error {{too many arguments to function call}}
+
+  // valid
+  (void) __builtin_experimental_vectorcompress(vec1, mask1);
+  (void) __builtin_experimental_vectorcompress(vec1, mask1, passthru1);
+  (void) __builtin_experimental_vectorcompress(vec2, mask2);
+  (void) __builtin_experimental_vectorcompress(vec2, mask2, passthru2);
+
+  // type mismatch
+  __builtin_experimental_vectorcompress(vec1, mask2); // expected-error {{vector operands do not have the same number of elements}}
+  __builtin_experimental_vectorcompress(vec2, mask1); // expected-error {{vector operands do not have the same number of elements}}
+  __builtin_experimental_vectorcompress(vec1, mask1, passthru2); // expected-error {{arguments are of different types}}
+
+  // invalid types
+  int a;
+  __builtin_experimental_vectorcompress(a, mask1, passthru1); // expected-error {{1st argument must be a vector type (was 'int')}}
+  __builtin_experimental_vectorcompress(vec1, a, passthru1); // expected-error {{2nd argument must be a vector type (was 'int')}}
+  __builtin_experimental_vectorcompress(vec1, mask1, a); // expected-error {{arguments are of different types}}
+}