llvm · lawben · Aug 8, 2024 · Aug 8, 2024
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
@@ -1218,6 +1218,12 @@ def NondetermenisticValue : Builtin {
   let Prototype = "void(...)";
 }
 
+def VectorCompress : Builtin { // Compress builtin: (vec, mask[, passthru]).
+  let Spellings = ["__builtin_experimental_vectorcompress"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)"; // Placeholder; Sema checks the 2-3 operands.
+}
+
 def ElementwiseAbs : Builtin {
   let Spellings = ["__builtin_elementwise_abs"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3387,6 +3387,8 @@ def err_typecheck_vector_lengths_not_equal : Error<
 def warn_typecheck_vector_element_sizes_not_equal : Warning<
   "vector operands do not have the same elements sizes (%0 and %1)">,
   InGroup<DiagGroup<"vec-elem-size">>, DefaultError;
+def err_typecheck_scalable_fixed_vector_mismatch : Error< // fixed/sizeless mix
+  "vectors must both be scalable or fixed-sized vectors">;
 def err_ext_vector_component_exceeds_length : Error<
   "vector component access exceeds type %0">;
 def err_ext_vector_component_name_illegal : Error<

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3773,6 +3773,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_experimental_vectorcompress: {
+    // Operands are (vec, mask[, passthru]), emitted in source order.
+    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Pred = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Fill; // Undef vector of vec's type when passthru is omitted.
+    if (E->getNumArgs() == 3)
+      Fill = EmitScalarExpr(E->getArg(2));
+    else
+      Fill = llvm::UndefValue::get(ConvertType(E->getArg(0)->getType()));
+    // An svbool_t mask is cast to a predicate matching the source vector.
+    if (E->getArg(0)->getType()->isSVESizelessBuiltinType())
+      Pred = EmitSVEPredicateCast(
+          Pred, cast<llvm::ScalableVectorType>(Src->getType()));
+    llvm::Function *Compress = CGM.getIntrinsic(
+        Intrinsic::experimental_vector_compress, Src->getType());
+    return RValue::get(Builder.CreateCall(Compress, {Src, Pred, Fill}));
+  }
+
   case Builtin::BI__builtin_elementwise_abs: {
     Value *Result;
     QualType QT = E->getArg(0)->getType();

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -2804,6 +2804,60 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     TheCall->setType(Magnitude.get()->getType());
     break;
   }
+  case Builtin::BI__builtin_experimental_vectorcompress: {
+    // Operands are (vec, mask[, passthru]); the call takes the type of vec.
+    unsigned NumArgs = TheCall->getNumArgs();
+    if (NumArgs < 2)
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_few_args_at_least)
+             << /*function*/ 0 << /*at least*/ 2 << /*got*/ NumArgs
+             << /*is non object*/ 0;
+    if (NumArgs > 3)
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_many_args_at_most)
+             << /*function*/ 0 << /*at most*/ 3 << /*got*/ NumArgs
+             << /*is non object*/ 0;
+
+    Expr *VecArg = TheCall->getArg(0);
+    QualType VecTy = VecArg->getType();
+    if (!VecTy->isVectorType() && !VecTy->isSizelessVectorType())
+      return Diag(VecArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+             << 1 << /* vector ty*/ 4 << VecTy;
+
+    Expr *MaskArg = TheCall->getArg(1);
+    QualType MaskTy = MaskArg->getType();
+    if (!MaskTy->isVectorType() && !MaskTy->isSizelessVectorType())
+      return Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+             << 2 << /* vector ty*/ 4 << MaskTy;
+
+    if (VecTy->isVectorType() != MaskTy->isVectorType())
+      return Diag(MaskArg->getBeginLoc(),
+                  diag::err_typecheck_scalable_fixed_vector_mismatch);
+
+    // Fixed-size operands must have the same number of elements.
+    if (VecTy->isVectorType() &&
+        VecTy->castAs<VectorType>()->getNumElements() !=
+            MaskTy->castAs<VectorType>()->getNumElements())
+      return Diag(VecArg->getBeginLoc(),
+                  diag::err_typecheck_vector_lengths_not_equal)
+             << VecTy->castAs<VectorType>()->getNumElements()
+             << MaskTy->castAs<VectorType>()->getNumElements();
+
+    // TODO: find way to compare MinKnownElements for sizeless vectors.
+    // NOTE(review): the mask's element type is not validated here.
+
+    if (NumArgs == 3) {
+      // Compare ignoring typedef sugar and qualifiers on the operand types.
+      Expr *PassthruArg = TheCall->getArg(2);
+      QualType PassthruTy = PassthruArg->getType();
+      if (!Context.hasSameUnqualifiedType(PassthruTy, VecTy))
+        return Diag(PassthruArg->getBeginLoc(),
+                    diag::err_typecheck_call_different_arg_types)
+               << VecTy << PassthruTy;
+    }
+    TheCall->setType(VecTy.getUnqualifiedType());
+    break;
+  }
   case Builtin::BI__builtin_reduce_max:
   case Builtin::BI__builtin_reduce_min: {
     if (PrepareBuiltinReduceMathOneArgCall(TheCall))

diff --git a/clang/test/CodeGen/builtin_vectorcompress.c b/clang/test/CodeGen/builtin_vectorcompress.c
@@ -0,0 +1,81 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -O1 -triple x86_64 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve  %s -emit-llvm -o - | FileCheck --check-prefixes=SVE   %s
+
+typedef int int4 __attribute__((vector_size(16))); // 16-byte vector of int
+typedef float float8 __attribute__((vector_size(32))); // 32-byte vector of float
+typedef _Bool bitvec4 __attribute__((ext_vector_type(4))); // 4-element mask
+typedef _Bool bitvec8 __attribute__((ext_vector_type(8))); // 8-element mask
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_builtin_vectorcompress_int4(
+// CHECK-SAME: <4 x i32> noundef [[VEC:%.*]], i8 noundef [[MASK_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> [[VEC]], <4 x i1> [[EXTRACTVEC]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+int4 test_builtin_vectorcompress_int4(int4 vec, bitvec4 mask) {
+  return __builtin_experimental_vectorcompress(vec, mask); // no passthru operand
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_builtin_vectorcompress_int4_passthru(
+// CHECK-SAME: <4 x i32> noundef [[VEC:%.*]], i8 noundef [[MASK_COERCE:%.*]], <4 x i32> noundef [[PASSTHRU:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> [[VEC]], <4 x i1> [[EXTRACTVEC]], <4 x i32> [[PASSTHRU]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+int4 test_builtin_vectorcompress_int4_passthru(int4 vec, bitvec4 mask, int4 passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru); // explicit passthru
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @test_builtin_vectorcompress_float8(
+// CHECK-SAME: ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP0:%.*]], i8 noundef [[MASK_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> [[VEC]], <8 x i1> [[MASK1]], <8 x float> undef)
+// CHECK-NEXT:    ret <8 x float> [[TMP1]]
+float8 test_builtin_vectorcompress_float8(float8 vec, bitvec8 mask) {
+  return __builtin_experimental_vectorcompress(vec, mask); // no passthru operand
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @test_builtin_vectorcompress_float8_passthru(
+// CHECK-SAME: ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP0:%.*]], i8 noundef [[MASK_COERCE:%.*]], ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[PASSTHRU:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> [[VEC]], <8 x i1> [[MASK1]], <8 x float> [[PASSTHRU]])
+// CHECK-NEXT:    ret <8 x float> [[TMP2]]
+float8 test_builtin_vectorcompress_float8_passthru(float8 vec, bitvec8 mask, float8 passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru); // explicit passthru
+}
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+
+// SVE-LABEL: define dso_local <vscale x 4 x i32> @test_builtin_vectorcompress_sve32(
+// SVE-SAME: <vscale x 4 x i32> [[VEC:%.*]], <vscale x 16 x i1> [[MASK:%.*]]) local_unnamed_addr
+// SVE-NEXT:  [[ENTRY:.*:]]
+// SVE-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[MASK]])
+// SVE-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.compress.nxv4i32(<vscale x 4 x i32> [[VEC]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> undef)
+// SVE-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_builtin_vectorcompress_sve32(svuint32_t vec, svbool_t mask) {
+  return __builtin_experimental_vectorcompress(vec, mask); // svbool_t mask is narrowed to 4 lanes
+}
+
+// SVE-LABEL: define dso_local <vscale x 16 x i8> @test_builtin_vectorcompress_sve8(
+// SVE-SAME: <vscale x 16 x i8> [[VEC:%.*]], <vscale x 16 x i1> [[MASK:%.*]], <vscale x 16 x i8> [[PASSTHRU:%.*]]) local_unnamed_addr
+// SVE-NEXT:  [[ENTRY:.*:]]
+// SVE-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.experimental.vector.compress.nxv16i8(<vscale x 16 x i8> [[VEC]], <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> [[PASSTHRU]])
+// SVE-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_builtin_vectorcompress_sve8(svuint8_t vec, svbool_t mask, svuint8_t passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru); // svbool_t mask is used as-is
+}
+#endif
+
diff --git a/clang/test/Sema/builtin_vectorcompress.c b/clang/test/Sema/builtin_vectorcompress.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple aarch64 -fsyntax-only -verify -disable-llvm-passes %s
+
+typedef int int4 __attribute__((vector_size(16))); // 16-byte vector of int
+typedef float float8 __attribute__((vector_size(32))); // 32-byte vector of float
+typedef _Bool bitvec4 __attribute__((ext_vector_type(4))); // 4-element mask
+typedef _Bool bitvec8 __attribute__((ext_vector_type(8))); // 8-element mask
+
+void test_builtin_vectorcompress(int4 vec1, float8 vec2, bitvec4 mask1, bitvec8 mask2, int4 passthru1, float8 passthru2) {
+  // wrong number of arguments (only 2 or 3 are accepted)
+  __builtin_experimental_vectorcompress(vec1); // expected-error {{too few arguments to function call}}
+  __builtin_experimental_vectorcompress(vec1, mask1, passthru1, passthru1); // expected-error {{too many arguments to function call}}
+
+  // valid: with and without a passthru operand
+  (void) __builtin_experimental_vectorcompress(vec1, mask1);
+  (void) __builtin_experimental_vectorcompress(vec1, mask1, passthru1);
+  (void) __builtin_experimental_vectorcompress(vec2, mask2);
+  (void) __builtin_experimental_vectorcompress(vec2, mask2, passthru2);
+
+  // element count of vec and mask differs, or passthru type differs from vec
+  __builtin_experimental_vectorcompress(vec1, mask2); // expected-error {{vector operands do not have the same number of elements}}
+  __builtin_experimental_vectorcompress(vec2, mask1); // expected-error {{vector operands do not have the same number of elements}}
+  __builtin_experimental_vectorcompress(vec1, mask1, passthru2); // expected-error {{arguments are of different types}}
+
+  // non-vector operand in each position
+  int a;
+  __builtin_experimental_vectorcompress(a, mask1, passthru1); // expected-error {{1st argument must be a vector type (was 'int')}}
+  __builtin_experimental_vectorcompress(vec1, a, passthru1); // expected-error {{2nd argument must be a vector type (was 'int')}}
+  __builtin_experimental_vectorcompress(vec1, mask1, a); // expected-error {{arguments are of different types}}
+}
+