diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 96e6f71344a787..d3a86897ce8e44 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -269,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction(
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -287,6 +288,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                          ISD::SETUGE, ISD::SETUGT},
                         VT, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     }
     setOperationAction(ISD::CTPOP, GRLenVT, Legal);
     setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -325,6 +327,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction(
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
     }
     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -343,6 +346,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                          ISD::SETUGE, ISD::SETUGT},
                         VT, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     }
   }
 
@@ -446,10 +450,25 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::BITREVERSE:
     return lowerBITREVERSE(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:
+    return lowerSCALAR_TO_VECTOR(Op, DAG);
   }
   return SDValue();
 }
 
+SDValue
+LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT OpVT = Op.getSimpleValueType();
+
+  SDValue Vector = DAG.getUNDEF(OpVT);
+  SDValue Val = Op.getOperand(0);
+  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
+
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
+}
+
 SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT ResTy = Op->getValueType(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index e619cb69f33325..a14d5d49ee9d1b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -336,6 +336,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 49ae440073f2e0..24b5ed5a9344f2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1562,6 +1562,12 @@ def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
 def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
           (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
 
+// scalar_to_vector
+def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32)>;
+def : Pat<(v4f64 (scalar_to_vector FPR64:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64)>;
+
 // XVPICKVE2GR_W[U]
 def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
           (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index ced430216b2fed..d2063a8aaae9bc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1719,6 +1719,12 @@ def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
 def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
           (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
 
+// scalar_to_vector
+def : Pat<(v4f32 (scalar_to_vector FPR32:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32)>;
+def : Pat<(v2f64 (scalar_to_vector FPR64:$fj)),
+          (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64)>;
+
 // VPICKVE2GR_{B/H/W}[U]
 def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8),
           (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
new file mode 100644
index 00000000000000..05fbb746bd9d3d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; Test scalar_to_vector expansion.
+
+define <32 x i8> @scalar_to_32xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_32xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <32 x i8> poison, i8 %val, i32 0
+  ret <32 x i8> %ret
+}
+
+define <16 x i16> @scalar_to_16xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_16xi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <16 x i16> poison, i16 %val, i32 0
+  ret <16 x i16> %ret
+}
+
+define <8 x i32> @scalar_to_8xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_8xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <8 x i32> poison, i32 %val, i32 0
+  ret <8 x i32> %ret
+}
+
+define <4 x i64> @scalar_to_4xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_4xi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x i64> poison, i64 %val, i32 0
+  ret <4 x i64> %ret
+}
+
+define <8 x float> @scalar_to_8xf32(float %val) {
+; CHECK-LABEL: scalar_to_8xf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <8 x float> poison, float %val, i32 0
+  ret <8 x float> %ret
+}
+
+define <4 x double> @scalar_to_4xf64(double %val) {
+; CHECK-LABEL: scalar_to_4xf64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x double> poison, double %val, i32 0
+  ret <4 x double> %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index ba19fe75d7570c..eaab6524c53177 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -369,19 +369,15 @@ entry:
   ret void
 }
 
-;; BUILD_VECTOR through stack.
 ;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop.
 define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
 ; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; CHECK-NEXT:    st.d $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x i32>, ptr %src
   %e = extractelement <4 x i32> %v, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
new file mode 100644
index 00000000000000..87b68ac5917278
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; Test scalar_to_vector expansion.
+
+define <16 x i8> @scalar_to_16xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_16xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <16 x i8> poison, i8 %val, i32 0
+  ret <16 x i8> %ret
+}
+
+define <8 x i16> @scalar_to_8xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_8xi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <8 x i16> poison, i16 %val, i32 0
+  ret <8 x i16> %ret
+}
+
+define <4 x i32> @scalar_to_4xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_4xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x i32> poison, i32 %val, i32 0
+  ret <4 x i32> %ret
+}
+
+define <2 x i64> @scalar_to_2xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_2xi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %ret = insertelement <2 x i64> poison, i64 %val, i32 0
+  ret <2 x i64> %ret
+}
+
+define <4 x float> @scalar_to_4xf32(float %val) {
+; CHECK-LABEL: scalar_to_4xf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <4 x float> poison, float %val, i32 0
+  ret <4 x float> %ret
+}
+
+define <2 x double> @scalar_to_2xf64(double %val) {
+; CHECK-LABEL: scalar_to_2xf64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT:    ret
+  %ret = insertelement <2 x double> poison, double %val, i32 0
+  ret <2 x double> %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 3e1b6d8eaadbcb..0bdace6b601129 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -126,17 +126,14 @@ define void @test_f2(ptr %P, ptr %S) nounwind {
 ;
 ; LA64D-LABEL: test_f2:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    addi.d $sp, $sp, -16
 ; LA64D-NEXT:    ld.d $a0, $a0, 0
-; LA64D-NEXT:    st.d $a0, $sp, 0
-; LA64D-NEXT:    vld $vr0, $sp, 0
+; LA64D-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; LA64D-NEXT:    lu12i.w $a0, 260096
 ; LA64D-NEXT:    lu52i.d $a0, $a0, 1024
 ; LA64D-NEXT:    vreplgr2vr.d $vr1, $a0
 ; LA64D-NEXT:    vfadd.s $vr0, $vr0, $vr1
 ; LA64D-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64D-NEXT:    st.d $a0, $a1, 0
-; LA64D-NEXT:    addi.d $sp, $sp, 16
 ; LA64D-NEXT:    ret
   %p = load %f2, ptr %P
   %R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 >