-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] lower SCALAR_TO_VECTOR to INSERT_VECTOR_ELT #122863
Conversation
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac) Changes:
define <16 x i8> @scalar_to_16xi8(i8 %val) {
%ret = insertelement <16 x i8> undef, i8 %val, i32 0
ret <16 x i8> %ret
}
before:
addi.d $sp, $sp, -16
st.b $a0, $sp, 0
vld $vr0, $sp, 0
addi.d $sp, $sp, 16
ret
after:
vinsgr2vr.b $vr0, $a0, 0
ret
Full diff: https://github.com/llvm/llvm-project/pull/122863.diff
4 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 96e6f71344a787..af8566680b2a07 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -255,6 +255,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -311,6 +312,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -446,10 +448,26 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITREVERSE:
return lowerBITREVERSE(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return lowerSCALAR_TO_VECTOR(Op, DAG);
}
return SDValue();
}
+SDValue
+LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT OpVT = Op.getSimpleValueType();
+
+ SDValue Vector = DAG.getUNDEF(OpVT);
+ SDValue Val = Op.getOperand(0);
+ SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
+
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
+ return Vector;
+}
+
SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
SelectionDAG &DAG) const {
EVT ResTy = Op->getValueType(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index e619cb69f33325..a14d5d49ee9d1b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -336,6 +336,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
new file mode 100644
index 00000000000000..9020db76738f6a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+; Test scalar_to_vector expansion.
+
+define <32 x i8> @scalar_to_32xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_32xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <32 x i8> undef, i8 %val, i32 0
+ ret <32 x i8> %ret
+}
+
+define <16 x i16> @scalar_to_16xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_16xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <16 x i16> undef, i16 %val, i32 0
+ ret <16 x i16> %ret
+}
+
+define <8 x i32> @scalar_to_8xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_8xi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <8 x i32> undef, i32 %val, i32 0
+ ret <8 x i32> %ret
+}
+
+define <4 x i64> @scalar_to_4xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_4xi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x i64> undef, i64 %val, i32 0
+ ret <4 x i64> %ret
+}
+
+define <8 x float> @scalar_to_8xf32(float %val) {
+; CHECK-LABEL: scalar_to_8xf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.s $a0, $fa0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <8 x float> undef, float %val, i32 0
+ ret <8 x float> %ret
+}
+
+define <4 x double> @scalar_to_4xf64(double %val) {
+; CHECK-LABEL: scalar_to_4xf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.d $a0, $fa0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x double> undef, double %val, i32 0
+ ret <4 x double> %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
new file mode 100644
index 00000000000000..4a9471bbf552b0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+; Test scalar_to_vector expansion.
+
+define <16 x i8> @scalar_to_16xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_16xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 0
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @scalar_to_8xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_8xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 0
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @scalar_to_4xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_4xi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 0
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @scalar_to_2xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_2xi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <2 x i64> undef, i64 %val, i32 0
+ ret <2 x i64> %ret
+}
+
+define <4 x float> @scalar_to_4xf32(float %val) {
+; CHECK-LABEL: scalar_to_4xf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.s $a0, $fa0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x float> undef, float %val, i32 0
+ ret <4 x float> %ret
+}
+
+define <2 x double> @scalar_to_2xf64(double %val) {
+; CHECK-LABEL: scalar_to_2xf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.d $a0, $fa0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <2 x double> undef, double %val, i32 0
+ ret <2 x double> %ret
+}
|
✅ With the latest revision this PR passed the undef deprecator. |
2b3fd87
to
7e36c8d
Compare
@@ -255,6 +255,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, | |||
setOperationAction(ISD::SETCC, VT, Legal); | |||
setOperationAction(ISD::VSELECT, VT, Legal); | |||
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); | |||
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not make it Legal and define patterns in .td files?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> Why not make it Legal and define patterns in .td files?
Hi @SixWeining, I thought that in .td files we can only lower operations to machine instructions. However, if this is done during ISel lowering, it gets canonicalized into a different standard SDNode. Could there be potential benefits if we incorporate some DAGCombine patterns in this approach?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> Why not make it Legal and define patterns in .td files?
def : Pat<(v16i8 (scalar_to_vector GRLenVT:$rj)),
(VINSGR2VR_B ?, GRLenVT:$rj, 0)>;
We cannot replace scalar_to_vector with the VINSGR2VR instruction in a .td pattern because of the unknown `?` (the first operand in the pattern above).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree.
For the test case in the commit message, we do too many transforms: insert_vector_elt -> BUILD_VECTOR -> scalar_to_vector -> insert_vector_elt. Could we do insert_vector_elt -> BUILD_VECTOR -> insert_vector_elt instead?
Maybe we should change LoongArchTargetLowering::lowerBUILD_VECTOR() or SelectionDAGLegalize::ExpandBUILD_VECTOR().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which one should we use: movgr2fr or vinsgr2vr?
ret <2 x i64> %ret | ||
} | ||
|
||
define <4 x float> @scalar_to_4xf32(float %val) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems these could be empty, because the FRs overlap with the lower part of the SIMD registers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vector_insert in .td files cannot deal with this.
We could make v4f32 and v2f64 Legal, and process scalar_to_vector in .td files like this:
def : Pat<(v4f32 (scalar_to_vector FPR32:$fj)),
(SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32)>;
def : Pat<(v2f64 (scalar_to_vector FPR64:$fj)),
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64)>;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Beyond this PR: if the insertion index is non-zero and the vector is undef or poison, the stack store and load could also be replaced with vinsgr2vr.
Yes |
Co-authored-by: Lu Weining <luweining@loongson.cn>
Co-authored-by: Lu Weining <luweining@loongson.cn>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Let's handle the non-zero case in a separate PR.
before
after