Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL" #123289

sushgokh · 2025-01-17T06:50:08Z

Reverts #87474

This reverts commit 7253c6f.

llvmbot · 2025-01-17T06:50:41Z

@llvm/pr-subscribers-llvm-transforms

Author: Sushant Gokhale (sushgokh)

Changes

Reverts llvm/llvm-project#87474

Patch is 34.98 KiB, truncated to 20.00 KiB below, full version: /~https://github.com/llvm/llvm-project/pull/123289.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (-176)
(removed) llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll (-631)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b6acde9bdd1104..d0b2ded127ff73 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -13,7 +13,6 @@
 
 #include "InstCombineInternal.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -658,94 +657,6 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   return nullptr;
 }
 
-// If we have the following pattern,
-// X = 1.0/sqrt(a)
-// R1 = X * X
-// R2 = a/sqrt(a)
-// then this method collects all the instructions that match R1 and R2.
-static bool getFSqrtDivOptPattern(Instruction *Div,
-                                  SmallPtrSetImpl<Instruction *> &R1,
-                                  SmallPtrSetImpl<Instruction *> &R2) {
-  Value *A;
-  if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) ||
-      match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) {
-    for (User *U : Div->users()) {
-      Instruction *I = cast<Instruction>(U);
-      if (match(I, m_FMul(m_Specific(Div), m_Specific(Div))))
-        R1.insert(I);
-    }
-
-    CallInst *CI = cast<CallInst>(Div->getOperand(1));
-    for (User *U : CI->users()) {
-      Instruction *I = cast<Instruction>(U);
-      if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A)))))
-        R2.insert(I);
-    }
-  }
-  return !R1.empty() && !R2.empty();
-}
-
-// Check legality for transforming
-// x = 1.0/sqrt(a)
-// r1 = x * x;
-// r2 = a/sqrt(a);
-//
-// TO
-//
-// r1 = 1/a
-// r2 = sqrt(a)
-// x = r1 * r2
-// This transform works only when 'a' is known positive.
-static bool isFSqrtDivToFMulLegal(Instruction *X,
-                                  SmallPtrSetImpl<Instruction *> &R1,
-                                  SmallPtrSetImpl<Instruction *> &R2) {
-  // Check if the required pattern for the transformation exists.
-  if (!getFSqrtDivOptPattern(X, R1, R2))
-    return false;
-
-  BasicBlock *BBx = X->getParent();
-  BasicBlock *BBr1 = (*R1.begin())->getParent();
-  BasicBlock *BBr2 = (*R2.begin())->getParent();
-
-  CallInst *FSqrt = cast<CallInst>(X->getOperand(1));
-  if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() ||
-      !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs())
-    return false;
-
-  // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed
-  // by recip fp as it is strictly meant to transform ops of type a/b to
-  // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag
-  // has been used(rather abused)in the past for algebraic rewrites.
-  if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs())
-    return false;
-
-  // Check the constraints on X, R1 and R2 combined.
-  // fdiv instruction and one of the multiplications must reside in the same
-  // block. If not, the optimized code may execute more ops than before and
-  // this may hamper the performance.
-  if (BBx != BBr1 && BBx != BBr2)
-    return false;
-
-  // Check the constraints on instructions in R1.
-  if (any_of(R1, [BBr1](Instruction *I) {
-        // When you have multiple instructions residing in R1 and R2
-        // respectively, it's difficult to generate combinations of (R1,R2) and
-        // then check if we have the required pattern. So, for now, just be
-        // conservative.
-        return (I->getParent() != BBr1 || !I->hasAllowReassoc());
-      }))
-    return false;
-
-  // Check the constraints on instructions in R2.
-  return all_of(R2, [BBr2](Instruction *I) {
-    // When you have multiple instructions residing in R1 and R2
-    // respectively, it's difficult to generate combination of (R1,R2) and
-    // then check if we have the required pattern. So, for now, just be
-    // conservative.
-    return (I->getParent() == BBr2 && I->hasAllowReassoc());
-  });
-}
-
 Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0);
   Value *Op1 = I.getOperand(1);
@@ -2002,75 +1913,6 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
   return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
 }
 
-// Change
-// X = 1/sqrt(a)
-// R1 = X * X
-// R2 = a * X
-//
-// TO
-//
-// FDiv = 1/a
-// FSqrt = sqrt(a)
-// FMul = FDiv * FSqrt
-// Replace Uses Of R1 With FDiv
-// Replace Uses Of R2 With FSqrt
-// Replace Uses Of X With FMul
-static Instruction *
-convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
-                        const SmallPtrSetImpl<Instruction *> &R1,
-                        const SmallPtrSetImpl<Instruction *> &R2,
-                        InstCombiner::BuilderTy &B, InstCombinerImpl *IC) {
-
-  B.SetInsertPoint(X);
-
-  // Have an instruction that is representative of all of instructions in R1 and
-  // get the most common fpmath metadata and fast-math flags on it.
-  Value *SqrtOp = CI->getArgOperand(0);
-  auto *FDiv = cast<Instruction>(
-      B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), SqrtOp));
-  auto *R1FPMathMDNode = (*R1.begin())->getMetadata(LLVMContext::MD_fpmath);
-  FastMathFlags R1FMF = (*R1.begin())->getFastMathFlags(); // Common FMF
-  for (Instruction *I : R1) {
-    R1FPMathMDNode = MDNode::getMostGenericFPMath(
-        R1FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
-    R1FMF &= I->getFastMathFlags();
-    IC->replaceInstUsesWith(*I, FDiv);
-    IC->eraseInstFromFunction(*I);
-  }
-  FDiv->setMetadata(LLVMContext::MD_fpmath, R1FPMathMDNode);
-  FDiv->copyFastMathFlags(R1FMF);
-
-  // Have a single sqrt call instruction that is representative of all of
-  // instructions in R2 and get the most common fpmath metadata and fast-math
-  // flags on it.
-  auto *FSqrt = cast<CallInst>(CI->clone());
-  FSqrt->insertBefore(CI);
-  auto *R2FPMathMDNode = (*R2.begin())->getMetadata(LLVMContext::MD_fpmath);
-  FastMathFlags R2FMF = (*R2.begin())->getFastMathFlags(); // Common FMF
-  for (Instruction *I : R2) {
-    R2FPMathMDNode = MDNode::getMostGenericFPMath(
-        R2FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
-    R2FMF &= I->getFastMathFlags();
-    IC->replaceInstUsesWith(*I, FSqrt);
-    IC->eraseInstFromFunction(*I);
-  }
-  FSqrt->setMetadata(LLVMContext::MD_fpmath, R2FPMathMDNode);
-  FSqrt->copyFastMathFlags(R2FMF);
-
-  Instruction *FMul;
-  // If X = -1/sqrt(a) initially,then FMul = -(FDiv * FSqrt)
-  if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) {
-    Value *Mul = B.CreateFMul(FDiv, FSqrt);
-    FMul = cast<Instruction>(B.CreateFNeg(Mul));
-  } else
-    FMul = cast<Instruction>(B.CreateFMul(FDiv, FSqrt));
-  FMul->copyMetadata(*X);
-  FMul->copyFastMathFlags(FastMathFlags::intersectRewrite(R1FMF, R2FMF) |
-                          FastMathFlags::unionValue(R1FMF, R2FMF));
-  IC->replaceInstUsesWith(*X, FMul);
-  return IC->eraseInstFromFunction(*X);
-}
-
 Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
   Module *M = I.getModule();
 
@@ -2095,24 +1937,6 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
     return R;
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  // Convert
-  // x = 1.0/sqrt(a)
-  // r1 = x * x;
-  // r2 = a/sqrt(a);
-  //
-  // TO
-  //
-  // r1 = 1/a
-  // r2 = sqrt(a)
-  // x = r1 * r2
-  SmallPtrSet<Instruction *, 2> R1, R2;
-  if (isFSqrtDivToFMulLegal(&I, R1, R2)) {
-    CallInst *CI = cast<CallInst>(I.getOperand(1));
-    if (Instruction *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, Builder, this))
-      return D;
-  }
-
   if (isa<Constant>(Op0))
     if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
       if (Instruction *R = FoldOpIntoSelect(I, SI))
diff --git a/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll b/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll
deleted file mode 100644
index 6296954333e8a7..00000000000000
--- a/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll
+++ /dev/null
@@ -1,631 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s
-
-@x = global double 0.000000e+00
-@r1 = global double 0.000000e+00
-@r2 = global double 0.000000e+00
-@r3 = global double 0.000000e+00
-@v = global [2 x double] zeroinitializer
-@v1 = global [2 x double] zeroinitializer
-@v2 = global [2 x double] zeroinitializer
-
-; div/mul/div1 in the same block.
-define void @bb_constraint_case1(double %a) {
-; CHECK-LABEL: define void @bb_constraint_case1(
-; CHECK-SAME: double [[A:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
-; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]]
-; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
-; CHECK-NEXT:    store double [[TMP0]], ptr @r1, align 8
-; CHECK-NEXT:    store double [[SQRT1]], ptr @r2, align 8
-; CHECK-NEXT:    ret void
-;
-entry:
-  %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  store double %div, ptr @x
-  %mul = fmul reassoc double %div, %div
-  store double %mul, ptr @r1
-  %div1 = fdiv reassoc double %a, %sqrt
-  store double %div1, ptr @r2
-  ret void
-}
-
-; div/mul in one block and div1 in other block with conditional guard.
-define void @bb_constraint_case2(double %a, i32 %d) {
-; CHECK-LABEL: define void @bb_constraint_case2(
-; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
-; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]]
-; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
-; CHECK-NEXT:    store double [[TMP0]], ptr @r1, align 8
-; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
-; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    store double [[SQRT1]], ptr @r2, align 8
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
-entry:
-  %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  store double %div, ptr @x
-  %mul = fmul reassoc double %div, %div
-  store double %mul, ptr @r1
-  %d.not = icmp eq i32 %d, 0
-  br i1 %d.not, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %div1 = fdiv reassoc double %a, %sqrt
-  store double %div1, ptr @r2
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
-  ret void
-}
-
-; div in one block. mul/div1 in other block and conditionally guarded. Don't optimize.
-define void @bb_constraint_case3(double %a, i32 %d) {
-; CHECK-LABEL: define void @bb_constraint_case3(
-; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
-; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
-; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
-; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
-; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
-; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
-; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
-entry:
-  %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  store double %div, ptr @x
-  %d.not = icmp eq i32 %d, 0
-  br i1 %d.not, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %mul = fmul reassoc double %div, %div
-  store double %mul, ptr @r1
-  %div1 = fdiv reassoc double %a, %sqrt
-  store double %div1, ptr @r2
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
-  ret void
-}
-
-; div in one block. mul/div1 each in different block and conditionally guarded. Don't optimize.
-define void @bb_constraint_case4(double %a, i32 %c, i32 %d) {
-; CHECK-LABEL: define void @bb_constraint_case4(
-; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
-; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
-; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
-; CHECK-NEXT:    br i1 [[C_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
-; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
-; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_END1:%.*]], label [[IF_THEN1:%.*]]
-; CHECK:       if.then1:
-; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
-; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
-; CHECK-NEXT:    br label [[IF_END1]]
-; CHECK:       if.end1:
-; CHECK-NEXT:    ret void
-;
-entry:
-  %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  store double %div, ptr @x
-  %c.not = icmp eq i32 %c, 0
-  br i1 %c.not, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %mul = fmul reassoc double %div, %div
-  store double %mul, ptr @r1
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
-  %d.not = icmp eq i32 %d, 0
-  br i1 %d.not, label %if.end1, label %if.then1
-
-if.then1:                                         ; preds = %if.end
-  %div1 = fdiv reassoc double %a, %sqrt
-  store double %div1, ptr @r2
-  br label %if.end1
-
-if.end1:                                          ; preds = %if.then1, %if.end
-  ret void
-}
-
-; sqrt value comes from different blocks. Don't optimize.
-define void @bb_constraint_case5(double %a, i32 %c) {
-; CHECK-LABEL: define void @bb_constraint_case5(
-; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
-; CHECK-NEXT:    br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    br label [[IF_END:%.*]]
-; CHECK:       if.else:
-; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[A]], 1.000000e+01
-; CHECK-NEXT:    [[TMP1:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[ADD]])
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[SQRT:%.*]] = phi double [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
-; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
-; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
-; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
-; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
-; CHECK-NEXT:    ret void
-;
-entry:
-  %c.not = icmp eq i32 %c, 0
-  br i1 %c.not, label %if.else, label %if.then
-
-if.then:                                          ; preds = %entry
-  %0 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  %add = fadd double %a, 1.000000e+01
-  %1 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %add)
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %sqrt = phi double[ %0, %if.then], [ %1, %if.else]
-  %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  %mul = fmul reassoc double %div, %div
-  store double %mul, ptr @r1
-  %div1 = fdiv reassoc double %a, %sqrt
-  store double %div1, ptr @r2
-  ret void
-}
-
-; div in one block and conditionally guarded. mul/div1 in other block. Don't optimize.
-define void @bb_constraint_case6(double %a, i32 %d) {
-; CHECK-LABEL: define void @bb_constraint_case6(
-; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
-; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @x, align 8
-; CHECK-NEXT:    br label [[IF_END:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
-; CHECK-NEXT:    store double [[TMP1]], ptr @x, align 8
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[DIV:%.*]] = phi double [ [[TMP0]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ]
-; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
-; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
-; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
-; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
-; CHECK-NEXT:    ret void
-;
-entry:
-  %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  %d.not = icmp eq i32 %d, 0
-  br i1 %d.not, label %if.else, label %if.then
-
-if.else:                                          ; preds = %entry
-  %1 = load double, ptr @x
-  br label %if.end
-
-if.then:                                          ; preds = %entry
-  %2 = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  store double %2, ptr @x
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %div = phi double [ %1, %if.else ], [ %2, %if.then ]
-  %mul = fmul reassoc double %div, %div
-  store double %mul, ptr @r1
-  %div1 = fdiv reassoc double %a, %sqrt
-  store double %div1, ptr @r2
-  ret void
-}
-
-; value for mul comes from different blocks. Don't optimize.
-define void @bb_constraint_case7(double %a, i32 %c, i32 %d) {
-; CHECK-LABEL: define void @bb_constraint_case7(
-; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
-; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
-; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
-; CHECK-NEXT:    br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = fdiv double 3.000000e+00, [[A]]
-; CHECK-NEXT:    br label [[IF_END:%.*]]
-; CHECK:       if.else:
-; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
-; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_ELSE1:%.*]], label [[IF_THEN1:%.*]]
-; CHECK:       if.then1:
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv double 2.000000e+00, [[A]]
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.else1:
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[MUL:%.*]] = phi double [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP2]], [[IF_ELSE1]] ], [ [[TMP0]], [[IF_THEN]] ]
-; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
-; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
-; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
-; CHECK-NEXT:    ret void
-;
-entry:
-  %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
-  %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
-  store double %div, ptr @x
-  %c.not = icmp eq i32 %c, 0
-  br i1 %c.not, label %if.else, label %if.then
-
-if.then:              ...
[truncated]

…/FDIV into FMUL"" (#123313) Reverts #123289

…ndent FSQRT/FDIV into FMUL"" (#123313) Reverts llvm/llvm-project#123289

Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV in…

b2a7179

…to FMUL …" This reverts commit 7253c6f.

sushgokh requested a review from nikic as a code owner January 17, 2025 06:50

sushgokh merged commit 606d0a7 into main Jan 17, 2025
5 of 7 checks passed

llvmbot added llvm:instcombine llvm:transforms labels Jan 17, 2025

sushgokh deleted the revert-87474-GRCO-14 branch January 17, 2025 06:50

sushgokh mentioned this pull request Jan 17, 2025

Revert "Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL"" #123313

Merged

sushgokh added a commit that referenced this pull request Jan 17, 2025

Revert "Revert "[InstCombine] Transform high latency, dependent FSQRT…

3b3590a

…/FDIV into FMUL"" (#123313) Reverts #123289

github-actions bot pushed a commit to arm/arm-toolchain that referenced this pull request Jan 17, 2025

Automerge: Revert "Revert "[InstCombine] Transform high latency, depe…

26d1d76

…ndent FSQRT/FDIV into FMUL"" (#123313) Reverts llvm/llvm-project#123289

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL" #123289

Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL" #123289

sushgokh commented Jan 17, 2025

llvmbot commented Jan 17, 2025

Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL" #123289

Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL" #123289

Conversation

sushgokh commented Jan 17, 2025

llvmbot commented Jan 17, 2025