test/Transforms/InstCombine/fast-math.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3
   4 ; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
   5 ; 1.2f and 2.3f are supposed to be folded into a single constant.
   6 define float @fold(float %a) {
   7 ; CHECK-LABEL: @fold(
   8 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
   9 ; CHECK-NEXT:    ret float [[MUL1]]
  10 ;
     ; Both multiplies carry 'fast'; the expected output is a single 'fast' fmul
     ; of %a by one merged constant.
  11   %mul = fmul fast float %a, 0x3FF3333340000000
  12   %mul1 = fmul fast float %mul, 0x4002666660000000
  13   ret float %mul1
  14 }
  15
  16 ; Same testing-case as the one used in fold() except that the final fmul has
  17 ; no fast-math flags, so the two constants must not be merged.
  18 define float @notfold(float %a) {
  19 ; CHECK-LABEL: @notfold(
  20 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
  21 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
  22 ; CHECK-NEXT:    ret float [[MUL1]]
  23 ;
     ; Only the first fmul is 'fast'; the final fmul has no flags, and the
     ; expected output keeps both multiplies unchanged.
  24   %mul = fmul fast float %a, 0x3FF3333340000000
  25   %mul1 = fmul float %mul, 0x4002666660000000
  26   ret float %mul1
  27 }
  28
  29 define float @fold2(float %a) {
  30 ; CHECK-LABEL: @fold2(
  31 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
  32 ; CHECK-NEXT:    ret float [[MUL1]]
  33 ;
     ; Mirror of @notfold: here the inner fmul has no flags and only the final
     ; fmul is 'fast'. The expected output still merges both constants into one
     ; 'fast' fmul.
  34   %mul = fmul float %a, 0x3FF3333340000000
  35   %mul1 = fmul fast float %mul, 0x4002666660000000
  36   ret float %mul1
  37 }
  38
  39 ; C * f1 + f1 = (C+1) * f1
  40 ; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
  41 ; always safe, and so doesn't need any FMF.
  42 ; That is, (x + x + x) and (3*x) each have only a single rounding.
  43 define double @fold3(double %f1) {
  44 ; CHECK-LABEL: @fold3(
  45 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
  46 ; CHECK-NEXT:    ret double [[TMP1]]
  47 ;
     ; 5.0 * %f1 + %f1 with 'fast' on both operations; the expected output is
     ; a single fmul of %f1 by 6.0.
  48   %t1 = fmul fast double 5.000000e+00, %f1
  49   %t2 = fadd fast double %f1, %t1
  50   ret double %t2
  51 }
  52
  53 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
  54 define double @fold3_reassoc_nsz(double %f1) {
  55 ; CHECK-LABEL: @fold3_reassoc_nsz(
  56 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
  57 ; CHECK-NEXT:    ret double [[TMP1]]
  58 ;
     ; Same pattern as @fold3 with 'reassoc nsz' in place of 'fast'; the
     ; expected fmul by 6.0 carries 'reassoc nsz'.
  59   %t1 = fmul reassoc nsz double 5.000000e+00, %f1
  60   %t2 = fadd reassoc nsz double %f1, %t1
  61   ret double %t2
  62 }
  63
  64 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 6.0.
  65 define double @fold3_reassoc(double %f1) {
  66 ; CHECK-LABEL: @fold3_reassoc(
  67 ; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
  68 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc double [[T1]], [[F1]]
  69 ; CHECK-NEXT:    ret double [[T2]]
  70 ;
     ; With 'reassoc' alone the expected output keeps both the fmul and the
     ; fadd; only the operand order changes (constant moved to the right of the
     ; fmul, fadd operands swapped).
  71   %t1 = fmul reassoc double 5.000000e+00, %f1
  72   %t2 = fadd reassoc double %f1, %t1
  73   ret double %t2
  74 }
  75
  76 ; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
  77 define float @fold4(float %f1, float %f2) {
  78 ; CHECK-LABEL: @fold4(
  79 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
  80 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
  81 ; CHECK-NEXT:    ret float [[TMP2]]
  82 ;
     ; The two fsubs from constants carry no flags; only the final fadd is
     ; 'fast'. Expected output: 9.0 - (%f1 + %f2), both instructions 'fast'.
  83   %sub = fsub float 4.000000e+00, %f1
  84   %sub1 = fsub float 5.000000e+00, %f2
  85   %add = fadd fast float %sub, %sub1
  86   ret float %add
  87 }
  88
  89 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
  90 define float @fold4_reassoc_nsz(float %f1, float %f2) {
  91 ; CHECK-LABEL: @fold4_reassoc_nsz(
  92 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
  93 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
  94 ; CHECK-NEXT:    ret float [[TMP2]]
  95 ;
     ; Same pattern as @fold4 with 'reassoc nsz' on the final fadd only; the
     ; expected output is 9.0 - (%f1 + %f2) with 'reassoc nsz' on both ops.
  96   %sub = fsub float 4.000000e+00, %f1
  97   %sub1 = fsub float 5.000000e+00, %f2
  98   %add = fadd reassoc nsz float %sub, %sub1
  99   ret float %add
 100 }
 101
 102 ; TODO: This doesn't require 'nsz'.  It should fold to (9.0 - (f1 + f2)).
 103 define float @fold4_reassoc(float %f1, float %f2) {
 104 ; CHECK-LABEL: @fold4_reassoc(
 105 ; CHECK-NEXT:    [[SUB:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
 106 ; CHECK-NEXT:    [[SUB1:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
 107 ; CHECK-NEXT:    [[ADD:%.*]] = fadd reassoc float [[SUB]], [[SUB1]]
 108 ; CHECK-NEXT:    ret float [[ADD]]
 109 ;
     ; With 'reassoc' alone on the final fadd, the expected output is the
     ; input unchanged (all three instructions remain).
 110   %sub = fsub float 4.000000e+00, %f1
 111   %sub1 = fsub float 5.000000e+00, %f2
 112   %add = fadd reassoc float %sub, %sub1
 113   ret float %add
 114 }
 115
 116 ; (X + C1) + C2 => X + (C1 + C2)
 117 define float @fold5(float %f1) {
 118 ; CHECK-LABEL: @fold5(
 119 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
 120 ; CHECK-NEXT:    ret float [[ADD1]]
 121 ;
     ; The inner fadd has no flags; only the outer fadd is 'fast'. Expected
     ; output: a single 'fast' fadd of %f1 and 9.0.
 122   %add = fadd float %f1, 4.000000e+00
 123   %add1 = fadd fast float %add, 5.000000e+00
 124   ret float %add1
 125 }
 126
 127 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 128 define float @fold5_reassoc_nsz(float %f1) {
 129 ; CHECK-LABEL: @fold5_reassoc_nsz(
 130 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
 131 ; CHECK-NEXT:    ret float [[ADD1]]
 132 ;
     ; Same pattern as @fold5 with 'reassoc nsz' on the outer fadd only; the
     ; expected single fadd of 9.0 carries 'reassoc nsz'.
 133   %add = fadd float %f1, 4.000000e+00
 134   %add1 = fadd reassoc nsz float %add, 5.000000e+00
 135   ret float %add1
 136 }
 137
 138 ; TODO: This doesn't require 'nsz'.  It should fold to f1 + 9.0
 139 define float @fold5_reassoc(float %f1) {
 140 ; CHECK-LABEL: @fold5_reassoc(
 141 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
 142 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
 143 ; CHECK-NEXT:    ret float [[ADD1]]
 144 ;
     ; With 'reassoc' alone on the outer fadd, the expected output keeps both
     ; fadds exactly as written.
 145   %add = fadd float %f1, 4.000000e+00
 146   %add1 = fadd reassoc float %add, 5.000000e+00
 147   ret float %add1
 148 }
 149
 150 ; (X + X) + X + X => 4.0 * X
 151 define float @fold6(float %f1) {
 152 ; CHECK-LABEL: @fold6(
 153 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
 154 ; CHECK-NEXT:    ret float [[TMP1]]
 155 ;
     ; Three chained 'fast' fadds that sum four copies of %f1; the expected
     ; output is a single fmul of %f1 by 4.0.
 156   %t1 = fadd fast float %f1, %f1
 157   %t2 = fadd fast float %f1, %t1
 158   %t3 = fadd fast float %t2, %f1
 159   ret float %t3
 160 }
 161
 162 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 163 define float @fold6_reassoc_nsz(float %f1) {
 164 ; CHECK-LABEL: @fold6_reassoc_nsz(
 165 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
 166 ; CHECK-NEXT:    ret float [[TMP1]]
 167 ;
     ; Same pattern as @fold6 with 'reassoc nsz' on every fadd; the expected
     ; fmul by 4.0 carries 'reassoc nsz'.
 168   %t1 = fadd reassoc nsz float %f1, %f1
 169   %t2 = fadd reassoc nsz float %f1, %t1
 170   %t3 = fadd reassoc nsz float %t2, %f1
 171   ret float %t3
 172 }
 173
 174 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 4.0.
 175 define float @fold6_reassoc(float %f1) {
 176 ; CHECK-LABEL: @fold6_reassoc(
 177 ; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
 178 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[T1]], [[F1]]
 179 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T2]], [[F1]]
 180 ; CHECK-NEXT:    ret float [[T3]]
 181 ;
     ; With 'reassoc' alone, the expected output keeps all three fadds; only the
     ; operands of the second fadd are swapped.
 182   %t1 = fadd reassoc float %f1, %f1
 183   %t2 = fadd reassoc float %f1, %t1
 184   %t3 = fadd reassoc float %t2, %f1
 185   ret float %t3
 186 }
 187
 188 ; C1 * X + (X + X) = (C1 + 2) * X
 189 define float @fold7(float %f1) {
 190 ; CHECK-LABEL: @fold7(
 191 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
 192 ; CHECK-NEXT:    ret float [[TMP1]]
 193 ;
     ; %f1 * 5.0 plus (%f1 + %f1), all 'fast'; the expected output is a single
     ; fmul of %f1 by 7.0.
 194   %t1 = fmul fast float %f1, 5.000000e+00
 195   %t2 = fadd fast float %f1, %f1
 196   %t3 = fadd fast float %t1, %t2
 197   ret float %t3
 198 }
 199
 200 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 201 define float @fold7_reassoc_nsz(float %f1) {
 202 ; CHECK-LABEL: @fold7_reassoc_nsz(
 203 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
 204 ; CHECK-NEXT:    ret float [[TMP1]]
 205 ;
     ; Same pattern as @fold7 with 'reassoc nsz' on every instruction; the
     ; expected fmul by 7.0 carries 'reassoc nsz'.
 206   %t1 = fmul reassoc nsz float %f1, 5.000000e+00
 207   %t2 = fadd reassoc nsz float %f1, %f1
 208   %t3 = fadd reassoc nsz float %t1, %t2
 209   ret float %t3
 210 }
 211
 212 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 7.0.
 213 define float @fold7_reassoc(float %f1) {
 214 ; CHECK-LABEL: @fold7_reassoc(
 215 ; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
 216 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F1]], [[F1]]
 217 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
 218 ; CHECK-NEXT:    ret float [[T3]]
 219 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (the
     ; fmul and both fadds remain).
 220   %t1 = fmul reassoc float %f1, 5.000000e+00
 221   %t2 = fadd reassoc float %f1, %f1
 222   %t3 = fadd reassoc float %t1, %t2
 223   ret float %t3
 224 }
 225
 226 ; (X + X) + (X + X) + X => 5.0 * X
 227 define float @fold8(float %f1) {
 228 ; CHECK-LABEL: @fold8(
 229 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
 230 ; CHECK-NEXT:    ret float [[TMP1]]
 231 ;
     ; Four 'fast' fadds that sum five copies of %f1; the expected output is a
     ; single fmul of %f1 by 5.0.
 232   %t1 = fadd fast float %f1, %f1
 233   %t2 = fadd fast float %f1, %f1
 234   %t3 = fadd fast float %t1, %t2
 235   %t4 = fadd fast float %t3, %f1
 236   ret float %t4
 237 }
 238
 239 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 240 define float @fold8_reassoc_nsz(float %f1) {
 241 ; CHECK-LABEL: @fold8_reassoc_nsz(
 242 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
 243 ; CHECK-NEXT:    ret float [[TMP1]]
 244 ;
     ; Same pattern as @fold8 with 'reassoc nsz' on every fadd; the expected
     ; fmul by 5.0 carries 'reassoc nsz'.
 245   %t1 = fadd reassoc nsz float %f1, %f1
 246   %t2 = fadd reassoc nsz float %f1, %f1
 247   %t3 = fadd reassoc nsz float %t1, %t2
 248   %t4 = fadd reassoc nsz float %t3, %f1
 249   ret float %t4
 250 }
 251
 252 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 5.0.
 253 define float @fold8_reassoc(float %f1) {
 254 ; CHECK-LABEL: @fold8_reassoc(
 255 ; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
 256 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F1]], [[F1]]
 257 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
 258 ; CHECK-NEXT:    [[T4:%.*]] = fadd reassoc float [[T3]], [[F1]]
 259 ; CHECK-NEXT:    ret float [[T4]]
 260 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (all
     ; four fadds remain).
 261   %t1 = fadd reassoc float %f1, %f1
 262   %t2 = fadd reassoc float %f1, %f1
 263   %t3 = fadd reassoc float %t1, %t2
 264   %t4 = fadd reassoc float %t3, %f1
 265   ret float %t4
 266 }
 267
 268 ; Y - (X + Y) --> -X
 269
 270 define float @fsub_fadd_common_op_fneg(float %x, float %y) {
 271 ; CHECK-LABEL: @fsub_fadd_common_op_fneg(
 272 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float -0.000000e+00, [[X:%.*]]
 273 ; CHECK-NEXT:    ret float [[TMP1]]
 274 ;
     ; The fadd carries no flags; only the fsub is 'fast'. Expected output is
     ; the negation of %x written as 'fsub fast -0.0, %x'; %y no longer appears.
 275   %a = fadd float %x, %y
 276   %r = fsub fast float %y, %a
 277   ret float %r
 278 }
 279
 280 ; Y - (X + Y) --> -X
 281 ; Check again with 'reassoc' and 'nsz'.
 282 ; nsz is required because: 0.0 - (0.0 + 0.0) -> 0.0, not -0.0
 283
 284 define float @fsub_fadd_common_op_fneg_reassoc_nsz(float %x, float %y) {
 285 ; CHECK-LABEL: @fsub_fadd_common_op_fneg_reassoc_nsz(
 286 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[X:%.*]]
 287 ; CHECK-NEXT:    ret float [[TMP1]]
 288 ;
     ; Same IR as @fsub_fadd_common_op_fneg with 'reassoc nsz' on the fsub in
     ; place of 'fast'; the expected negation of %x carries 'reassoc nsz'.
 289   %a = fadd float %x, %y
 290   %r = fsub reassoc nsz float %y, %a
 291   ret float %r
 292 }
 293
 294 ; Y - (X + Y) --> -X
 295
 296 define <2 x float> @fsub_fadd_common_op_fneg_vec(<2 x float> %x, <2 x float> %y) {
 297 ; CHECK-LABEL: @fsub_fadd_common_op_fneg_vec(
 298 ; CHECK-NEXT:    [[A:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]]
 299 ; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz <2 x float> [[Y]], [[A]]
 300 ; CHECK-NEXT:    ret <2 x float> [[R]]
 301 ;
     ; <2 x float> variant. Unlike the scalar tests, the expected output here
     ; keeps both the fadd and the fsub (no fold to -X is recorded). The flags
     ; are written 'nsz reassoc' in the input and printed as 'reassoc nsz'.
 302   %a = fadd <2 x float> %x, %y
 303   %r = fsub nsz reassoc <2 x float> %y, %a
 304   ret <2 x float> %r
 305 }
 306
 307 ; Y - (Y + X) --> -X
 308 ; Commute operands of the 'add'.
 309
 310 define float @fsub_fadd_common_op_fneg_commute(float %x, float %y) {
 311 ; CHECK-LABEL: @fsub_fadd_common_op_fneg_commute(
 312 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[X:%.*]]
 313 ; CHECK-NEXT:    ret float [[TMP1]]
 314 ;
     ; The fadd operands are in the order (%y, %x); the fsub carries
     ; 'reassoc nsz'. Expected output is still the negation of %x.
 315   %a = fadd float %y, %x
 316   %r = fsub reassoc nsz float %y, %a
 317   ret float %r
 318 }
 319
 320 ; Y - (Y + X) --> -X
 321
 322 define <2 x float> @fsub_fadd_common_op_fneg_commute_vec(<2 x float> %x, <2 x float> %y) {
 323 ; CHECK-LABEL: @fsub_fadd_common_op_fneg_commute_vec(
 324 ; CHECK-NEXT:    [[A:%.*]] = fadd <2 x float> [[Y:%.*]], [[X:%.*]]
 325 ; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz <2 x float> [[Y]], [[A]]
 326 ; CHECK-NEXT:    ret <2 x float> [[R]]
 327 ;
     ; <2 x float> variant of the commuted form. The expected output keeps both
     ; the fadd and the fsub unchanged (no fold to -X is recorded).
 328   %a = fadd <2 x float> %y, %x
 329   %r = fsub reassoc nsz <2 x float> %y, %a
 330   ret <2 x float> %r
 331 }
 332
 333 ; (Y - X) - Y --> -X
 334 ; nsz is required because: (0.0 - 0.0) - 0.0 -> 0.0, not -0.0
 335
 336 define float @fsub_fsub_common_op_fneg(float %x, float %y) {
 337 ; CHECK-LABEL: @fsub_fsub_common_op_fneg(
 338 ; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz float -0.000000e+00, [[X:%.*]]
 339 ; CHECK-NEXT:    ret float [[R]]
 340 ;
     ; The inner fsub carries no flags; only the outer fsub has 'reassoc nsz'.
     ; Expected output is the negation of %x written as 'fsub -0.0, %x'.
 341   %s = fsub float %y, %x
 342   %r = fsub reassoc nsz float %s, %y
 343   ret float %r
 344 }
 345
 346 ; (Y - X) - Y --> -X
 347
 348 define <2 x float> @fsub_fsub_common_op_fneg_vec(<2 x float> %x, <2 x float> %y) {
 349 ; CHECK-LABEL: @fsub_fsub_common_op_fneg_vec(
 350 ; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
 351 ; CHECK-NEXT:    ret <2 x float> [[R]]
 352 ;
     ; <2 x float> variant of @fsub_fsub_common_op_fneg; the expected output
     ; subtracts %x from a vector with -0.0 in both lanes.
 353   %s = fsub <2 x float> %y, %x
 354   %r = fsub reassoc nsz <2 x float> %s, %y
 355   ret <2 x float> %r
 356 }
 357
 358 ; Verify the fold to (0 - f2) is not done with only 'reassoc': 'nsz' is
     ; required because 0.0 - (0.0 + 0.0) is 0.0, not -0.0.
 359 define float @fold9_reassoc(float %f1, float %f2) {
 360 ; CHECK-LABEL: @fold9_reassoc(
 361 ; CHECK-NEXT:    [[T1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
 362 ; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float [[F1]], [[T1]]
 363 ; CHECK-NEXT:    ret float [[T3]]
 364 ;
     ; %f1 - (%f1 + %f2) with 'reassoc' on the fsub only; the expected output
     ; is the input unchanged.
 365   %t1 = fadd float %f1, %f2
 366   %t3 = fsub reassoc float %f1, %t1
 367   ret float %t3
 368 }
 369
 370 ; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
 371 ; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
 372 ; top of resulting simplified expression tree may potentially reveal some
 373 ; optimization opportunities in the super-expression trees.
 374 ;
 375 define float @fold10(float %f1, float %f2) {
 376 ; CHECK-LABEL: @fold10(
 377 ; CHECK-NEXT:    [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
 378 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
 379 ; CHECK-NEXT:    ret float [[T3]]
 380 ;
     ; (2.0 + %f1) + (%f2 - 3.0), all 'fast'. Expected output adds the two
     ; variables first and applies the merged constant -1.0 last.
 381   %t1 = fadd fast float 2.000000e+00, %f1
 382   %t2 = fsub fast float %f2, 3.000000e+00
 383   %t3 = fadd fast float %t1, %t2
 384   ret float %t3
 385 }
 386
 387 ; Check again with 'reassoc' and 'nsz'.
 388 ; TODO: We may be able to remove the 'nsz' requirement.
 389 define float @fold10_reassoc_nsz(float %f1, float %f2) {
 390 ; CHECK-LABEL: @fold10_reassoc_nsz(
 391 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
 392 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
 393 ; CHECK-NEXT:    ret float [[T3]]
 394 ;
     ; Same pattern as @fold10 with 'reassoc nsz' on every instruction; the
     ; expected output has the same shape with 'reassoc nsz' flags.
 395   %t1 = fadd reassoc nsz float 2.000000e+00, %f1
 396   %t2 = fsub reassoc nsz float %f2, 3.000000e+00
 397   %t3 = fadd reassoc nsz float %t1, %t2
 398   ret float %t3
 399 }
 400
 401 ; Observe that the fold is not done with only reassoc (the instructions are
 402 ; canonicalized, but not folded).
 403 ; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
 404 define float @fold10_reassoc(float %f1, float %f2) {
 405 ; CHECK-LABEL: @fold10_reassoc(
 406 ; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
 407 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
 408 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
 409 ; CHECK-NEXT:    ret float [[T3]]
 410 ;
     ; With 'reassoc' alone, the expected output still has three instructions.
     ; The only changes are canonical forms: the constant 2.0 moves to the
     ; right-hand side and 'fsub %f2, 3.0' becomes 'fadd %f2, -3.0'.
 411   %t1 = fadd reassoc float 2.000000e+00, %f1
 412   %t2 = fsub reassoc float %f2, 3.000000e+00
 413   %t3 = fadd reassoc float %t1, %t2
 414   ret float %t3
 415 }
 416
 417 ; This used to crash/miscompile.
 418
 419 define float @fail1(float %f1, float %f2) {
 420 ; CHECK-LABEL: @fail1(
 421 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
 422 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
 423 ; CHECK-NEXT:    ret float [[TMP2]]
 424 ;
     ; (%f1 + -1.0) is summed three times with 'fast' fadds; the expected
     ; output is %f1 * 3.0 + -3.0. Note that %f2 is not used by the body.
 425   %conv3 = fadd fast float %f1, -1.000000e+00
 426   %add = fadd fast float %conv3, %conv3
 427   %add2 = fadd fast float %add, %conv3
 428   ret float %add2
 429 }
 430
 431 define double @fail2(double %f1, double %f2) {
 432 ; CHECK-LABEL: @fail2(
 433 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
 434 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast double -0.000000e+00, [[TMP1]]
 435 ; CHECK-NEXT:    ret double [[TMP2]]
 436 ;
     ; (%f1 - %f2) - (%f1 + %f2), all 'fast'. In the expected output %f1 is
     ; gone and the result is the negation of (%f2 + %f2).
 437   %t1 = fsub fast double %f1, %f2
 438   %t2 = fadd fast double %f1, %f2
 439   %t3 = fsub fast double %t1, %t2
 440   ret double %t3
 441 }
 442
 443 ; c1 * x - x => (c1 - 1.0) * x
 444 define float @fold13(float %x) {
 445 ; CHECK-LABEL: @fold13(
 446 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 6.000000e+00
 447 ; CHECK-NEXT:    ret float [[TMP1]]
 448 ;
     ; %x * 7.0 - %x with 'fast' on both operations; the expected output is a
     ; single fmul of %x by 6.0.
 449   %mul = fmul fast float %x, 7.000000e+00
 450   %sub = fsub fast float %mul, %x
 451   ret float %sub
 452 }
 453
 454 ; Check again using the minimal subset of FMF.
 455 define float @fold13_reassoc_nsz(float %x) {
 456 ; CHECK-LABEL: @fold13_reassoc_nsz(
 457 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
 458 ; CHECK-NEXT:    ret float [[TMP1]]
 459 ;
     ; Same pattern as @fold13 with 'reassoc nsz' in place of 'fast'; the
     ; expected fmul by 6.0 carries 'reassoc nsz'.
 460   %mul = fmul reassoc nsz float %x, 7.000000e+00
 461   %sub = fsub reassoc nsz float %mul, %x
 462   ret float %sub
 463 }
 464
 465 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 466 define float @fold13_reassoc(float %x) {
 467 ; CHECK-LABEL: @fold13_reassoc(
 468 ; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
 469 ; CHECK-NEXT:    [[SUB:%.*]] = fsub reassoc float [[MUL]], [[X]]
 470 ; CHECK-NEXT:    ret float [[SUB]]
 471 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (the
     ; fmul and fsub both remain).
 472   %mul = fmul reassoc float %x, 7.000000e+00
 473   %sub = fsub reassoc float %mul, %x
 474   ret float %sub
 475 }
 476
 477 ; (select X+Y, X-Y) => X + (select Y, -Y)
 478 ; This is always safe.  No FMF required.
 479 define float @fold16(float %x, float %y) {
 480 ; CHECK-LABEL: @fold16(
 481 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
 482 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]]
 483 ; CHECK-NEXT:    [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
 484 ; CHECK-NEXT:    [[R:%.*]] = fadd float [[R_P]], [[X]]
 485 ; CHECK-NEXT:    ret float [[R]]
 486 ;
     ; No instruction carries fast-math flags. The select between (%x + %y) and
     ; (%x - %y) is expected to become a select between %y and its negation,
     ; followed by one fadd with %x.
 487   %cmp = fcmp ogt float %x, %y
 488   %plus = fadd float %x, %y
 489   %minus = fsub float %x, %y
 490   %r = select i1 %cmp, float %plus, float %minus
 491   ret float %r
 492 }
 493
 494 ; =========================================================================
 495 ;
 496 ;   Testing-cases about negation
 497 ;
 498 ; =========================================================================
 499 define float @fneg1(float %f1, float %f2) {
 500 ; CHECK-LABEL: @fneg1(
 501 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
 502 ; CHECK-NEXT:    ret float [[MUL]]
 503 ;
     ; Multiplies two negated values: %f1 is negated as 'fsub -0.0, %f1' and %f2
     ; as 'fsub nsz 0.0, %f2'. The expected output drops both negations and
     ; multiplies %f1 by %f2 directly.
 504   %sub = fsub float -0.000000e+00, %f1
 505   %sub1 = fsub nsz float 0.000000e+00, %f2
 506   %mul = fmul float %sub, %sub1
 507   ret float %mul
 508 }
 509
 510 define float @fneg2(float %x) {
 511 ; CHECK-LABEL: @fneg2(
 512 ; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz float -0.000000e+00, [[X:%.*]]
 513 ; CHECK-NEXT:    ret float [[SUB]]
 514 ;
     ; 'fsub nsz 0.0, %x' is expected to be rewritten with -0.0 as the left
     ; operand, keeping the 'nsz' flag.
 515   %sub = fsub nsz float 0.0, %x
 516   ret float %sub
 517 }
 518
 519 define <2 x float> @fneg2_vec_undef(<2 x float> %x) {
 520 ; CHECK-LABEL: @fneg2_vec_undef(
 521 ; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
 522 ; CHECK-NEXT:    ret <2 x float> [[SUB]]
 523 ;
     ; Vector variant of @fneg2 whose left operand is <undef, 0.0>; the expected
     ; output replaces it with -0.0 in both lanes.
 524   %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x
 525   ret <2 x float> %sub
 526 }
 527
 528 ; =========================================================================
 529 ;
 530 ;   Testing-cases about div
 531 ;
 532 ; =========================================================================
 533
 534 ; X/C1 / C2 => X * (1/(C2*C1))
 535 define float @fdiv1(float %x) {
 536 ; CHECK-LABEL: @fdiv1(
 537 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
 538 ; CHECK-NEXT:    ret float [[DIV1]]
 539 ;
     ; The inner fdiv carries no flags; only the outer fdiv is 'fast'. Expected
     ; output is a single 'fast' fmul of %x by the constant decoded below.
 540   %div = fdiv float %x, 0x3FF3333340000000
 541   %div1 = fdiv fast float %div, 0x4002666660000000
 542   ret float %div1
 543 ; 0x3FF3333340000000 = 1.2f
 544 ; 0x4002666660000000 = 2.3f
 545 ; 0x3FD7303B60000000 = 0.36231884057971014492
 546 }
 547
 548 ; X*C1 / C2 => X * (C1/C2)
 549 define float @fdiv2(float %x) {
 550 ; CHECK-LABEL: @fdiv2(
 551 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
 552 ; CHECK-NEXT:    ret float [[DIV1]]
 553 ;
     ; The fmul carries no flags; only the fdiv is 'fast'. Expected output is a
     ; single 'fast' fmul of %x by the constant decoded below.
 554   %mul = fmul float %x, 0x3FF3333340000000
 555   %div1 = fdiv fast float %mul, 0x4002666660000000
 556   ret float %div1
 557
 558 ; 0x3FF3333340000000 = 1.2f
 559 ; 0x4002666660000000 = 2.3f
 560 ; 0x3FE0B21660000000 = 0.52173918485641479492
 561 }
 562
 563 define <2 x float> @fdiv2_vec(<2 x float> %x) {
 564 ; CHECK-LABEL: @fdiv2_vec(
 565 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
 566 ; CHECK-NEXT:    ret <2 x float> [[DIV1]]
 567 ;
     ; Vector form of X*C1 / C2 with per-lane constants <6.0, 9.0> and
     ; <2.0, 3.0>; the expected output is one 'fast' fmul by <3.0, 3.0>.
 568   %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
 569   %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
 570   ret <2 x float> %div1
 571 }
 572
 573 ; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
 574 ;
 575 define float @fdiv3(float %x) {
 576 ; CHECK-LABEL: @fdiv3(
 577 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], 0x47EFFFFFE0000000
 578 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[DIV]], 0x3FDBD37A80000000
 579 ; CHECK-NEXT:    ret float [[DIV1]]
 580 ;
     ; The two divisors are not merged: the expected output keeps the first
     ; fdiv as written and only turns the second ('fast') fdiv into an fmul by
     ; a constant (presumably the reciprocal of 2.3f - confirm).
 581   %div = fdiv float %x, 0x47EFFFFFE0000000
 582   %div1 = fdiv fast float %div, 0x4002666660000000
 583   ret float %div1
 584 }
 585
 586 ; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
 587 define float @fdiv4(float %x) {
 588 ; CHECK-LABEL: @fdiv4(
 589 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
 590 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
 591 ; CHECK-NEXT:    ret float [[DIV]]
 592 ;
     ; Expected output is the input unchanged.
     ; NOTE(review): neither instruction carries fast-math flags (compare
     ; @fdiv2, where the fdiv is 'fast'), so this input looks like it would be
     ; left alone regardless of the constants. To exercise the constant-range
     ; restriction, the fdiv probably needs 'fast' and the expected output
     ; regenerated - confirm.
 593   %mul = fmul float %x, 0x47EFFFFFE0000000
 594   %div = fdiv float %mul, 0x3FC99999A0000000
 595   ret float %div
 596 }
 597
 598 ; =========================================================================
 599 ;
 600 ;   Testing-cases about factorization
 601 ;
 602 ; =========================================================================
 603 ; x*z + y*z => (x+y) * z
 604 define float @fact_mul1(float %x, float %y, float %z) {
 605 ; CHECK-LABEL: @fact_mul1(
 606 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
 607 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
 608 ; CHECK-NEXT:    ret float [[TMP2]]
 609 ;
     ; %z is the right operand of both 'fast' multiplies; the expected output
     ; factors it out as (%x + %y) * %z.
 610   %t1 = fmul fast float %x, %z
 611   %t2 = fmul fast float %y, %z
 612   %t3 = fadd fast float %t1, %t2
 613   ret float %t3
 614 }
 615
 616 ; Check again using the minimal subset of FMF.
 617 define float @fact_mul1_reassoc_nsz(float %x, float %y, float %z) {
 618 ; CHECK-LABEL: @fact_mul1_reassoc_nsz(
 619 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]]
 620 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
 621 ; CHECK-NEXT:    ret float [[TMP2]]
 622 ;
     ; Same pattern as @fact_mul1 with 'reassoc nsz' on every instruction; the
     ; expected (%x + %y) * %z carries 'reassoc nsz'.
 623   %t1 = fmul reassoc nsz float %x, %z
 624   %t2 = fmul reassoc nsz float %y, %z
 625   %t3 = fadd reassoc nsz float %t1, %t2
 626   ret float %t3
 627 }
 628
 629 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 630 define float @fact_mul1_reassoc(float %x, float %y, float %z) {
 631 ; CHECK-LABEL: @fact_mul1_reassoc(
 632 ; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
 633 ; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
 634 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
 635 ; CHECK-NEXT:    ret float [[T3]]
 636 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (both
     ; fmuls and the fadd remain).
 637   %t1 = fmul reassoc float %x, %z
 638   %t2 = fmul reassoc float %y, %z
 639   %t3 = fadd reassoc float %t1, %t2
 640   ret float %t3
 641 }
 642
 643 ; z*x - y*z => (x-y) * z
 644 define float @fact_mul2(float %x, float %y, float %z) {
 645 ; CHECK-LABEL: @fact_mul2(
 646 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
 647 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
 648 ; CHECK-NEXT:    ret float [[TMP2]]
 649 ;
     ; %z is the left operand of the first fmul and the right operand of the
     ; second, and the products are combined with fsub. The expected output
     ; factors %z out as (%x - %y) * %z.
 650   %t1 = fmul fast float %z, %x
 651   %t2 = fmul fast float %y, %z
 652   %t3 = fsub fast float %t1, %t2
 653   ret float %t3
 654 }
 655
 656 ; Check again using the minimal subset of FMF.
 657 define float @fact_mul2_reassoc_nsz(float %x, float %y, float %z) {
 658 ; CHECK-LABEL: @fact_mul2_reassoc_nsz(
 659 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
 660 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
 661 ; CHECK-NEXT:    ret float [[TMP2]]
 662 ;
     ; Same pattern as @fact_mul2 with 'reassoc nsz' on every instruction; the
     ; expected (%x - %y) * %z carries 'reassoc nsz'.
 663   %t1 = fmul reassoc nsz float %z, %x
 664   %t2 = fmul reassoc nsz float %y, %z
 665   %t3 = fsub reassoc nsz float %t1, %t2
 666   ret float %t3
 667 }
 668
 669 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 670 define float @fact_mul2_reassoc(float %x, float %y, float %z) {
 671 ; CHECK-LABEL: @fact_mul2_reassoc(
 672 ; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc float [[Z:%.*]], [[X:%.*]]
 673 ; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
 674 ; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float [[T1]], [[T2]]
 675 ; CHECK-NEXT:    ret float [[T3]]
 676 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (both
     ; fmuls and the fsub remain).
 677   %t1 = fmul reassoc float %z, %x
 678   %t2 = fmul reassoc float %y, %z
 679   %t3 = fsub reassoc float %t1, %t2
 680   ret float %t3
 681 }
 682
 683 ; z*x - z*y => (x-y) * z
 684 define float @fact_mul3(float %x, float %y, float %z) {
 685 ; CHECK-LABEL: @fact_mul3(
 686 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
 687 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
 688 ; CHECK-NEXT:    ret float [[TMP2]]
 689 ;
     ; %z is the left operand of both 'fast' multiplies, and %t2 is defined
     ; before %t1. The expected output is (%x - %y) * %z.
 690   %t2 = fmul fast float %z, %y
 691   %t1 = fmul fast float %z, %x
 692   %t3 = fsub fast float %t1, %t2
 693   ret float %t3
 694 }
 695
 696 ; Check again using the minimal subset of FMF.
 697 define float @fact_mul3_reassoc_nsz(float %x, float %y, float %z) {
 698 ; CHECK-LABEL: @fact_mul3_reassoc_nsz(
 699 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
 700 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
 701 ; CHECK-NEXT:    ret float [[TMP2]]
 702 ;
     ; Same pattern as @fact_mul3 with 'reassoc nsz' on every instruction; the
     ; expected (%x - %y) * %z carries 'reassoc nsz'.
 703   %t2 = fmul reassoc nsz float %z, %y
 704   %t1 = fmul reassoc nsz float %z, %x
 705   %t3 = fsub reassoc nsz float %t1, %t2
 706   ret float %t3
 707 }
 708
 709 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 710 define float @fact_mul3_reassoc(float %x, float %y, float %z) {
 711 ; CHECK-LABEL: @fact_mul3_reassoc(
 712 ; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc float [[Z:%.*]], [[Y:%.*]]
 713 ; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc float [[Z]], [[X:%.*]]
 714 ; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float [[T1]], [[T2]]
 715 ; CHECK-NEXT:    ret float [[T3]]
 716 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (both
     ; fmuls and the fsub remain, in the same order).
 717   %t2 = fmul reassoc float %z, %y
 718   %t1 = fmul reassoc float %z, %x
 719   %t3 = fsub reassoc float %t1, %t2
 720   ret float %t3
 721 }
 722
 723 ; x*z - z*y => (x-y) * z
 724 define float @fact_mul4(float %x, float %y, float %z) {
 725 ; CHECK-LABEL: @fact_mul4(
 726 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
 727 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
 728 ; CHECK-NEXT:    ret float [[TMP2]]
 729 ;
     ; %z is the right operand of the first fmul and the left operand of the
     ; second; the expected output is (%x - %y) * %z.
 730   %t1 = fmul fast float %x, %z
 731   %t2 = fmul fast float %z, %y
 732   %t3 = fsub fast float %t1, %t2
 733   ret float %t3
 734 }
 735
 736 ; Check again using the minimal subset of FMF.
 737 define float @fact_mul4_reassoc_nsz(float %x, float %y, float %z) {
 738 ; CHECK-LABEL: @fact_mul4_reassoc_nsz(
 739 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
 740 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
 741 ; CHECK-NEXT:    ret float [[TMP2]]
 742 ;
     ; Same pattern as @fact_mul4 with 'reassoc nsz' on every instruction; the
     ; expected (%x - %y) * %z carries 'reassoc nsz'.
 743   %t1 = fmul reassoc nsz float %x, %z
 744   %t2 = fmul reassoc nsz float %z, %y
 745   %t3 = fsub reassoc nsz float %t1, %t2
 746   ret float %t3
 747 }
 748
 749 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 750 define float @fact_mul4_reassoc(float %x, float %y, float %z) {
 751 ; CHECK-LABEL: @fact_mul4_reassoc(
 752 ; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
 753 ; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc float [[Z]], [[Y:%.*]]
 754 ; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float [[T1]], [[T2]]
 755 ; CHECK-NEXT:    ret float [[T3]]
 756 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (both
     ; fmuls and the fsub remain).
 757   %t1 = fmul reassoc float %x, %z
 758   %t2 = fmul reassoc float %z, %y
 759   %t3 = fsub reassoc float %t1, %t2
 760   ret float %t3
 761 }
 762
 763 ; x/y + x/z, no xform
 764 define float @fact_div1(float %x, float %y, float %z) {
 765 ; CHECK-LABEL: @fact_div1(
 766 ; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
 767 ; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[X]], [[Z:%.*]]
 768 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
 769 ; CHECK-NEXT:    ret float [[T3]]
 770 ;
     ; The two 'fast' divisions share the numerator %x but have different
     ; divisors; the expected output is the input unchanged.
 771   %t1 = fdiv fast float %x, %y
 772   %t2 = fdiv fast float %x, %z
 773   %t3 = fadd fast float %t1, %t2
 774   ret float %t3
 775 }
 776
 777 ; x/y + z/x; no xform
 778 define float @fact_div2(float %x, float %y, float %z) {
 779 ; CHECK-LABEL: @fact_div2(
 780 ; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
 781 ; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
 782 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
 783 ; CHECK-NEXT:    ret float [[T3]]
 784 ;
     ; %x is the numerator of the first 'fast' division and the divisor of the
     ; second; the expected output is the input unchanged.
 785   %t1 = fdiv fast float %x, %y
 786   %t2 = fdiv fast float %z, %x
 787   %t3 = fadd fast float %t1, %t2
 788   ret float %t3
 789 }
 790
 791 ; y/x + z/x => (y+z)/x
 792 define float @fact_div3(float %x, float %y, float %z) {
 793 ; CHECK-LABEL: @fact_div3(
 794 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[Y:%.*]], [[Z:%.*]]
 795 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
 796 ; CHECK-NEXT:    ret float [[TMP2]]
 797 ;
     ; Both 'fast' divisions share the divisor %x; the expected output adds
     ; the numerators first and divides once: (%y + %z) / %x.
 798   %t1 = fdiv fast float %y, %x
 799   %t2 = fdiv fast float %z, %x
 800   %t3 = fadd fast float %t1, %t2
 801   ret float %t3
 802 }
 803
 804 ; Check again using the minimal subset of FMF.
 805 define float @fact_div3_reassoc_nsz(float %x, float %y, float %z) {
 806 ; CHECK-LABEL: @fact_div3_reassoc_nsz(
 807 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
 808 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
 809 ; CHECK-NEXT:    ret float [[TMP2]]
 810 ;
     ; Same pattern as @fact_div3 with 'reassoc nsz' on every instruction; the
     ; expected (%y + %z) / %x carries 'reassoc nsz'.
 811   %t1 = fdiv reassoc nsz float %y, %x
 812   %t2 = fdiv reassoc nsz float %z, %x
 813   %t3 = fadd reassoc nsz float %t1, %t2
 814   ret float %t3
 815 }
 816
 817 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 818 define float @fact_div3_reassoc(float %x, float %y, float %z) {
 819 ; CHECK-LABEL: @fact_div3_reassoc(
 820 ; CHECK-NEXT:    [[T1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
 821 ; CHECK-NEXT:    [[T2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
 822 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
 823 ; CHECK-NEXT:    ret float [[T3]]
 824 ;
     ; With 'reassoc' alone, the expected output is the input unchanged (both
     ; fdivs and the fadd remain).
 825   %t1 = fdiv reassoc float %y, %x
 826   %t2 = fdiv reassoc float %z, %x
 827   %t3 = fadd reassoc float %t1, %t2
 828   ret float %t3
 829 }
 830
 831 ; y/x - z/x => (y-z)/x
 832 define float @fact_div4(float %x, float %y, float %z) { ; positive case: subtraction variant, every instruction is 'fast'
 833 ; CHECK-LABEL: @fact_div4(
 834 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Y:%.*]], [[Z:%.*]]
 835 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
 836 ; CHECK-NEXT:    ret float [[TMP2]]
 837 ;
 838   %t1 = fdiv fast float %y, %x ; y / x
 839   %t2 = fdiv fast float %z, %x ; z / x (same divisor %x as %t1)
 840   %t3 = fsub fast float %t1, %t2 ; expected output: one fsub of y and z, then one fdiv by x
 841   ret float %t3
 842 }
 843
 844 ; Check again using the minimal subset of FMF.
 845 define float @fact_div4_reassoc_nsz(float %x, float %y, float %z) { ; same input shape as @fact_div4, flags reduced to 'reassoc nsz'
 846 ; CHECK-LABEL: @fact_div4_reassoc_nsz(
 847 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[Y:%.*]], [[Z:%.*]]
 848 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
 849 ; CHECK-NEXT:    ret float [[TMP2]]
 850 ;
 851   %t1 = fdiv reassoc nsz float %y, %x ; y / x
 852   %t2 = fdiv reassoc nsz float %z, %x ; z / x (same divisor %x as %t1)
 853   %t3 = fsub reassoc nsz float %t1, %t2 ; expected output: (y - z) / x, both new ops carrying 'reassoc nsz'
 854   ret float %t3
 855 }
 856
 857 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
 858 define float @fact_div4_reassoc(float %x, float %y, float %z) { ; negative case: 'reassoc' only, no 'nsz'
 859 ; CHECK-LABEL: @fact_div4_reassoc(
 860 ; CHECK-NEXT:    [[T1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
 861 ; CHECK-NEXT:    [[T2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
 862 ; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float [[T1]], [[T2]]
 863 ; CHECK-NEXT:    ret float [[T3]]
 864 ;
 865   %t1 = fdiv reassoc float %y, %x ; y / x
 866   %t2 = fdiv reassoc float %z, %x ; z / x (same divisor %x as %t1)
 867   %t3 = fsub reassoc float %t1, %t2 ; expected output keeps all three instructions as written
 868   ret float %t3
 869 }
 870
 871 ; y/x + z/x => (y+z)/x is still done here because the constant sum y+z is not denormal.
 872 define float @fact_div5(float %x) { ; constant numerators, added: the fold is expected to happen
 873 ; CHECK-LABEL: @fact_div5(
 874 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast float 0x3818000000000000, [[X:%.*]]
 875 ; CHECK-NEXT:    ret float [[TMP1]]
 876 ;
 877   %t1 = fdiv fast float 0x3810000000000000, %x ; first constant numerator over x
 878   %t2 = fdiv fast float 0x3800000000000000, %x ; second constant numerator over the same x
 879   %t3 = fadd fast float %t1, %t2 ; expected output: a single fdiv whose numerator is the folded constant 0x3818000000000000
 880   ret float %t3
 881 }
 882
 883 ; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
 884 define float @fact_div6(float %x) { ; same constants as @fact_div5, subtracted: the fold is expected NOT to happen
 885 ; CHECK-LABEL: @fact_div6(
 886 ; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float 0x3810000000000000, [[X:%.*]]
 887 ; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
 888 ; CHECK-NEXT:    [[T3:%.*]] = fsub fast float [[T1]], [[T2]]
 889 ; CHECK-NEXT:    ret float [[T3]]
 890 ;
 891   %t1 = fdiv fast float 0x3810000000000000, %x ; first constant numerator over x
 892   %t2 = fdiv fast float 0x3800000000000000, %x ; second constant numerator over the same x
 893   %t3 = fsub fast float %t1, %t2 ; expected output keeps both divisions and the fsub unchanged
 894   ret float %t3
 895 }
 896
 897 ; =========================================================================
 898 ;
 899 ;   Test-cases for square root
 900 ;
 901 ; =========================================================================
 902
 903 ; A squared factor fed into a square root intrinsic should be hoisted out
 904 ; as a fabs() value.
 905
 906 declare double @llvm.sqrt.f64(double)
 907
 908 define double @sqrt_intrinsic_arg_squared(double %x) { ; sqrt intrinsic applied to x * x, all 'fast'
 909 ; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
 910 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 911 ; CHECK-NEXT:    ret double [[FABS]]
 912 ;
 913   %mul = fmul fast double %x, %x ; x * x
 914   %sqrt = call fast double @llvm.sqrt.f64(double %mul) ; expected output: a single fabs call on x
 915   ret double %sqrt
 916 }
 917
 918 ; Check all 6 combinations of a 3-way multiplication tree where
 919 ; one factor is repeated.
 920
 921 define double @sqrt_intrinsic_three_args1(double %x, double %y) { ; operand order under test: (y * x) * x
 922 ; CHECK-LABEL: @sqrt_intrinsic_three_args1(
 923 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 924 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
 925 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
 926 ; CHECK-NEXT:    ret double [[TMP1]]
 927 ;
 928   %mul = fmul fast double %y, %x ; y * x
 929   %mul2 = fmul fast double %mul, %x ; (y * x) * x
 930   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output: fabs(x) * sqrt(y)
 931   ret double %sqrt
 932 }
 933
 934 define double @sqrt_intrinsic_three_args2(double %x, double %y) { ; operand order under test: (x * y) * x
 935 ; CHECK-LABEL: @sqrt_intrinsic_three_args2(
 936 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 937 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
 938 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
 939 ; CHECK-NEXT:    ret double [[TMP1]]
 940 ;
 941   %mul = fmul fast double %x, %y ; x * y
 942   %mul2 = fmul fast double %mul, %x ; (x * y) * x
 943   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output: fabs(x) * sqrt(y)
 944   ret double %sqrt
 945 }
 946
 947 define double @sqrt_intrinsic_three_args3(double %x, double %y) { ; operand order under test: (x * x) * y
 948 ; CHECK-LABEL: @sqrt_intrinsic_three_args3(
 949 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 950 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
 951 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
 952 ; CHECK-NEXT:    ret double [[TMP1]]
 953 ;
 954   %mul = fmul fast double %x, %x ; x * x
 955   %mul2 = fmul fast double %mul, %y ; (x * x) * y
 956   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output: fabs(x) * sqrt(y)
 957   ret double %sqrt
 958 }
 959
 960 define double @sqrt_intrinsic_three_args4(double %x, double %y) { ; operand order under test: x * (y * x)
 961 ; CHECK-LABEL: @sqrt_intrinsic_three_args4(
 962 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 963 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
 964 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
 965 ; CHECK-NEXT:    ret double [[TMP1]]
 966 ;
 967   %mul = fmul fast double %y, %x ; y * x
 968   %mul2 = fmul fast double %x, %mul ; x * (y * x)
 969   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output: fabs(x) * sqrt(y)
 970   ret double %sqrt
 971 }
 972
 973 define double @sqrt_intrinsic_three_args5(double %x, double %y) { ; operand order under test: x * (x * y)
 974 ; CHECK-LABEL: @sqrt_intrinsic_three_args5(
 975 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 976 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
 977 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
 978 ; CHECK-NEXT:    ret double [[TMP1]]
 979 ;
 980   %mul = fmul fast double %x, %y ; x * y
 981   %mul2 = fmul fast double %x, %mul ; x * (x * y)
 982   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output: fabs(x) * sqrt(y)
 983   ret double %sqrt
 984 }
 985
 986 define double @sqrt_intrinsic_three_args6(double %x, double %y) { ; operand order under test: y * (x * x)
 987 ; CHECK-LABEL: @sqrt_intrinsic_three_args6(
 988 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
 989 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
 990 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
 991 ; CHECK-NEXT:    ret double [[TMP1]]
 992 ;
 993   %mul = fmul fast double %x, %x ; x * x
 994   %mul2 = fmul fast double %y, %mul ; y * (x * x)
 995   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output: fabs(x) * sqrt(y)
 996   ret double %sqrt
 997 }
 998
 999 ; If any operation is not 'fast', we can't simplify.
1000
1001 define double @sqrt_intrinsic_not_so_fast(double %x, double %y) { ; negative case: same shape as three_args3 but the inner multiply has no flags
1002 ; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
1003 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
1004 ; CHECK-NEXT:    [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
1005 ; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
1006 ; CHECK-NEXT:    ret double [[SQRT]]
1007 ;
1008   %mul = fmul double %x, %x ; x * x with NO fast-math flags
1009   %mul2 = fmul fast double %mul, %y ; (x * x) * y, 'fast'
1010   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output keeps both multiplies and the sqrt call
1011   ret double %sqrt
1012 }
1013
1014 define double @sqrt_intrinsic_arg_4th(double %x) { ; sqrt intrinsic applied to (x * x) * (x * x), all 'fast'
1015 ; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
1016 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
1017 ; CHECK-NEXT:    ret double [[MUL]]
1018 ;
1019   %mul = fmul fast double %x, %x ; x * x
1020   %mul2 = fmul fast double %mul, %mul ; (x * x) * (x * x)
1021   %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ; expected output returns x * x directly, with no sqrt or fabs call
1022   ret double %sqrt
1023 }
1024
1025 define double @sqrt_intrinsic_arg_5th(double %x) { ; sqrt intrinsic applied to a product of five x factors, all 'fast'
1026 ; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
1027 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
1028 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
1029 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
1030 ; CHECK-NEXT:    ret double [[TMP1]]
1031 ;
1032   %mul = fmul fast double %x, %x ; x * x
1033   %mul2 = fmul fast double %mul, %x ; (x * x) * x
1034   %mul3 = fmul fast double %mul2, %mul ; ((x * x) * x) * (x * x)
1035   %sqrt = call fast double @llvm.sqrt.f64(double %mul3) ; expected output: (x * x) * sqrt(x)
1036   ret double %sqrt
1037 }
1038
1039 ; Check that square root calls have the same behavior.
1040
1041 declare float @sqrtf(float)
1042 declare double @sqrt(double)
1043 declare fp128 @sqrtl(fp128)
1044
1045 define float @sqrt_call_squared_f32(float %x) { ; float variant: calls the declared @sqrtf function rather than the intrinsic
1046 ; CHECK-LABEL: @sqrt_call_squared_f32(
1047 ; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
1048 ; CHECK-NEXT:    ret float [[FABS]]
1049 ;
1050   %mul = fmul fast float %x, %x ; x * x
1051   %sqrt = call fast float @sqrtf(float %mul) ; expected output: a single llvm.fabs.f32 call on x
1052   ret float %sqrt
1053 }
1054
1055 define double @sqrt_call_squared_f64(double %x) { ; double variant: calls the declared @sqrt function rather than the intrinsic
1056 ; CHECK-LABEL: @sqrt_call_squared_f64(
1057 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
1058 ; CHECK-NEXT:    ret double [[FABS]]
1059 ;
1060   %mul = fmul fast double %x, %x ; x * x
1061   %sqrt = call fast double @sqrt(double %mul) ; expected output: a single llvm.fabs.f64 call on x
1062   ret double %sqrt
1063 }
1064
1065 define fp128 @sqrt_call_squared_f128(fp128 %x) { ; fp128 variant: calls the declared @sqrtl function rather than the intrinsic
1066 ; CHECK-LABEL: @sqrt_call_squared_f128(
1067 ; CHECK-NEXT:    [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
1068 ; CHECK-NEXT:    ret fp128 [[FABS]]
1069 ;
1070   %mul = fmul fast fp128 %x, %x ; x * x
1071   %sqrt = call fast fp128 @sqrtl(fp128 %mul) ; expected output: a single llvm.fabs.f128 call on x
1072   ret fp128 %sqrt
1073 }
1074
1075 ; =========================================================================
1076 ;
1077 ;   Test-cases for fmin / fmax
1078 ;
1079 ; =========================================================================
1080
1081 declare double @fmax(double, double)
1082 declare double @fmin(double, double)
1083 declare float @fmaxf(float, float)
1084 declare float @fminf(float, float)
1085 declare fp128 @fmaxl(fp128, fp128)
1086 declare fp128 @fminl(fp128, fp128)
1087
1088 ; No NaNs is the minimum requirement to replace these calls.
1089 ; This should always be set when unsafe-fp-math is true, but
1090 ; alternate the attributes for additional test coverage.
1091 ; 'nsz' is implied by the definition of fmax or fmin itself.
1092
1093 ; Shrink and remove the call.
1094 define float @max1(float %a, float %b) { ; 'fast' @fmax on floats widened to double, result narrowed back to float
1095 ; CHECK-LABEL: @max1(
1096 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
1097 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1098 ; CHECK-NEXT:    ret float [[TMP2]]
1099 ;
1100   %c = fpext float %a to double ; widen first operand
1101   %d = fpext float %b to double ; widen second operand
1102   %e = call fast double @fmax(double %c, double %d) ; expected output: fcmp ogt + select directly on the float arguments
1103   %f = fptrunc double %e to float ; narrow the result; no extension or truncation remains in the expected output
1104   ret float %f
1105 }
1106
1107 define float @max2(float %a, float %b) { ; @fmaxf call carrying only 'nnan'
1108 ; CHECK-LABEL: @max2(
1109 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
1110 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1111 ; CHECK-NEXT:    ret float [[TMP2]]
1112 ;
1113   %c = call nnan float @fmaxf(float %a, float %b) ; expected output: fcmp ogt + select; the compare carries 'nnan nsz'
1114   ret float %c
1115 }
1116
1117
1118 define double @max3(double %a, double %b) { ; @fmax call carrying 'fast'
1119 ; CHECK-LABEL: @max3(
1120 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
1121 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
1122 ; CHECK-NEXT:    ret double [[TMP2]]
1123 ;
1124   %c = call fast double @fmax(double %a, double %b) ; expected output: fcmp fast ogt + select
1125   ret double %c
1126 }
1127
1128 define fp128 @max4(fp128 %a, fp128 %b) { ; @fmaxl call carrying only 'nnan'
1129 ; CHECK-LABEL: @max4(
1130 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
1131 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
1132 ; CHECK-NEXT:    ret fp128 [[TMP2]]
1133 ;
1134   %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b) ; expected output: fcmp ogt + select; the compare carries 'nnan nsz'
1135   ret fp128 %c
1136 }
1137
1138 ; Shrink and remove the call.
1139 define float @min1(float %a, float %b) { ; 'nnan' @fmin on floats widened to double, result narrowed back to float
1140 ; CHECK-LABEL: @min1(
1141 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
1142 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1143 ; CHECK-NEXT:    ret float [[TMP2]]
1144 ;
1145   %c = fpext float %a to double ; widen first operand
1146   %d = fpext float %b to double ; widen second operand
1147   %e = call nnan double @fmin(double %c, double %d) ; expected output: fcmp olt + select directly on the float arguments
1148   %f = fptrunc double %e to float ; narrow the result; no extension or truncation remains in the expected output
1149   ret float %f
1150 }
1151
1152 define float @min2(float %a, float %b) { ; @fminf call carrying 'fast'
1153 ; CHECK-LABEL: @min2(
1154 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
1155 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1156 ; CHECK-NEXT:    ret float [[TMP2]]
1157 ;
1158   %c = call fast float @fminf(float %a, float %b) ; expected output: fcmp fast olt + select
1159   ret float %c
1160 }
1161
1162 define double @min3(double %a, double %b) { ; @fmin call carrying only 'nnan'
1163 ; CHECK-LABEL: @min3(
1164 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
1165 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
1166 ; CHECK-NEXT:    ret double [[TMP2]]
1167 ;
1168   %c = call nnan double @fmin(double %a, double %b) ; expected output: fcmp olt + select; the compare carries 'nnan nsz'
1169   ret double %c
1170 }
1171
1172 define fp128 @min4(fp128 %a, fp128 %b) { ; @fminl call carrying 'fast'
1173 ; CHECK-LABEL: @min4(
1174 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
1175 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
1176 ; CHECK-NEXT:    ret fp128 [[TMP2]]
1177 ;
1178   %c = call fast fp128 @fminl(fp128 %a, fp128 %b) ; expected output: fcmp fast olt + select
1179   ret fp128 %c
1180 }
1181
1182 ; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
1183 ; This is always safe.  No FMF required.
1184 define float @test55(i1 %which, float %a) { ; fadd of a constant applied to a phi that merges a constant and %a; no fast-math flags anywhere
1185 ; CHECK-LABEL: @test55(
1186 ; CHECK-NEXT:  entry:
1187 ; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
1188 ; CHECK:       delay:
1189 ; CHECK-NEXT:    [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00
1190 ; CHECK-NEXT:    br label [[FINAL]]
1191 ; CHECK:       final:
1192 ; CHECK-NEXT:    [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
1193 ; CHECK-NEXT:    ret float [[A]]
1194 ;
1195 entry:
1196   br i1 %which, label %final, label %delay ; true: go straight to %final; false: pass through %delay
1197
1198 delay:
1199   br label %final ; empty block in the input; the expected output places the fadd of %a and 1.0 here
1200
1201 final:
1202   %A = phi float [ 2.0, %entry ], [ %a, %delay ] ; NOTE(review): %A and %a are both captured as FileCheck variable A in the assertions above
1203   %value = fadd float %A, 1.0 ; expected output: no fadd in %final; the phi's constant input becomes 3.0
1204   ret float %value
1205 }