test/CodeGen/PowerPC/vsx-fma-m.ll

   1 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
   2
   3 ; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
   4 ; live-interval-updating logic.
   5 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early
   6 target datalayout = "E-m:e-i64:64-n32:64" ; big-endian, ELF mangling, i64 aligned to 64 bits, native 32/64-bit integers
   7 target triple = "powerpc64-unknown-linux-gnu" ; 64-bit PowerPC Linux target
   8
   9 define void @test1(double %a, double %b, double %c, double %e, double* nocapture %d) #0 {
  10 entry:
  11   %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  12   store double %0, double* %d, align 8
  13   %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  14   %arrayidx1 = getelementptr inbounds double* %d, i64 1
  15   store double %1, double* %arrayidx1, align 8
  16   ret void
  17
     ; Two scalar FMAs that share the multiplicand %b and the addend %a.  The
     ; directives below expect one xsmaddmdp and one xsmaddadp, plus a store of
     ; each result (byte offsets 0 and 8 from %d).
  18 ; CHECK-LABEL: @test1
  19 ; CHECK-DAG: li [[C1:[0-9]+]], 8
  20 ; CHECK-DAG: xsmaddmdp 3, 2, 1
  21 ; CHECK-DAG: xsmaddadp 1, 2, 4
  22 ; CHECK-DAG: stxsdx 3, 0, 7
  23 ; CHECK-DAG: stxsdx 1, 7, [[C1]]
  24 ; CHECK: blr
  25 }
  26
  27 define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
  28 entry:
  29   %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  30   store double %0, double* %d, align 8
  31   %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  32   %arrayidx1 = getelementptr inbounds double* %d, i64 1
  33   store double %1, double* %arrayidx1, align 8
  34   %2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  35   %arrayidx2 = getelementptr inbounds double* %d, i64 2
  36   store double %2, double* %arrayidx2, align 8
  37   ret void
  38
     ; Three scalar FMAs sharing the multiplicand %b and the addend %a.  The
     ; directives below expect two xsmaddmdp and one xsmaddadp, with the results
     ; stored at byte offsets 0, 8 and 16 from %d.
  39 ; CHECK-LABEL: @test2
  40 ; CHECK-DAG: li [[C1:[0-9]+]], 8
  41 ; CHECK-DAG: li [[C2:[0-9]+]], 16
  42 ; CHECK-DAG: xsmaddmdp 3, 2, 1
  43 ; CHECK-DAG: xsmaddmdp 4, 2, 1
  44 ; CHECK-DAG: xsmaddadp 1, 2, 5
  45 ; CHECK-DAG: stxsdx 3, 0, 8
  46 ; CHECK-DAG: stxsdx 4, 8, [[C1]]
  47 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
  48 ; CHECK: blr
  49 }
  50
  51 define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
  52 entry:
  53   %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  54   store double %0, double* %d, align 8
  55   %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  56   %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
  57   %arrayidx1 = getelementptr inbounds double* %d, i64 3
  58   store double %2, double* %arrayidx1, align 8
  59   %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  60   %arrayidx2 = getelementptr inbounds double* %d, i64 2
  61   store double %3, double* %arrayidx2, align 8
  62   %arrayidx3 = getelementptr inbounds double* %d, i64 1
  63   store double %1, double* %arrayidx3, align 8
  64   ret void
  65
     ; Four scalar FMAs: %0, %1 and %3 use %a as the addend, while %2 takes %1
     ; as its addend.  %1 itself is stored last, after %2 and %3 are computed.
     ; The directives below expect register 1 to be copied (fmr) into another
     ; register, which is then the first operand of an xsmaddadp and is stored
     ; at offset 0 from %d.
  66 ; CHECK-LABEL: @test3
  67 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
  68 ; CHECK-DAG: li [[C1:[0-9]+]], 24
  69 ; CHECK-DAG: li [[C2:[0-9]+]], 16
  70 ; CHECK-DAG: li [[C3:[0-9]+]], 8
  71 ; CHECK-DAG: xsmaddmdp 4, 2, 1
  72 ; CHECK-DAG: xsmaddadp 1, 2, 5
  73
  74 ; Note: We could convert this next FMA to M-type as well, but it would require
  75 ; re-ordering the instructions.
  76 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
  77
  78 ; CHECK-DAG: xsmaddmdp 2, 3, 4
  79 ; CHECK-DAG: stxsdx [[F1]], 0, 8
  80 ; CHECK-DAG: stxsdx 2, 8, [[C1]]
  81 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
  82 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
  83 ; CHECK: blr
  84 }
  85
  86 define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
  87 entry:
  88   %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  89   store double %0, double* %d, align 8
  90   %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  91   %arrayidx1 = getelementptr inbounds double* %d, i64 1
  92   store double %1, double* %arrayidx1, align 8
  93   %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
  94   %arrayidx3 = getelementptr inbounds double* %d, i64 3
  95   store double %2, double* %arrayidx3, align 8
  96   %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  97   %arrayidx4 = getelementptr inbounds double* %d, i64 2
  98   store double %3, double* %arrayidx4, align 8
  99   ret void
 100
     ; Same four FMAs as @test3, but every result is stored right after it is
     ; computed, so %1 is stored before %2 consumes it as an addend.  The
     ; directives below expect register 4 to be stored at offset 8 and then
     ; reused as the first operand of a further xsmaddadp, whose result is
     ; stored at offset 24.
 101 ; CHECK-LABEL: @test4
 102 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
 103 ; CHECK-DAG: li [[C1:[0-9]+]], 8
 104 ; CHECK-DAG: li [[C2:[0-9]+]], 16
 105 ; CHECK-DAG: xsmaddmdp 4, 2, 1
 106
 107 ; Note: We could convert this next FMA to M-type as well, but it would require
 108 ; re-ordering the instructions.
 109 ; CHECK-DAG: xsmaddadp 1, 2, 5
 110
 111 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
 112 ; CHECK-DAG: stxsdx [[F1]], 0, 8
 113 ; CHECK-DAG: stxsdx 4, 8, [[C1]]
 114 ; CHECK-DAG: li [[C3:[0-9]+]], 24
 115 ; CHECK-DAG: xsmaddadp 4, 2, 3
 116 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
 117 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
 118 ; CHECK: blr
 119 }
 120
 121 declare double @llvm.fma.f64(double, double, double) #0 ; scalar double fused multiply-add intrinsic (see LLVM LangRef)
 122
 123 define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
 124 entry:
 125   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
 126   store <2 x double> %0, <2 x double>* %d, align 8
 127   %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
 128   %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
 129   store <2 x double> %1, <2 x double>* %arrayidx1, align 8
 130   ret void
 131
     ; Vector (<2 x double>) counterpart of @test1: two FMAs sharing the
     ; multiplicand %b and the addend %a.  The directives below expect one
     ; xvmaddmdp and one xvmaddadp, with the results stored at byte offsets 0
     ; and 16 from %d.  The second store references the offset register
     ; captured from the "li" instruction ([[C1]]) rather than re-capturing it,
     ; so the store is tied to the register that actually holds 16.
 132 ; CHECK-LABEL: @testv1
 133 ; CHECK-DAG: xvmaddmdp 36, 35, 34
 134 ; CHECK-DAG: xvmaddadp 34, 35, 37
 135 ; CHECK-DAG: li [[C1:[0-9]+]], 16
 136 ; CHECK-DAG: stxvd2x 36, 0, 3
 137 ; CHECK-DAG: stxvd2x 34, 3, [[C1]]
 138 ; CHECK: blr
 139 }
 140
 141 define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
 142 entry:
 143   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
 144   store <2 x double> %0, <2 x double>* %d, align 8
 145   %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
 146   %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
 147   store <2 x double> %1, <2 x double>* %arrayidx1, align 8
 148   %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
 149   %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
 150   store <2 x double> %2, <2 x double>* %arrayidx2, align 8
 151   ret void
 152
     ; Vector (<2 x double>) counterpart of @test2: three FMAs sharing the
     ; multiplicand %b and the addend %a.  The directives below expect two
     ; xvmaddmdp and one xvmaddadp, with the results stored at byte offsets 0,
     ; 16 and 32 from %d.  The stores reference the offset registers captured
     ; from the "li" instructions ([[C1]], [[C2]]) rather than re-capturing
     ; them, so each store is tied to the register holding its offset.
 153 ; CHECK-LABEL: @testv2
 154 ; CHECK-DAG: xvmaddmdp 36, 35, 34
 155 ; CHECK-DAG: xvmaddmdp 37, 35, 34
 156 ; CHECK-DAG: li [[C1:[0-9]+]], 16
 157 ; CHECK-DAG: li [[C2:[0-9]+]], 32
 158 ; CHECK-DAG: xvmaddadp 34, 35, 38
 159 ; CHECK-DAG: stxvd2x 36, 0, 3
 160 ; CHECK-DAG: stxvd2x 37, 3, [[C1]]
 161 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
 162 ; CHECK: blr
 163 }
 164
 165 define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
 166 entry:
 167   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
 168   store <2 x double> %0, <2 x double>* %d, align 8
 169   %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
 170   %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
 171   %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
 172   store <2 x double> %2, <2 x double>* %arrayidx1, align 8
 173   %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
 174   %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
 175   store <2 x double> %3, <2 x double>* %arrayidx2, align 8
 176   %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
 177   store <2 x double> %1, <2 x double>* %arrayidx3, align 8
 178   ret void
 179
     ; Vector (<2 x double>) counterpart of @test3: %2 takes %1 as its addend
     ; and %1 itself is stored last.  The store at offset 0 references the copy
     ; register captured from the xxlor ([[V1]]) instead of a hard-coded
     ; register number, matching how @test3 uses [[F1]].
 180 ; Note: There is some unavoidable changeability in this variant.  If the
 181 ; FMAs are reordered differently, the algorithm can pick a different
 182 ; multiplicand to destroy, changing the register assignment.  There isn't
 183 ; a good way to express this possibility, so hopefully this doesn't change
 184 ; too often.
 185
 186 ; CHECK-LABEL: @testv3
 187 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
 188 ; CHECK-DAG: li [[C1:[0-9]+]], 48
 189 ; CHECK-DAG: li [[C2:[0-9]+]], 32
 190 ; CHECK-DAG: xvmaddmdp 37, 35, 34
 191 ; CHECK-DAG: li [[C3:[0-9]+]], 16
 192
 193 ; Note: We could convert this next FMA to M-type as well, but it would require
 194 ; re-ordering the instructions.
 195 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
 196
 197 ; CHECK-DAG: xvmaddmdp 36, 35, 37
 198 ; CHECK-DAG: xvmaddadp 34, 35, 38
 199 ; CHECK-DAG: stxvd2x [[V1]], 0, 3
 200 ; CHECK-DAG: stxvd2x 36, 3, [[C1]]
 201 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
 202 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
 203 ; CHECK: blr
 204 }
 205
 206 define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
 207 entry:
 208   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
 209   store <2 x double> %0, <2 x double>* %d, align 8
 210   %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
 211   %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
 212   store <2 x double> %1, <2 x double>* %arrayidx1, align 8
 213   %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
 214   %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
 215   store <2 x double> %2, <2 x double>* %arrayidx3, align 8
 216   %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
 217   %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
 218   store <2 x double> %3, <2 x double>* %arrayidx4, align 8
 219   ret void
 220
     ; Vector (<2 x double>) counterpart of @test4: every result is stored right
     ; after it is computed, so %1 is stored before %2 consumes it as an addend.
     ; The store at offset 0 references the copy register captured from the
     ; xxlor ([[V1]]) instead of a hard-coded register number, matching how
     ; @test4 uses [[F1]].
 221 ; CHECK-LABEL: @testv4
 222 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
 223 ; CHECK-DAG: xvmaddmdp 37, 35, 34
 224 ; CHECK-DAG: li [[C1:[0-9]+]], 16
 225 ; CHECK-DAG: li [[C2:[0-9]+]], 32
 226 ; CHECK-DAG: xvmaddadp 34, 35, 38
 227
 228 ; Note: We could convert this next FMA to M-type as well, but it would require
 229 ; re-ordering the instructions.
 230 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
 231
 232 ; CHECK-DAG: stxvd2x [[V1]], 0, 3
 233 ; CHECK-DAG: stxvd2x 37, 3, [[C1]]
 234 ; CHECK-DAG: li [[C3:[0-9]+]], 48
 235 ; CHECK-DAG: xvmaddadp 37, 35, 36
 236 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
 237 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
 238 ; CHECK: blr
 239 }
 240
 241 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 ; <2 x double> fused multiply-add intrinsic (see LLVM LangRef)
 242
 243 attributes #0 = { nounwind readnone } ; attribute group referenced by every define/declare shown in this file
 244