1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Alternating add/sub pattern on <8 x float>: lanes 0, 3, 4 and 7 are combined
; with fadd, lanes 1, 2, 5 and 6 with fsub, and the results are reassembled
; lane by lane into a new <8 x float>.
; Every expectation below uses the shared CHECK prefix, so all RUN
; configurations are expected to keep the scalar extract/op/insert sequence
; (no vector arithmetic appears in the expected output).
9 define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
10 ; CHECK-LABEL: @fadd_fsub_v8f32(
11 ; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
12 ; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
13 ; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
14 ; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
15 ; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
16 ; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
17 ; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
18 ; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
19 ; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
20 ; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
21 ; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
22 ; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
23 ; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
24 ; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
25 ; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
26 ; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
27 ; CHECK-NEXT: [[AB0:%.*]] = fadd float [[A0]], [[B0]]
28 ; CHECK-NEXT: [[AB1:%.*]] = fsub float [[A1]], [[B1]]
29 ; CHECK-NEXT: [[AB2:%.*]] = fsub float [[A2]], [[B2]]
30 ; CHECK-NEXT: [[AB3:%.*]] = fadd float [[A3]], [[B3]]
31 ; CHECK-NEXT: [[AB4:%.*]] = fadd float [[A4]], [[B4]]
32 ; CHECK-NEXT: [[AB5:%.*]] = fsub float [[A5]], [[B5]]
33 ; CHECK-NEXT: [[AB6:%.*]] = fsub float [[A6]], [[B6]]
34 ; CHECK-NEXT: [[AB7:%.*]] = fadd float [[A7]], [[B7]]
35 ; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0
36 ; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
37 ; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
38 ; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
39 ; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
40 ; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
41 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
42 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
43 ; CHECK-NEXT: ret <8 x float> [[R7]]
; Split both operands into their eight scalar lanes.
45 %a0 = extractelement <8 x float> %a, i32 0
46 %a1 = extractelement <8 x float> %a, i32 1
47 %a2 = extractelement <8 x float> %a, i32 2
48 %a3 = extractelement <8 x float> %a, i32 3
49 %a4 = extractelement <8 x float> %a, i32 4
50 %a5 = extractelement <8 x float> %a, i32 5
51 %a6 = extractelement <8 x float> %a, i32 6
52 %a7 = extractelement <8 x float> %a, i32 7
53 %b0 = extractelement <8 x float> %b, i32 0
54 %b1 = extractelement <8 x float> %b, i32 1
55 %b2 = extractelement <8 x float> %b, i32 2
56 %b3 = extractelement <8 x float> %b, i32 3
57 %b4 = extractelement <8 x float> %b, i32 4
58 %b5 = extractelement <8 x float> %b, i32 5
59 %b6 = extractelement <8 x float> %b, i32 6
60 %b7 = extractelement <8 x float> %b, i32 7
; Per-lane arithmetic: fadd on lanes 0/3/4/7, fsub on lanes 1/2/5/6.
61 %ab0 = fadd float %a0, %b0
62 %ab1 = fsub float %a1, %b1
63 %ab2 = fsub float %a2, %b2
64 %ab3 = fadd float %a3, %b3
65 %ab4 = fadd float %a4, %b4
66 %ab5 = fsub float %a5, %b5
67 %ab6 = fsub float %a6, %b6
68 %ab7 = fadd float %a7, %b7
; Rebuild the <8 x float> result in lane order, starting from undef.
69 %r0 = insertelement <8 x float> undef, float %ab0, i32 0
70 %r1 = insertelement <8 x float> %r0, float %ab1, i32 1
71 %r2 = insertelement <8 x float> %r1, float %ab2, i32 2
72 %r3 = insertelement <8 x float> %r2, float %ab3, i32 3
73 %r4 = insertelement <8 x float> %r3, float %ab4, i32 4
74 %r5 = insertelement <8 x float> %r4, float %ab5, i32 5
75 %r6 = insertelement <8 x float> %r5, float %ab6, i32 6
76 %r7 = insertelement <8 x float> %r6, float %ab7, i32 7
; Alternating mul/div pattern on <8 x float>: lanes 0, 3, 4 and 7 are combined
; with fmul, lanes 1, 2, 5 and 6 with fdiv, and the results are reassembled
; lane by lane into a new <8 x float>.
; Every expectation below uses the shared CHECK prefix, so all RUN
; configurations are expected to keep the scalar extract/op/insert sequence
; (no vector arithmetic appears in the expected output).
80 define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
81 ; CHECK-LABEL: @fmul_fdiv_v8f32(
82 ; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
83 ; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
84 ; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
85 ; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
86 ; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
87 ; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
88 ; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
89 ; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
90 ; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
91 ; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
92 ; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
93 ; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
94 ; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
95 ; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
96 ; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
97 ; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
98 ; CHECK-NEXT: [[AB0:%.*]] = fmul float [[A0]], [[B0]]
99 ; CHECK-NEXT: [[AB1:%.*]] = fdiv float [[A1]], [[B1]]
100 ; CHECK-NEXT: [[AB2:%.*]] = fdiv float [[A2]], [[B2]]
101 ; CHECK-NEXT: [[AB3:%.*]] = fmul float [[A3]], [[B3]]
102 ; CHECK-NEXT: [[AB4:%.*]] = fmul float [[A4]], [[B4]]
103 ; CHECK-NEXT: [[AB5:%.*]] = fdiv float [[A5]], [[B5]]
104 ; CHECK-NEXT: [[AB6:%.*]] = fdiv float [[A6]], [[B6]]
105 ; CHECK-NEXT: [[AB7:%.*]] = fmul float [[A7]], [[B7]]
106 ; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0
107 ; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
108 ; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
109 ; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
110 ; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
111 ; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
112 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
113 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
114 ; CHECK-NEXT: ret <8 x float> [[R7]]
; Split both operands into their eight scalar lanes.
116 %a0 = extractelement <8 x float> %a, i32 0
117 %a1 = extractelement <8 x float> %a, i32 1
118 %a2 = extractelement <8 x float> %a, i32 2
119 %a3 = extractelement <8 x float> %a, i32 3
120 %a4 = extractelement <8 x float> %a, i32 4
121 %a5 = extractelement <8 x float> %a, i32 5
122 %a6 = extractelement <8 x float> %a, i32 6
123 %a7 = extractelement <8 x float> %a, i32 7
124 %b0 = extractelement <8 x float> %b, i32 0
125 %b1 = extractelement <8 x float> %b, i32 1
126 %b2 = extractelement <8 x float> %b, i32 2
127 %b3 = extractelement <8 x float> %b, i32 3
128 %b4 = extractelement <8 x float> %b, i32 4
129 %b5 = extractelement <8 x float> %b, i32 5
130 %b6 = extractelement <8 x float> %b, i32 6
131 %b7 = extractelement <8 x float> %b, i32 7
; Per-lane arithmetic: fmul on lanes 0/3/4/7, fdiv on lanes 1/2/5/6.
132 %ab0 = fmul float %a0, %b0
133 %ab1 = fdiv float %a1, %b1
134 %ab2 = fdiv float %a2, %b2
135 %ab3 = fmul float %a3, %b3
136 %ab4 = fmul float %a4, %b4
137 %ab5 = fdiv float %a5, %b5
138 %ab6 = fdiv float %a6, %b6
139 %ab7 = fmul float %a7, %b7
; Rebuild the <8 x float> result in lane order, starting from undef.
140 %r0 = insertelement <8 x float> undef, float %ab0, i32 0
141 %r1 = insertelement <8 x float> %r0, float %ab1, i32 1
142 %r2 = insertelement <8 x float> %r1, float %ab2, i32 2
143 %r3 = insertelement <8 x float> %r2, float %ab3, i32 3
144 %r4 = insertelement <8 x float> %r3, float %ab4, i32 4
145 %r5 = insertelement <8 x float> %r4, float %ab5, i32 5
146 %r6 = insertelement <8 x float> %r5, float %ab6, i32 6
147 %r7 = insertelement <8 x float> %r6, float %ab7, i32 7
151 define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
152 ; SSE-LABEL: @fmul_fdiv_v4f32_const(
153 ; SSE-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2
154 ; SSE-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
155 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
156 ; SSE-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
157 ; SSE-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
158 ; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
159 ; SSE-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
160 ; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
161 ; SSE-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
162 ; SSE-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
163 ; SSE-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
164 ; SSE-NEXT: ret <4 x float> [[R3]]
166 ; SLM-LABEL: @fmul_fdiv_v4f32_const(
167 ; SLM-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
168 ; SLM-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
169 ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
170 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
171 ; SLM-NEXT: [[AB0:%.*]] = fmul float [[A0]], 2.000000e+00
172 ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
173 ; SLM-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[AB0]], i32 0
174 ; SLM-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[A1]], i32 1
175 ; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
176 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
177 ; SLM-NEXT: ret <4 x float> [[R3]]
179 ; AVX-LABEL: @fmul_fdiv_v4f32_const(
180 ; AVX-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2
181 ; AVX-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
182 ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
183 ; AVX-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
184 ; AVX-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
185 ; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
186 ; AVX-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
187 ; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
188 ; AVX-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
189 ; AVX-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
190 ; AVX-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
191 ; AVX-NEXT: ret <4 x float> [[R3]]
193 ; AVX512-LABEL: @fmul_fdiv_v4f32_const(
194 ; AVX512-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2
195 ; AVX512-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
196 ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
197 ; AVX512-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
198 ; AVX512-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
199 ; AVX512-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
200 ; AVX512-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
201 ; AVX512-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
202 ; AVX512-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
203 ; AVX512-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
204 ; AVX512-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
205 ; AVX512-NEXT: ret <4 x float> [[R3]]
207 %a0 = extractelement <4 x float> %a, i32 0
208 %a1 = extractelement <4 x float> %a, i32 1
209 %a2 = extractelement <4 x float> %a, i32 2
210 %a3 = extractelement <4 x float> %a, i32 3
211 %ab0 = fmul float %a0, 2.0
212 %ab1 = fmul float %a1, 1.0
213 %ab2 = fdiv float %a2, 1.0
214 %ab3 = fdiv float %a3, 0.5
215 %r0 = insertelement <4 x float> undef, float %ab0, i32 0
216 %r1 = insertelement <4 x float> %r0, float %ab1, i32 1
217 %r2 = insertelement <4 x float> %r1, float %ab2, i32 2
218 %r3 = insertelement <4 x float> %r2, float %ab3, i32 3