1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; add_sub_v8i32 builds its <8 x i32> result one lane at a time from scalars
; extracted out of %a and %b. Lanes 0-3 hold a[i] + b[i] and lanes 4-7 hold
; a[i] - b[i].
; The expectations below use the prefix shared by every RUN line. They require
; the four adds to be merged into one `add <8 x i32>` (lanes 0-3 are extracted
; from it), while the four subs remain scalar.
9 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
10 ; CHECK-LABEL: @add_sub_v8i32(
11 ; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
12 ; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
13 ; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
14 ; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
15 ; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 4
16 ; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
17 ; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
18 ; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
19 ; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[B]]
20 ; CHECK-NEXT: [[AB4:%.*]] = sub i32 [[A4]], [[B4]]
21 ; CHECK-NEXT: [[AB5:%.*]] = sub i32 [[A5]], [[B5]]
22 ; CHECK-NEXT: [[AB6:%.*]] = sub i32 [[A6]], [[B6]]
23 ; CHECK-NEXT: [[AB7:%.*]] = sub i32 [[A7]], [[B7]]
24 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
25 ; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
26 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
27 ; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
28 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
29 ; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP4]], i32 2
30 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
31 ; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP5]], i32 3
32 ; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
33 ; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
34 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
35 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
36 ; CHECK-NEXT: ret <8 x i32> [[R7]]
38 %a0 = extractelement <8 x i32> %a, i32 0
39 %a1 = extractelement <8 x i32> %a, i32 1
40 %a2 = extractelement <8 x i32> %a, i32 2
41 %a3 = extractelement <8 x i32> %a, i32 3
42 %a4 = extractelement <8 x i32> %a, i32 4
43 %a5 = extractelement <8 x i32> %a, i32 5
44 %a6 = extractelement <8 x i32> %a, i32 6
45 %a7 = extractelement <8 x i32> %a, i32 7
46 %b0 = extractelement <8 x i32> %b, i32 0
47 %b1 = extractelement <8 x i32> %b, i32 1
48 %b2 = extractelement <8 x i32> %b, i32 2
49 %b3 = extractelement <8 x i32> %b, i32 3
50 %b4 = extractelement <8 x i32> %b, i32 4
51 %b5 = extractelement <8 x i32> %b, i32 5
52 %b6 = extractelement <8 x i32> %b, i32 6
53 %b7 = extractelement <8 x i32> %b, i32 7
54 %ab0 = add i32 %a0, %b0
55 %ab1 = add i32 %a1, %b1
56 %ab2 = add i32 %a2, %b2
57 %ab3 = add i32 %a3, %b3
58 %ab4 = sub i32 %a4, %b4
59 %ab5 = sub i32 %a5, %b5
60 %ab6 = sub i32 %a6, %b6
61 %ab7 = sub i32 %a7, %b7
62 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
63 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
64 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
65 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
66 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
67 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
68 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
69 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; add_and_v4i32 builds its <4 x i32> result one lane at a time. Lanes 0-1 hold
; a[i] + b[i] and lanes 2-3 hold a[i] & b[i].
; The SSE, AVX and AVX512 expectations require the two adds to become one
; `add <4 x i32>` (lanes 0-1 are extracted from it) with the two ands left
; scalar. The SLM expectations require the code to stay fully scalar.
73 define <4 x i32> @add_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
74 ; SSE-LABEL: @add_and_v4i32(
75 ; SSE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
76 ; SSE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
77 ; SSE-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
78 ; SSE-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
79 ; SSE-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[B]]
80 ; SSE-NEXT: [[AB2:%.*]] = and i32 [[A2]], [[B2]]
81 ; SSE-NEXT: [[AB3:%.*]] = and i32 [[A3]], [[B3]]
82 ; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
83 ; SSE-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
84 ; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
85 ; SSE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP3]], i32 1
86 ; SSE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
87 ; SSE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
88 ; SSE-NEXT: ret <4 x i32> [[R3]]
90 ; SLM-LABEL: @add_and_v4i32(
91 ; SLM-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
92 ; SLM-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
93 ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
94 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
95 ; SLM-NEXT: [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
96 ; SLM-NEXT: [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
97 ; SLM-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
98 ; SLM-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
99 ; SLM-NEXT: [[AB0:%.*]] = add i32 [[A0]], [[B0]]
100 ; SLM-NEXT: [[AB1:%.*]] = add i32 [[A1]], [[B1]]
101 ; SLM-NEXT: [[AB2:%.*]] = and i32 [[A2]], [[B2]]
102 ; SLM-NEXT: [[AB3:%.*]] = and i32 [[A3]], [[B3]]
103 ; SLM-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0
104 ; SLM-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1
105 ; SLM-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
106 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
107 ; SLM-NEXT: ret <4 x i32> [[R3]]
109 ; AVX-LABEL: @add_and_v4i32(
110 ; AVX-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
111 ; AVX-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
112 ; AVX-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
113 ; AVX-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
114 ; AVX-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[B]]
115 ; AVX-NEXT: [[AB2:%.*]] = and i32 [[A2]], [[B2]]
116 ; AVX-NEXT: [[AB3:%.*]] = and i32 [[A3]], [[B3]]
117 ; AVX-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
118 ; AVX-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
119 ; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
120 ; AVX-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP3]], i32 1
121 ; AVX-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
122 ; AVX-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
123 ; AVX-NEXT: ret <4 x i32> [[R3]]
125 ; AVX512-LABEL: @add_and_v4i32(
126 ; AVX512-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
127 ; AVX512-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
128 ; AVX512-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
129 ; AVX512-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
130 ; AVX512-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[B]]
131 ; AVX512-NEXT: [[AB2:%.*]] = and i32 [[A2]], [[B2]]
132 ; AVX512-NEXT: [[AB3:%.*]] = and i32 [[A3]], [[B3]]
133 ; AVX512-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
134 ; AVX512-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
135 ; AVX512-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
136 ; AVX512-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP3]], i32 1
137 ; AVX512-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
138 ; AVX512-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
139 ; AVX512-NEXT: ret <4 x i32> [[R3]]
141 %a0 = extractelement <4 x i32> %a, i32 0
142 %a1 = extractelement <4 x i32> %a, i32 1
143 %a2 = extractelement <4 x i32> %a, i32 2
144 %a3 = extractelement <4 x i32> %a, i32 3
145 %b0 = extractelement <4 x i32> %b, i32 0
146 %b1 = extractelement <4 x i32> %b, i32 1
147 %b2 = extractelement <4 x i32> %b, i32 2
148 %b3 = extractelement <4 x i32> %b, i32 3
149 %ab0 = add i32 %a0, %b0
150 %ab1 = add i32 %a1, %b1
151 %ab2 = and i32 %a2, %b2
152 %ab3 = and i32 %a3, %b3
153 %r0 = insertelement <4 x i32> undef, i32 %ab0, i32 0
154 %r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
155 %r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
156 %r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
; add_mul_v4i32 builds its <4 x i32> result one lane at a time with the
; opcodes interleaved. Lanes 0 and 3 hold a[i] * b[i]; lanes 1 and 2 hold
; a[i] + b[i].
; The expectations below use the prefix shared by every RUN line and match the
; input instruction for instruction, so no vector operation is expected on
; any tested target.
160 define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
161 ; CHECK-LABEL: @add_mul_v4i32(
162 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
163 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
164 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
165 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
166 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
167 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
168 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
169 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
170 ; CHECK-NEXT: [[AB0:%.*]] = mul i32 [[A0]], [[B0]]
171 ; CHECK-NEXT: [[AB1:%.*]] = add i32 [[A1]], [[B1]]
172 ; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A2]], [[B2]]
173 ; CHECK-NEXT: [[AB3:%.*]] = mul i32 [[A3]], [[B3]]
174 ; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0
175 ; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1
176 ; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
177 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
178 ; CHECK-NEXT: ret <4 x i32> [[R3]]
180 %a0 = extractelement <4 x i32> %a, i32 0
181 %a1 = extractelement <4 x i32> %a, i32 1
182 %a2 = extractelement <4 x i32> %a, i32 2
183 %a3 = extractelement <4 x i32> %a, i32 3
184 %b0 = extractelement <4 x i32> %b, i32 0
185 %b1 = extractelement <4 x i32> %b, i32 1
186 %b2 = extractelement <4 x i32> %b, i32 2
187 %b3 = extractelement <4 x i32> %b, i32 3
188 %ab0 = mul i32 %a0, %b0
189 %ab1 = add i32 %a1, %b1
190 %ab2 = add i32 %a2, %b2
191 %ab3 = mul i32 %a3, %b3
192 %r0 = insertelement <4 x i32> undef, i32 %ab0, i32 0
193 %r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
194 %r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
195 %r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
; ashr_shl_v8i32 builds its <8 x i32> result one lane at a time with per-lane
; variable shift amounts taken from %b. Lanes 0-3 hold a[i] ashr b[i] and
; lanes 4-7 hold a[i] shl b[i].
; The SSE, SLM and AVX1 expectations match the scalar input unchanged. The
; AVX2 and AVX512 expectations require the four ashr operations to become one
; `ashr <8 x i32>` (lanes 0-3 are extracted from it) with the four shl
; operations left scalar.
199 define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
200 ; SSE-LABEL: @ashr_shl_v8i32(
201 ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
202 ; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
203 ; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
204 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
205 ; SSE-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
206 ; SSE-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
207 ; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
208 ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
209 ; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
210 ; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
211 ; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
212 ; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
213 ; SSE-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
214 ; SSE-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
215 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
216 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
217 ; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
218 ; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
219 ; SSE-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
220 ; SSE-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
221 ; SSE-NEXT: [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
222 ; SSE-NEXT: [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
223 ; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
224 ; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
225 ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
226 ; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
227 ; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
228 ; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
229 ; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
230 ; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
231 ; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
232 ; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
233 ; SSE-NEXT: ret <8 x i32> [[R7]]
235 ; SLM-LABEL: @ashr_shl_v8i32(
236 ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
237 ; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
238 ; SLM-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
239 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
240 ; SLM-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
241 ; SLM-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
242 ; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
243 ; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
244 ; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
245 ; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
246 ; SLM-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
247 ; SLM-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
248 ; SLM-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
249 ; SLM-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
250 ; SLM-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
251 ; SLM-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
252 ; SLM-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
253 ; SLM-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
254 ; SLM-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
255 ; SLM-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
256 ; SLM-NEXT: [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
257 ; SLM-NEXT: [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
258 ; SLM-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
259 ; SLM-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
260 ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
261 ; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
262 ; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
263 ; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
264 ; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
265 ; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
266 ; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
267 ; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
268 ; SLM-NEXT: ret <8 x i32> [[R7]]
270 ; AVX1-LABEL: @ashr_shl_v8i32(
271 ; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
272 ; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
273 ; AVX1-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
274 ; AVX1-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
275 ; AVX1-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
276 ; AVX1-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
277 ; AVX1-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
278 ; AVX1-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
279 ; AVX1-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
280 ; AVX1-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
281 ; AVX1-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
282 ; AVX1-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
283 ; AVX1-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
284 ; AVX1-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
285 ; AVX1-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
286 ; AVX1-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
287 ; AVX1-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
288 ; AVX1-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
289 ; AVX1-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
290 ; AVX1-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
291 ; AVX1-NEXT: [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
292 ; AVX1-NEXT: [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
293 ; AVX1-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
294 ; AVX1-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
295 ; AVX1-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
296 ; AVX1-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
297 ; AVX1-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
298 ; AVX1-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
299 ; AVX1-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
300 ; AVX1-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
301 ; AVX1-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
302 ; AVX1-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
303 ; AVX1-NEXT: ret <8 x i32> [[R7]]
305 ; AVX2-LABEL: @ashr_shl_v8i32(
306 ; AVX2-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
307 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
308 ; AVX2-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
309 ; AVX2-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
310 ; AVX2-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 4
311 ; AVX2-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
312 ; AVX2-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
313 ; AVX2-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
314 ; AVX2-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A]], [[B]]
315 ; AVX2-NEXT: [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
316 ; AVX2-NEXT: [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
317 ; AVX2-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
318 ; AVX2-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
319 ; AVX2-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
320 ; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
321 ; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
322 ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
323 ; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
324 ; AVX2-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP4]], i32 2
325 ; AVX2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
326 ; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP5]], i32 3
327 ; AVX2-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
328 ; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
329 ; AVX2-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
330 ; AVX2-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
331 ; AVX2-NEXT: ret <8 x i32> [[R7]]
333 ; AVX512-LABEL: @ashr_shl_v8i32(
334 ; AVX512-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
335 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
336 ; AVX512-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
337 ; AVX512-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
338 ; AVX512-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 4
339 ; AVX512-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
340 ; AVX512-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
341 ; AVX512-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
342 ; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A]], [[B]]
343 ; AVX512-NEXT: [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
344 ; AVX512-NEXT: [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
345 ; AVX512-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
346 ; AVX512-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
347 ; AVX512-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
348 ; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
349 ; AVX512-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
350 ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
351 ; AVX512-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
352 ; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP4]], i32 2
353 ; AVX512-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
354 ; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP5]], i32 3
355 ; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
356 ; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
357 ; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
358 ; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
359 ; AVX512-NEXT: ret <8 x i32> [[R7]]
361 %a0 = extractelement <8 x i32> %a, i32 0
362 %a1 = extractelement <8 x i32> %a, i32 1
363 %a2 = extractelement <8 x i32> %a, i32 2
364 %a3 = extractelement <8 x i32> %a, i32 3
365 %a4 = extractelement <8 x i32> %a, i32 4
366 %a5 = extractelement <8 x i32> %a, i32 5
367 %a6 = extractelement <8 x i32> %a, i32 6
368 %a7 = extractelement <8 x i32> %a, i32 7
369 %b0 = extractelement <8 x i32> %b, i32 0
370 %b1 = extractelement <8 x i32> %b, i32 1
371 %b2 = extractelement <8 x i32> %b, i32 2
372 %b3 = extractelement <8 x i32> %b, i32 3
373 %b4 = extractelement <8 x i32> %b, i32 4
374 %b5 = extractelement <8 x i32> %b, i32 5
375 %b6 = extractelement <8 x i32> %b, i32 6
376 %b7 = extractelement <8 x i32> %b, i32 7
377 %ab0 = ashr i32 %a0, %b0
378 %ab1 = ashr i32 %a1, %b1
379 %ab2 = ashr i32 %a2, %b2
380 %ab3 = ashr i32 %a3, %b3
381 %ab4 = shl i32 %a4, %b4
382 %ab5 = shl i32 %a5, %b5
383 %ab6 = shl i32 %a6, %b6
384 %ab7 = shl i32 %a7, %b7
385 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
386 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
387 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
388 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
389 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
390 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
391 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
392 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; ashr_shl_v8i32_const is the constant-shift-amount variant. Lanes 0-3 hold
; a[i] ashr 2 and lanes 4-7 hold a[i] shl 3.
; The expectations below use the prefix shared by every RUN line. They require
; the low four lanes of %a to be taken with a shufflevector into a <4 x i32>
; and shifted by a splat of 2 in one vector ashr, while the four shl
; operations remain scalar.
396 define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
397 ; CHECK-LABEL: @ashr_shl_v8i32_const(
398 ; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
399 ; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
400 ; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
401 ; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
402 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
403 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
404 ; CHECK-NEXT: [[AB4:%.*]] = shl i32 [[A4]], 3
405 ; CHECK-NEXT: [[AB5:%.*]] = shl i32 [[A5]], 3
406 ; CHECK-NEXT: [[AB6:%.*]] = shl i32 [[A6]], 3
407 ; CHECK-NEXT: [[AB7:%.*]] = shl i32 [[A7]], 3
408 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
409 ; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
410 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
411 ; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP4]], i32 1
412 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
413 ; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP5]], i32 2
414 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
415 ; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP6]], i32 3
416 ; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
417 ; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
418 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
419 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
420 ; CHECK-NEXT: ret <8 x i32> [[R7]]
422 %a0 = extractelement <8 x i32> %a, i32 0
423 %a1 = extractelement <8 x i32> %a, i32 1
424 %a2 = extractelement <8 x i32> %a, i32 2
425 %a3 = extractelement <8 x i32> %a, i32 3
426 %a4 = extractelement <8 x i32> %a, i32 4
427 %a5 = extractelement <8 x i32> %a, i32 5
428 %a6 = extractelement <8 x i32> %a, i32 6
429 %a7 = extractelement <8 x i32> %a, i32 7
430 %ab0 = ashr i32 %a0, 2
431 %ab1 = ashr i32 %a1, 2
432 %ab2 = ashr i32 %a2, 2
433 %ab3 = ashr i32 %a3, 2
434 %ab4 = shl i32 %a4, 3
435 %ab5 = shl i32 %a5, 3
436 %ab6 = shl i32 %a6, 3
437 %ab7 = shl i32 %a7, 3
438 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
439 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
440 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
441 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
442 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
443 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
444 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
445 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
449 define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
450 ; SSE-LABEL: @ashr_lshr_shl_v8i32(
451 ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
452 ; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
453 ; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
454 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
455 ; SSE-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
456 ; SSE-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
457 ; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
458 ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
459 ; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
460 ; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
461 ; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
462 ; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
463 ; SSE-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
464 ; SSE-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
465 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
466 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
467 ; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
468 ; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
469 ; SSE-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
470 ; SSE-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
471 ; SSE-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
472 ; SSE-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
473 ; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
474 ; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
475 ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
476 ; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
477 ; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
478 ; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
479 ; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
480 ; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
481 ; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
482 ; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
483 ; SSE-NEXT: ret <8 x i32> [[R7]]
485 ; SLM-LABEL: @ashr_lshr_shl_v8i32(
486 ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
487 ; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
488 ; SLM-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
489 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
490 ; SLM-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
491 ; SLM-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
492 ; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
493 ; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
494 ; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
495 ; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
496 ; SLM-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
497 ; SLM-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
498 ; SLM-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
499 ; SLM-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
500 ; SLM-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
501 ; SLM-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
502 ; SLM-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
503 ; SLM-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
504 ; SLM-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
505 ; SLM-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
506 ; SLM-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
507 ; SLM-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
508 ; SLM-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
509 ; SLM-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
510 ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
511 ; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
512 ; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
513 ; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
514 ; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
515 ; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
516 ; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
517 ; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
518 ; SLM-NEXT: ret <8 x i32> [[R7]]
520 ; AVX-LABEL: @ashr_lshr_shl_v8i32(
521 ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
522 ; AVX-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
523 ; AVX-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
524 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
525 ; AVX-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
526 ; AVX-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
527 ; AVX-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
528 ; AVX-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
529 ; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
530 ; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
531 ; AVX-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
532 ; AVX-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
533 ; AVX-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
534 ; AVX-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
535 ; AVX-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
536 ; AVX-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
537 ; AVX-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
538 ; AVX-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
539 ; AVX-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
540 ; AVX-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
541 ; AVX-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
542 ; AVX-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
543 ; AVX-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
544 ; AVX-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
545 ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
546 ; AVX-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
547 ; AVX-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
548 ; AVX-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
549 ; AVX-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
550 ; AVX-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
551 ; AVX-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
552 ; AVX-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
553 ; AVX-NEXT: ret <8 x i32> [[R7]]
555 ; AVX512-LABEL: @ashr_lshr_shl_v8i32(
556 ; AVX512-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 2
557 ; AVX512-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
558 ; AVX512-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
559 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
560 ; AVX512-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
561 ; AVX512-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
562 ; AVX512-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 2
563 ; AVX512-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
564 ; AVX512-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
565 ; AVX512-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
566 ; AVX512-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
567 ; AVX512-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
568 ; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A]], [[B]]
569 ; AVX512-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
570 ; AVX512-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
571 ; AVX512-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
572 ; AVX512-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
573 ; AVX512-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
574 ; AVX512-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
575 ; AVX512-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
576 ; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
577 ; AVX512-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
578 ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
579 ; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
580 ; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
581 ; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
582 ; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
583 ; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
584 ; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
585 ; AVX512-NEXT: ret <8 x i32> [[R7]]
587 %a0 = extractelement <8 x i32> %a, i32 0
588 %a1 = extractelement <8 x i32> %a, i32 1
589 %a2 = extractelement <8 x i32> %a, i32 2
590 %a3 = extractelement <8 x i32> %a, i32 3
591 %a4 = extractelement <8 x i32> %a, i32 4
592 %a5 = extractelement <8 x i32> %a, i32 5
593 %a6 = extractelement <8 x i32> %a, i32 6
594 %a7 = extractelement <8 x i32> %a, i32 7
595 %b0 = extractelement <8 x i32> %b, i32 0
596 %b1 = extractelement <8 x i32> %b, i32 1
597 %b2 = extractelement <8 x i32> %b, i32 2
598 %b3 = extractelement <8 x i32> %b, i32 3
599 %b4 = extractelement <8 x i32> %b, i32 4
600 %b5 = extractelement <8 x i32> %b, i32 5
601 %b6 = extractelement <8 x i32> %b, i32 6
602 %b7 = extractelement <8 x i32> %b, i32 7
603 %ab0 = ashr i32 %a0, %b0
604 %ab1 = ashr i32 %a1, %b1
605 %ab2 = lshr i32 %a2, %b2
606 %ab3 = lshr i32 %a3, %b3
607 %ab4 = lshr i32 %a4, %b4
608 %ab5 = lshr i32 %a5, %b5
609 %ab6 = shl i32 %a6, %b6
610 %ab7 = shl i32 %a7, %b7
611 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
612 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
613 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
614 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
615 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
616 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
617 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
618 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7