//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
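// For illustration (roughly): an old declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and a fresh declaration taking
// <2 x i64> operands is created; the call sites are then rewritten (with the
// necessary bitcasts) in UpgradeIntrinsicCall.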
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
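// For example (roughly): the immediate of llvm.x86.sse41.insertps used to be
// declared as i32; the current declaration takes i8, so the old function is
// renamed and its calls are rewritten with a truncated immediate in
// UpgradeIntrinsicCall.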
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
      Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
      Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
      Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
      Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      (Name.startswith("xop.vpcom") && // Added in 3.2
       F->arg_size() == 2) ||
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("sse2.pavg") || // Added in 6.0
      Name.startswith("avx2.pavg") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}
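// Note on the two upgrade modes above: when ShouldUpgradeX86Intrinsic matches,
// NewFn is deliberately left null and the old call is expanded directly into
// plain IR in UpgradeIntrinsicCall; when NewFn is set, the call is instead
// re-targeted at the named replacement declaration.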
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
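    // For instance, an old "llvm.masked.gather.v2f64" declaration is renamed
    // and re-created under the current mangling, which (roughly) also encodes
    // the pointer-vector type, e.g. "llvm.masked.gather.v2f64.v2p0f64", so the
    // address space becomes part of the name.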
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
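    // Concretely (roughly), a five-argument call such as
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
    // becomes the four-argument form, with the alignment expressed as
    // "align 4" parameter attributes on the pointer operands instead.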
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
          .Cases("brev32", "brev64", Intrinsic::bitreverse)
          .Case("clz.i", Intrinsic::ctlz)
          .Case("popc.i", Intrinsic::ctpop)
          .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
          .Cases("abs.i", "abs.ll", true)
          .Cases("clz.ll", "popc.ll", "h2f", true)
          .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
          .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
          .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
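// Worked example (assuming a 128-bit operand and Shift == 4): NumElts is 16
// and the loop produces the mask <12,13,14,15, 16,17,...,27>. Elements 0-15 of
// the shuffle refer to the zero vector and 16-31 to the original operand, so
// the result is four zero bytes followed by bytes 0-11 of the input, i.e. a
// left shift of the whole vector by four bytes.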
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
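// Mirror of the example above: for a 128-bit operand and Shift == 4 the mask
// is <4,5,...,15, 16,17,18,19>; indices 0-15 take bytes from the operand and
// indices 16-31 take zeroes, giving a right shift of the vector by four bytes.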
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
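// For example, a mask used with a 4-element operation arrives as an i8; it is
// bitcast to <8 x i1> and the low four bits are then extracted into <4 x i1>.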
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  llvm::VectorType *MaskTy =
    llvm::VectorType::get(Builder.getInt1Ty(),
                          Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes, so handle each lane separately.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
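// Worked example (128-bit PALIGNR, ShiftVal == 4): the shuffle operands are
// (Op1, Op0) and the mask is <4,...,15, 16,17,18,19>, so the result is bytes
// 4-15 of Op1 followed by bytes 0-3 of Op0, i.e. concat(Op0:Op1) shifted right
// by four bytes, matching the hardware instruction.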
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                            bool IsSigned, bool IsAddition) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);

  Intrinsic::ID IID =
      IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
               : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
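// A rotate is just a funnel shift with both inputs equal: rotl(x, n) is
// fshl(x, x, n) and rotr(x, n) is fshr(x, x, n), which is why Src is passed
// twice above.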
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}
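// Note: pabs has no saturating form, so for the most negative input the
// wrapping negate above still yields INT_MIN, which matches the hardware
// behaviour.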
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
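// pmuldq/pmuludq multiply the even-numbered 32-bit elements of the two inputs
// into full 64-bit products. After the bitcast to vXi64 each 64-bit lane holds
// one such element pair in its low half; the shl/ashr pair sign-extends it and
// the and-mask zero-extends it, so a plain 64-bit mul then produces the
// expected result.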
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
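// For example, a <4 x i1> compare result is widened to <8 x i1> with zero bits
// in the upper elements before the bitcast, so the function always returns an
// integer of at least 8 bits (the width of the narrowest mask register).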
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpshld.")) {
    if (VecWidth == 128 && Name[7] == 'q')
      IID = Intrinsic::x86_avx512_vpshld_q_128;
    else if (VecWidth == 128 && Name[7] == 'd')
      IID = Intrinsic::x86_avx512_vpshld_d_128;
    else if (VecWidth == 128 && Name[7] == 'w')
      IID = Intrinsic::x86_avx512_vpshld_w_128;
    else if (VecWidth == 256 && Name[7] == 'q')
      IID = Intrinsic::x86_avx512_vpshld_q_256;
    else if (VecWidth == 256 && Name[7] == 'd')
      IID = Intrinsic::x86_avx512_vpshld_d_256;
    else if (VecWidth == 256 && Name[7] == 'w')
      IID = Intrinsic::x86_avx512_vpshld_w_256;
    else if (VecWidth == 512 && Name[7] == 'q')
      IID = Intrinsic::x86_avx512_vpshld_q_512;
    else if (VecWidth == 512 && Name[7] == 'd')
      IID = Intrinsic::x86_avx512_vpshld_d_512;
    else if (VecWidth == 512 && Name[7] == 'w')
      IID = Intrinsic::x86_avx512_vpshld_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpshrd.")) {
    if (VecWidth == 128 && Name[7] == 'q')
      IID = Intrinsic::x86_avx512_vpshrd_q_128;
    else if (VecWidth == 128 && Name[7] == 'd')
      IID = Intrinsic::x86_avx512_vpshrd_d_128;
    else if (VecWidth == 128 && Name[7] == 'w')
      IID = Intrinsic::x86_avx512_vpshrd_w_128;
    else if (VecWidth == 256 && Name[7] == 'q')
      IID = Intrinsic::x86_avx512_vpshrd_q_256;
    else if (VecWidth == 256 && Name[7] == 'd')
      IID = Intrinsic::x86_avx512_vpshrd_d_256;
    else if (VecWidth == 256 && Name[7] == 'w')
      IID = Intrinsic::x86_avx512_vpshrd_w_256;
    else if (VecWidth == 512 && Name[7] == 'q')
      IID = Intrinsic::x86_avx512_vpshrd_q_512;
    else if (VecWidth == 512 && Name[7] == 'd')
      IID = Intrinsic::x86_avx512_vpshrd_d_512;
    else if (VecWidth == 512 && Name[7] == 'w')
      IID = Intrinsic::x86_avx512_vpshrd_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);
    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
1468 if (IsX86 && (Name.startswith("avx.movnt.") ||
1469 Name.startswith("avx512.storent."))) {
1470 Module *M = F->getParent();
1471 SmallVector<Metadata *, 1> Elts;
1473 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1474 MDNode *Node = MDNode::get(C, Elts);
1476 Value *Arg0 = CI->getArgOperand(0);
1477 Value *Arg1 = CI->getArgOperand(1);
1479 // Convert the type of the pointer to a pointer to the stored type.
1480 Value *BC = Builder.CreateBitCast(Arg0,
1481 PointerType::getUnqual(Arg1->getType()),
1483 VectorType *VTy = cast<VectorType>(Arg1->getType());
1484 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1485 VTy->getBitWidth() / 8);
1486 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1488 // Remove intrinsic.
1489 CI->eraseFromParent();
1490 return;
1491 }
1493 if (IsX86 && Name == "sse2.storel.dq") {
1494 Value *Arg0 = CI->getArgOperand(0);
1495 Value *Arg1 = CI->getArgOperand(1);
1497 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1498 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1499 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1500 Value *BC = Builder.CreateBitCast(Arg0,
1501 PointerType::getUnqual(Elt->getType()),
1502 "cast");
1503 Builder.CreateAlignedStore(Elt, BC, 1);
1505 // Remove intrinsic.
1506 CI->eraseFromParent();
1507 return;
1508 }
1510 if (IsX86 && (Name.startswith("sse.storeu.") ||
1511 Name.startswith("sse2.storeu.") ||
1512 Name.startswith("avx.storeu."))) {
1513 Value *Arg0 = CI->getArgOperand(0);
1514 Value *Arg1 = CI->getArgOperand(1);
1516 Arg0 = Builder.CreateBitCast(Arg0,
1517 PointerType::getUnqual(Arg1->getType()),
1518 "cast");
1519 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1521 // Remove intrinsic.
1522 CI->eraseFromParent();
1523 return;
1524 }
1526 if (IsX86 && Name == "avx512.mask.store.ss") {
1527 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
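// Only bit 0 of the i8 mask applies to a single-element store, so clear the
// rest before handing it to the generic masked-store expansion below.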
1528 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1529 Mask, false);
1531 // Remove intrinsic.
1532 CI->eraseFromParent();
1533 return;
1534 }
1536 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1537 // "avx512.mask.storeu." or "avx512.mask.store."
1538 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1539 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1540 CI->getArgOperand(2), Aligned);
1542 // Remove intrinsic.
1543 CI->eraseFromParent();
1544 return;
1545 }
1547 Value *Rep;
1548 // Upgrade packed integer vector compare intrinsics to compare instructions.
1549 if (IsX86 && (Name.startswith("sse2.pcmp") ||
1550 Name.startswith("avx2.pcmp"))) {
1551 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1552 bool CmpEq = Name[9] == 'e';
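// The old intrinsics returned all-ones/all-zeros lanes, so an icmp followed
// by a sign extension back to the original vector type reproduces them.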
1553 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1554 CI->getArgOperand(0), CI->getArgOperand(1));
1555 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1556 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1557 Type *ExtTy = Type::getInt32Ty(C);
1558 if (CI->getOperand(0)->getType()->isIntegerTy(8))
1559 ExtTy = Type::getInt64Ty(C);
1560 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1561 ExtTy->getPrimitiveSizeInBits();
1562 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1563 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1564 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1565 Name == "sse2.sqrt.sd")) {
1566 Value *Vec = CI->getArgOperand(0);
1567 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1568 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1569 Intrinsic::sqrt, Elt0->getType());
1570 Elt0 = Builder.CreateCall(Intr, Elt0);
1571 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1572 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1573 Name.startswith("sse2.sqrt.p") ||
1574 Name.startswith("sse.sqrt.p"))) {
1575 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1576 Intrinsic::sqrt,
1577 CI->getType()),
1578 {CI->getArgOperand(0)});
1579 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1580 if (CI->getNumArgOperands() == 4 &&
1581 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1582 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1583 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1584 : Intrinsic::x86_avx512_sqrt_pd_512;
1586 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1587 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1588 IID), Args);
1589 } else {
1590 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1591 Intrinsic::sqrt,
1592 CI->getType()),
1593 {CI->getArgOperand(0)});
1594 }
1595 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1596 CI->getArgOperand(1));
1597 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1598 Name.startswith("avx512.ptestnm"))) {
1599 Value *Op0 = CI->getArgOperand(0);
1600 Value *Op1 = CI->getArgOperand(1);
1601 Value *Mask = CI->getArgOperand(2);
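// Emulate ptestm/ptestnm: AND the two sources, compare each lane against
// zero (NE for ptestm, EQ for ptestnm), then fold in the existing mask.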
1602 Rep = Builder.CreateAnd(Op0, Op1);
1603 llvm::Type *Ty = Op0->getType();
1604 Value *Zero = llvm::Constant::getNullValue(Ty);
1605 ICmpInst::Predicate Pred =
1606 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1607 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1608 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1609 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1610 unsigned NumElts =
1611 CI->getArgOperand(1)->getType()->getVectorNumElements();
1612 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1613 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1614 CI->getArgOperand(1));
1615 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1616 unsigned NumElts = CI->getType()->getScalarSizeInBits();
1617 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1618 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1619 uint32_t Indices[64];
1620 for (unsigned i = 0; i != NumElts; ++i)
1621 Indices[i] = i;
1623 // First extract half of each vector. This gives better codegen than
1624 // doing it in a single shuffle.
1625 LHS = Builder.CreateShuffleVector(LHS, LHS,
1626 makeArrayRef(Indices, NumElts / 2));
1627 RHS = Builder.CreateShuffleVector(RHS, RHS,
1628 makeArrayRef(Indices, NumElts / 2));
1629 // Concat the vectors.
1630 // NOTE: Operands have to be swapped to match intrinsic definition.
1631 Rep = Builder.CreateShuffleVector(RHS, LHS,
1632 makeArrayRef(Indices, NumElts));
1633 Rep = Builder.CreateBitCast(Rep, CI->getType());
1634 } else if (IsX86 && Name == "avx512.kand.w") {
1635 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1636 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1637 Rep = Builder.CreateAnd(LHS, RHS);
1638 Rep = Builder.CreateBitCast(Rep, CI->getType());
1639 } else if (IsX86 && Name == "avx512.kandn.w") {
1640 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1641 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1642 LHS = Builder.CreateNot(LHS);
1643 Rep = Builder.CreateAnd(LHS, RHS);
1644 Rep = Builder.CreateBitCast(Rep, CI->getType());
1645 } else if (IsX86 && Name == "avx512.kor.w") {
1646 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1647 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1648 Rep = Builder.CreateOr(LHS, RHS);
1649 Rep = Builder.CreateBitCast(Rep, CI->getType());
1650 } else if (IsX86 && Name == "avx512.kxor.w") {
1651 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1652 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1653 Rep = Builder.CreateXor(LHS, RHS);
1654 Rep = Builder.CreateBitCast(Rep, CI->getType());
1655 } else if (IsX86 && Name == "avx512.kxnor.w") {
1656 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1657 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1658 LHS = Builder.CreateNot(LHS);
1659 Rep = Builder.CreateXor(LHS, RHS);
1660 Rep = Builder.CreateBitCast(Rep, CI->getType());
1661 } else if (IsX86 && Name == "avx512.knot.w") {
1662 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1663 Rep = Builder.CreateNot(Rep);
1664 Rep = Builder.CreateBitCast(Rep, CI->getType());
1665 } else if (IsX86 &&
1666 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1667 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1668 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1669 Rep = Builder.CreateOr(LHS, RHS);
1670 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1671 Value *C;
1672 if (Name[14] == 'c')
1673 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1674 else
1675 C = ConstantInt::getNullValue(Builder.getInt16Ty());
1676 Rep = Builder.CreateICmpEQ(Rep, C);
1677 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1678 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1679 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1680 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1681 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1682 Type *I32Ty = Type::getInt32Ty(C);
1683 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1684 ConstantInt::get(I32Ty, 0));
1685 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1686 ConstantInt::get(I32Ty, 0));
1687 Value *EltOp;
1688 if (Name.contains(".add."))
1689 EltOp = Builder.CreateFAdd(Elt0, Elt1);
1690 else if (Name.contains(".sub."))
1691 EltOp = Builder.CreateFSub(Elt0, Elt1);
1692 else if (Name.contains(".mul."))
1693 EltOp = Builder.CreateFMul(Elt0, Elt1);
1694 else
1695 EltOp = Builder.CreateFDiv(Elt0, Elt1);
1696 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1697 ConstantInt::get(I32Ty, 0));
1698 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1699 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1700 bool CmpEq = Name[16] == 'e';
1701 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1702 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1703 Type *OpTy = CI->getArgOperand(0)->getType();
1704 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1705 unsigned EltWidth = OpTy->getScalarSizeInBits();
1706 Intrinsic::ID IID;
1707 if (VecWidth == 128 && EltWidth == 32)
1708 IID = Intrinsic::x86_avx512_fpclass_ps_128;
1709 else if (VecWidth == 256 && EltWidth == 32)
1710 IID = Intrinsic::x86_avx512_fpclass_ps_256;
1711 else if (VecWidth == 512 && EltWidth == 32)
1712 IID = Intrinsic::x86_avx512_fpclass_ps_512;
1713 else if (VecWidth == 128 && EltWidth == 64)
1714 IID = Intrinsic::x86_avx512_fpclass_pd_128;
1715 else if (VecWidth == 256 && EltWidth == 64)
1716 IID = Intrinsic::x86_avx512_fpclass_pd_256;
1717 else if (VecWidth == 512 && EltWidth == 64)
1718 IID = Intrinsic::x86_avx512_fpclass_pd_512;
1719 else
1720 llvm_unreachable("Unexpected intrinsic");
1722 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1723 { CI->getOperand(0), CI->getArgOperand(1) });
1724 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1725 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1726 Type *OpTy = CI->getArgOperand(0)->getType();
1727 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1728 unsigned EltWidth = OpTy->getScalarSizeInBits();
1729 Intrinsic::ID IID;
1730 if (VecWidth == 128 && EltWidth == 32)
1731 IID = Intrinsic::x86_avx512_cmp_ps_128;
1732 else if (VecWidth == 256 && EltWidth == 32)
1733 IID = Intrinsic::x86_avx512_cmp_ps_256;
1734 else if (VecWidth == 512 && EltWidth == 32)
1735 IID = Intrinsic::x86_avx512_cmp_ps_512;
1736 else if (VecWidth == 128 && EltWidth == 64)
1737 IID = Intrinsic::x86_avx512_cmp_pd_128;
1738 else if (VecWidth == 256 && EltWidth == 64)
1739 IID = Intrinsic::x86_avx512_cmp_pd_256;
1740 else if (VecWidth == 512 && EltWidth == 64)
1741 IID = Intrinsic::x86_avx512_cmp_pd_512;
1742 else
1743 llvm_unreachable("Unexpected intrinsic");
1745 SmallVector<Value *, 4> Args;
1746 Args.push_back(CI->getArgOperand(0));
1747 Args.push_back(CI->getArgOperand(1));
1748 Args.push_back(CI->getArgOperand(2));
1749 if (CI->getNumArgOperands() == 5)
1750 Args.push_back(CI->getArgOperand(4));
1752 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1753 Args);
1754 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1755 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1756 Name[16] != 'p') {
1757 // Integer compare intrinsics.
1758 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1759 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1760 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1761 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1762 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1763 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1764 Name.startswith("avx512.cvtw2mask.") ||
1765 Name.startswith("avx512.cvtd2mask.") ||
1766 Name.startswith("avx512.cvtq2mask."))) {
1767 Value *Op = CI->getArgOperand(0);
1768 Value *Zero = llvm::Constant::getNullValue(Op->getType());
1769 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1770 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1771 } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
1772 Name == "ssse3.pabs.w.128" ||
1773 Name == "ssse3.pabs.d.128" ||
1774 Name.startswith("avx2.pabs") ||
1775 Name.startswith("avx512.mask.pabs"))) {
1776 Rep = upgradeAbs(Builder, *CI);
1777 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1778 Name == "sse2.pmaxs.w" ||
1779 Name == "sse41.pmaxsd" ||
1780 Name.startswith("avx2.pmaxs") ||
1781 Name.startswith("avx512.mask.pmaxs"))) {
1782 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1783 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1784 Name == "sse41.pmaxuw" ||
1785 Name == "sse41.pmaxud" ||
1786 Name.startswith("avx2.pmaxu") ||
1787 Name.startswith("avx512.mask.pmaxu"))) {
1788 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1789 } else if (IsX86 && (Name == "sse41.pminsb" ||
1790 Name == "sse2.pmins.w" ||
1791 Name == "sse41.pminsd" ||
1792 Name.startswith("avx2.pmins") ||
1793 Name.startswith("avx512.mask.pmins"))) {
1794 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1795 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1796 Name == "sse41.pminuw" ||
1797 Name == "sse41.pminud" ||
1798 Name.startswith("avx2.pminu") ||
1799 Name.startswith("avx512.mask.pminu"))) {
1800 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1801 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1802 Name == "avx2.pmulu.dq" ||
1803 Name == "avx512.pmulu.dq.512" ||
1804 Name.startswith("avx512.mask.pmulu.dq."))) {
1805 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1806 } else if (IsX86 && (Name == "sse41.pmuldq" ||
1807 Name == "avx2.pmul.dq" ||
1808 Name == "avx512.pmul.dq.512" ||
1809 Name.startswith("avx512.mask.pmul.dq."))) {
1810 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1811 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1812 Name == "sse2.cvtsi2sd" ||
1813 Name == "sse.cvtsi642ss" ||
1814 Name == "sse2.cvtsi642sd")) {
1815 Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1816 CI->getType()->getVectorElementType());
1817 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1818 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1819 Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1820 CI->getType()->getVectorElementType());
1821 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1822 } else if (IsX86 && Name == "sse2.cvtss2sd") {
1823 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1824 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1825 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1826 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1827 Name == "sse2.cvtdq2ps" ||
1828 Name == "avx.cvtdq2.pd.256" ||
1829 Name == "avx.cvtdq2.ps.256" ||
1830 Name.startswith("avx512.mask.cvtdq2pd.") ||
1831 Name.startswith("avx512.mask.cvtudq2pd.") ||
1832 Name == "avx512.mask.cvtdq2ps.128" ||
1833 Name == "avx512.mask.cvtdq2ps.256" ||
1834 Name == "avx512.mask.cvtudq2ps.128" ||
1835 Name == "avx512.mask.cvtudq2ps.256" ||
1836 Name == "avx512.mask.cvtqq2pd.128" ||
1837 Name == "avx512.mask.cvtqq2pd.256" ||
1838 Name == "avx512.mask.cvtuqq2pd.128" ||
1839 Name == "avx512.mask.cvtuqq2pd.256" ||
1840 Name == "sse2.cvtps2pd" ||
1841 Name == "avx.cvt.ps2.pd.256" ||
1842 Name == "avx512.mask.cvtps2pd.128" ||
1843 Name == "avx512.mask.cvtps2pd.256")) {
1844 Type *DstTy = CI->getType();
1845 Rep = CI->getArgOperand(0);
1847 unsigned NumDstElts = DstTy->getVectorNumElements();
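// Conversions that produce fewer elements than the source provides (e.g.
// v4i32 -> v2f64) only read the low source elements, so shuffle them out
// first.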
1848 if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1849 assert(NumDstElts == 2 && "Unexpected vector size");
1850 uint32_t ShuffleMask[2] = { 0, 1 };
1851 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1852 }
1854 bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1855 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1856 if (IsPS2PD)
1857 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1858 else if (IsUnsigned)
1859 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1860 else
1861 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1863 if (CI->getNumArgOperands() == 3)
1864 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1865 CI->getArgOperand(1));
1866 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1867 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1868 CI->getArgOperand(1), CI->getArgOperand(2),
1869 /*Aligned*/false);
1870 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1871 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1872 CI->getArgOperand(1), CI->getArgOperand(2),
1873 /*Aligned*/true);
1874 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
1875 Type *ResultTy = CI->getType();
1876 Type *PtrTy = ResultTy->getVectorElementType();
1878 // Cast the pointer to element type.
1879 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1880 llvm::PointerType::getUnqual(PtrTy));
1882 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1883 ResultTy->getVectorNumElements());
1885 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
1886 Intrinsic::masked_expandload,
1887 ResultTy);
1888 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
1889 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
1890 Type *ResultTy = CI->getArgOperand(1)->getType();
1891 Type *PtrTy = ResultTy->getVectorElementType();
1893 // Cast the pointer to element type.
1894 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1895 llvm::PointerType::getUnqual(PtrTy));
1897 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1898 ResultTy->getVectorNumElements());
1900 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
1901 Intrinsic::masked_compressstore,
1902 ResultTy);
1903 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
1904 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1905 Intrinsic::ID intID;
1906 if (Name.endswith("ub"))
1907 intID = Intrinsic::x86_xop_vpcomub;
1908 else if (Name.endswith("uw"))
1909 intID = Intrinsic::x86_xop_vpcomuw;
1910 else if (Name.endswith("ud"))
1911 intID = Intrinsic::x86_xop_vpcomud;
1912 else if (Name.endswith("uq"))
1913 intID = Intrinsic::x86_xop_vpcomuq;
1914 else if (Name.endswith("b"))
1915 intID = Intrinsic::x86_xop_vpcomb;
1916 else if (Name.endswith("w"))
1917 intID = Intrinsic::x86_xop_vpcomw;
1918 else if (Name.endswith("d"))
1919 intID = Intrinsic::x86_xop_vpcomd;
1920 else if (Name.endswith("q"))
1921 intID = Intrinsic::x86_xop_vpcomq;
1923 llvm_unreachable("Unknown suffix");
1925 Name = Name.substr(9); // strip off "xop.vpcom"
1926 unsigned Imm;
1927 if (Name.startswith("lt"))
1928 Imm = 0;
1929 else if (Name.startswith("le"))
1930 Imm = 1;
1931 else if (Name.startswith("gt"))
1932 Imm = 2;
1933 else if (Name.startswith("ge"))
1934 Imm = 3;
1935 else if (Name.startswith("eq"))
1936 Imm = 4;
1937 else if (Name.startswith("ne"))
1938 Imm = 5;
1939 else if (Name.startswith("false"))
1940 Imm = 6;
1941 else if (Name.startswith("true"))
1942 Imm = 7;
1943 else
1944 llvm_unreachable("Unknown condition");
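// The old per-condition xop.vpcom* names collapse into the generic vpcom
// intrinsic; the predicate simply travels as the i8 immediate computed above.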
1946 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1947 Rep =
1948 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1949 Builder.getInt8(Imm)});
1950 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1951 Value *Sel = CI->getArgOperand(2);
1952 Value *NotSel = Builder.CreateNot(Sel);
1953 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1954 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1955 Rep = Builder.CreateOr(Sel0, Sel1);
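// vpcmov is a plain bitwise select: (Op0 & Sel) | (Op1 & ~Sel), so no target
// intrinsic is needed at all.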
1956 } else if (IsX86 && (Name.startswith("xop.vprot") ||
1957 Name.startswith("avx512.prol") ||
1958 Name.startswith("avx512.mask.prol"))) {
1959 Rep = upgradeX86Rotate(Builder, *CI, false);
1960 } else if (IsX86 && (Name.startswith("avx512.pror") ||
1961 Name.startswith("avx512.mask.pror"))) {
1962 Rep = upgradeX86Rotate(Builder, *CI, true);
1963 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1964 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1965 Intrinsic::x86_sse42_crc32_32_8);
1966 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1967 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1968 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1969 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
1970 Name.startswith("avx512.vbroadcast.s"))) {
1971 // Replace broadcasts with a series of insertelements.
1972 Type *VecTy = CI->getType();
1973 Type *EltTy = VecTy->getVectorElementType();
1974 unsigned EltNum = VecTy->getVectorNumElements();
1975 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1976 EltTy->getPointerTo());
1977 Value *Load = Builder.CreateLoad(EltTy, Cast);
1978 Type *I32Ty = Type::getInt32Ty(C);
1979 Rep = UndefValue::get(VecTy);
1980 for (unsigned I = 0; I < EltNum; ++I)
1981 Rep = Builder.CreateInsertElement(Rep, Load,
1982 ConstantInt::get(I32Ty, I));
1983 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1984 Name.startswith("sse41.pmovzx") ||
1985 Name.startswith("avx2.pmovsx") ||
1986 Name.startswith("avx2.pmovzx") ||
1987 Name.startswith("avx512.mask.pmovsx") ||
1988 Name.startswith("avx512.mask.pmovzx"))) {
1989 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1990 VectorType *DstTy = cast<VectorType>(CI->getType());
1991 unsigned NumDstElts = DstTy->getNumElements();
1993 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1994 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1995 for (unsigned i = 0; i != NumDstElts; ++i)
1996 ShuffleMask[i] = i;
1998 Value *SV = Builder.CreateShuffleVector(
1999 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2001 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2002 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2003 : Builder.CreateZExt(SV, DstTy);
2004 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2005 if (CI->getNumArgOperands() == 3)
2006 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2007 CI->getArgOperand(1));
2008 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2009 Name == "avx2.vbroadcasti128")) {
2010 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2011 Type *EltTy = CI->getType()->getVectorElementType();
2012 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2013 Type *VT = VectorType::get(EltTy, NumSrcElts);
2014 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2015 PointerType::getUnqual(VT));
2016 Value *Load = Builder.CreateAlignedLoad(Op, 1);
2017 if (NumSrcElts == 2)
2018 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2019 { 0, 1, 0, 1 });
2020 else
2021 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2022 { 0, 1, 2, 3, 0, 1, 2, 3 });
2023 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2024 Name.startswith("avx512.mask.shuf.f"))) {
2025 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2026 Type *VT = CI->getType();
2027 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2028 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2029 unsigned ControlBitsMask = NumLanes - 1;
2030 unsigned NumControlBits = NumLanes / 2;
2031 SmallVector<uint32_t, 8> ShuffleMask(0);
2033 for (unsigned l = 0; l != NumLanes; ++l) {
2034 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2035 // We actually need the other source.
2036 if (l >= NumLanes / 2)
2037 LaneMask += NumLanes;
2038 for (unsigned i = 0; i != NumElementsInLane; ++i)
2039 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2040 }
2041 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2042 CI->getArgOperand(1), ShuffleMask);
2043 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2044 CI->getArgOperand(3));
2045 } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2046 Name.startswith("avx512.mask.broadcasti"))) {
2047 unsigned NumSrcElts =
2048 CI->getArgOperand(0)->getType()->getVectorNumElements();
2049 unsigned NumDstElts = CI->getType()->getVectorNumElements();
2051 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2052 for (unsigned i = 0; i != NumDstElts; ++i)
2053 ShuffleMask[i] = i % NumSrcElts;
2055 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2056 CI->getArgOperand(0),
2057 ShuffleMask);
2058 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2059 CI->getArgOperand(1));
2060 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2061 Name.startswith("avx2.vbroadcast") ||
2062 Name.startswith("avx512.pbroadcast") ||
2063 Name.startswith("avx512.mask.broadcast.s"))) {
2064 // Replace vp?broadcasts with a vector shuffle.
2065 Value *Op = CI->getArgOperand(0);
2066 unsigned NumElts = CI->getType()->getVectorNumElements();
2067 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2068 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2069 Constant::getNullValue(MaskTy));
2071 if (CI->getNumArgOperands() == 3)
2072 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2073 CI->getArgOperand(1));
2074 } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2075 Name.startswith("sse2.psubs.") ||
2076 Name.startswith("avx2.padds.") ||
2077 Name.startswith("avx2.psubs.") ||
2078 Name.startswith("avx512.padds.") ||
2079 Name.startswith("avx512.psubs.") ||
2080 Name.startswith("avx512.mask.padds.") ||
2081 Name.startswith("avx512.mask.psubs."))) {
2082 bool IsAdd = Name.contains(".padds");
2083 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2084 } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2085 Name.startswith("sse2.psubus.") ||
2086 Name.startswith("avx2.paddus.") ||
2087 Name.startswith("avx2.psubus.") ||
2088 Name.startswith("avx512.mask.paddus.") ||
2089 Name.startswith("avx512.mask.psubus."))) {
2090 bool IsAdd = Name.contains(".paddus");
2091 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2092 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2093 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2094 CI->getArgOperand(1),
2095 CI->getArgOperand(2),
2096 CI->getArgOperand(3),
2097 CI->getArgOperand(4),
2098 false);
2099 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2100 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2101 CI->getArgOperand(1),
2102 CI->getArgOperand(2),
2103 CI->getArgOperand(3),
2104 CI->getArgOperand(4),
2105 true);
2106 } else if (IsX86 && (Name == "sse2.psll.dq" ||
2107 Name == "avx2.psll.dq")) {
2108 // 128/256-bit shift left specified in bits.
2109 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2110 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2111 Shift / 8); // Shift is in bits.
2112 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2113 Name == "avx2.psrl.dq")) {
2114 // 128/256-bit shift right specified in bits.
2115 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2116 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2117 Shift / 8); // Shift is in bits.
2118 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2119 Name == "avx2.psll.dq.bs" ||
2120 Name == "avx512.psll.dq.512")) {
2121 // 128/256/512-bit shift left specified in bytes.
2122 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2123 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2124 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2125 Name == "avx2.psrl.dq.bs" ||
2126 Name == "avx512.psrl.dq.512")) {
2127 // 128/256/512-bit shift right specified in bytes.
2128 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2129 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2130 } else if (IsX86 && (Name == "sse41.pblendw" ||
2131 Name.startswith("sse41.blendp") ||
2132 Name.startswith("avx.blend.p") ||
2133 Name == "avx2.pblendw" ||
2134 Name.startswith("avx2.pblendd."))) {
2135 Value *Op0 = CI->getArgOperand(0);
2136 Value *Op1 = CI->getArgOperand(1);
2137 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2138 VectorType *VecTy = cast<VectorType>(CI->getType());
2139 unsigned NumElts = VecTy->getNumElements();
2141 SmallVector<uint32_t, 16> Idxs(NumElts);
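// Bit (i % 8) of the immediate picks the second source for element i; a
// shuffle index of i + NumElts selects the element from Op1.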
2142 for (unsigned i = 0; i != NumElts; ++i)
2143 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2145 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2146 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2147 Name == "avx2.vinserti128" ||
2148 Name.startswith("avx512.mask.insert"))) {
2149 Value *Op0 = CI->getArgOperand(0);
2150 Value *Op1 = CI->getArgOperand(1);
2151 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2152 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2153 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2154 unsigned Scale = DstNumElts / SrcNumElts;
2156 // Mask off the high bits of the immediate value; hardware ignores those.
2157 Imm = Imm % Scale;
2159 // Extend the second operand into a vector the size of the destination.
2160 Value *UndefV = UndefValue::get(Op1->getType());
2161 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2162 for (unsigned i = 0; i != SrcNumElts; ++i)
2163 Idxs[i] = i;
2164 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2165 Idxs[i] = SrcNumElts;
2166 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2168 // Insert the second operand into the first operand.
2170 // Note that there is no guarantee that instruction lowering will actually
2171 // produce a vinsertf128 instruction for the created shuffles. In
2172 // particular, the 0 immediate case involves no lane changes, so it can
2173 // be handled as a blend.
2175 // Example of shuffle mask for 32-bit elements:
2176 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2177 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2179 // First fill with identity mask.
2180 for (unsigned i = 0; i != DstNumElts; ++i)
2181 Idxs[i] = i;
2182 // Then replace the elements where we need to insert.
2183 for (unsigned i = 0; i != SrcNumElts; ++i)
2184 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2185 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2187 // If the intrinsic has a mask operand, handle that.
2188 if (CI->getNumArgOperands() == 5)
2189 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2190 CI->getArgOperand(3));
2191 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2192 Name == "avx2.vextracti128" ||
2193 Name.startswith("avx512.mask.vextract"))) {
2194 Value *Op0 = CI->getArgOperand(0);
2195 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2196 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2197 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2198 unsigned Scale = SrcNumElts / DstNumElts;
2200 // Mask off the high bits of the immediate value; hardware ignores those.
2201 Imm = Imm % Scale;
2203 // Get indexes for the subvector of the input vector.
2204 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2205 for (unsigned i = 0; i != DstNumElts; ++i) {
2206 Idxs[i] = i + (Imm * DstNumElts);
2207 }
2208 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2210 // If the intrinsic has a mask operand, handle that.
2211 if (CI->getNumArgOperands() == 4)
2212 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2213 CI->getArgOperand(2));
2214 } else if (!IsX86 && Name == "stackprotectorcheck") {
2215 Rep = nullptr;
2216 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2217 Name.startswith("avx512.mask.perm.di."))) {
2218 Value *Op0 = CI->getArgOperand(0);
2219 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2220 VectorType *VecTy = cast<VectorType>(CI->getType());
2221 unsigned NumElts = VecTy->getNumElements();
2223 SmallVector<uint32_t, 8> Idxs(NumElts);
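// Each element's new position comes from a 2-bit field of the immediate,
// applied independently within every group of four elements:
// (Imm >> (2 * (i & 0x3))) & 3.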
2224 for (unsigned i = 0; i != NumElts; ++i)
2225 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2227 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2229 if (CI->getNumArgOperands() == 4)
2230 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2231 CI->getArgOperand(2));
2232 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2233 Name == "avx2.vperm2i128")) {
2234 // The immediate permute control byte looks like this:
2235 // [1:0] - select 128 bits from sources for low half of destination
2236 // [2] - ignore
2237 // [3] - zero low half of destination
2238 // [5:4] - select 128 bits from sources for high half of destination
2239 // [6] - ignore
2240 // [7] - zero high half of destination
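// For example, Imm = 0x21 puts the high half of the first source in the low
// half of the result and the low half of the second source in the high half.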
2242 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2244 unsigned NumElts = CI->getType()->getVectorNumElements();
2245 unsigned HalfSize = NumElts / 2;
2246 SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2248 // Determine which operand(s) are actually in use for this instruction.
2249 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2250 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2252 // If needed, replace operands based on zero mask.
2253 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2254 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2256 // Permute low half of result.
2257 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2258 for (unsigned i = 0; i < HalfSize; ++i)
2259 ShuffleMask[i] = StartIndex + i;
2261 // Permute high half of result.
2262 StartIndex = (Imm & 0x10) ? HalfSize : 0;
2263 for (unsigned i = 0; i < HalfSize; ++i)
2264 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2266 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2268 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2269 Name == "sse2.pshuf.d" ||
2270 Name.startswith("avx512.mask.vpermil.p") ||
2271 Name.startswith("avx512.mask.pshuf.d."))) {
2272 Value *Op0 = CI->getArgOperand(0);
2273 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2274 VectorType *VecTy = cast<VectorType>(CI->getType());
2275 unsigned NumElts = VecTy->getNumElements();
2276 // Calculate the size of each index in the immediate.
2277 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2278 unsigned IdxMask = ((1 << IdxSize) - 1);
2280 SmallVector<uint32_t, 8> Idxs(NumElts);
2281 // Lookup the bits for this element, wrapping around the immediate every
2282 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2283 // to offset by the first index of each group.
2284 for (unsigned i = 0; i != NumElts; ++i)
2285 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2287 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2289 if (CI->getNumArgOperands() == 4)
2290 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2291 CI->getArgOperand(2));
2292 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2293 Name.startswith("avx512.mask.pshufl.w."))) {
2294 Value *Op0 = CI->getArgOperand(0);
2295 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2296 unsigned NumElts = CI->getType()->getVectorNumElements();
2298 SmallVector<uint32_t, 16> Idxs(NumElts);
2299 for (unsigned l = 0; l != NumElts; l += 8) {
2300 for (unsigned i = 0; i != 4; ++i)
2301 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2302 for (unsigned i = 4; i != 8; ++i)
2303 Idxs[i + l] = i + l;
2304 }
2306 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2308 if (CI->getNumArgOperands() == 4)
2309 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2310 CI->getArgOperand(2));
2311 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2312 Name.startswith("avx512.mask.pshufh.w."))) {
2313 Value *Op0 = CI->getArgOperand(0);
2314 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2315 unsigned NumElts = CI->getType()->getVectorNumElements();
2317 SmallVector<uint32_t, 16> Idxs(NumElts);
2318 for (unsigned l = 0; l != NumElts; l += 8) {
2319 for (unsigned i = 0; i != 4; ++i)
2320 Idxs[i + l] = i + l;
2321 for (unsigned i = 0; i != 4; ++i)
2322 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2323 }
2325 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2327 if (CI->getNumArgOperands() == 4)
2328 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2329 CI->getArgOperand(2));
2330 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2331 Value *Op0 = CI->getArgOperand(0);
2332 Value *Op1 = CI->getArgOperand(1);
2333 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2334 unsigned NumElts = CI->getType()->getVectorNumElements();
2336 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2337 unsigned HalfLaneElts = NumLaneElts / 2;
2339 SmallVector<uint32_t, 16> Idxs(NumElts);
2340 for (unsigned i = 0; i != NumElts; ++i) {
2341 // Base index is the starting element of the lane.
2342 Idxs[i] = i - (i % NumLaneElts);
2343 // If we are half way through the lane switch to the other source.
2344 if ((i % NumLaneElts) >= HalfLaneElts)
2345 Idxs[i] += NumElts;
2346 // Now select the specific element by adding HalfLaneElts bits from the
2347 // immediate, wrapping around the immediate every 8 bits.
2348 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2349 }
2351 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2353 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2354 CI->getArgOperand(3));
2355 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2356 Name.startswith("avx512.mask.movshdup") ||
2357 Name.startswith("avx512.mask.movsldup"))) {
2358 Value *Op0 = CI->getArgOperand(0);
2359 unsigned NumElts = CI->getType()->getVectorNumElements();
2360 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2362 unsigned Offset = 0;
2363 if (Name.startswith("avx512.mask.movshdup."))
2364 Offset = 1;
2366 SmallVector<uint32_t, 16> Idxs(NumElts);
2367 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2368 for (unsigned i = 0; i != NumLaneElts; i += 2) {
2369 Idxs[i + l + 0] = i + l + Offset;
2370 Idxs[i + l + 1] = i + l + Offset;
2371 }
2373 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2375 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2376 CI->getArgOperand(1));
2377 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2378 Name.startswith("avx512.mask.unpckl."))) {
2379 Value *Op0 = CI->getArgOperand(0);
2380 Value *Op1 = CI->getArgOperand(1);
2381 int NumElts = CI->getType()->getVectorNumElements();
2382 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2384 SmallVector<uint32_t, 64> Idxs(NumElts);
2385 for (int l = 0; l != NumElts; l += NumLaneElts)
2386 for (int i = 0; i != NumLaneElts; ++i)
2387 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2389 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2391 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2392 CI->getArgOperand(2));
2393 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2394 Name.startswith("avx512.mask.unpckh."))) {
2395 Value *Op0 = CI->getArgOperand(0);
2396 Value *Op1 = CI->getArgOperand(1);
2397 int NumElts = CI->getType()->getVectorNumElements();
2398 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2400 SmallVector<uint32_t, 64> Idxs(NumElts);
2401 for (int l = 0; l != NumElts; l += NumLaneElts)
2402 for (int i = 0; i != NumLaneElts; ++i)
2403 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2405 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2407 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2408 CI->getArgOperand(2));
2409 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
2410 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
2411 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2412 CI->getArgOperand(2));
2413 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
2414 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
2415 CI->getArgOperand(1));
2416 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2417 CI->getArgOperand(2));
2418 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
2419 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
2420 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2421 CI->getArgOperand(2));
2422 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
2423 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
2424 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2425 CI->getArgOperand(2));
2426 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
2427 VectorType *FTy = cast<VectorType>(CI->getType());
2428 VectorType *ITy = VectorType::getInteger(FTy);
2429 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2430 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2431 Rep = Builder.CreateBitCast(Rep, FTy);
2432 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2433 CI->getArgOperand(2));
2434 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
2435 VectorType *FTy = cast<VectorType>(CI->getType());
2436 VectorType *ITy = VectorType::getInteger(FTy);
2437 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2438 Rep = Builder.CreateAnd(Rep,
2439 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2440 Rep = Builder.CreateBitCast(Rep, FTy);
2441 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2442 CI->getArgOperand(2));
2443 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
2444 VectorType *FTy = cast<VectorType>(CI->getType());
2445 VectorType *ITy = VectorType::getInteger(FTy);
2446 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2447 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2448 Rep = Builder.CreateBitCast(Rep, FTy);
2449 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2450 CI->getArgOperand(2));
2451 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
2452 VectorType *FTy = cast<VectorType>(CI->getType());
2453 VectorType *ITy = VectorType::getInteger(FTy);
2454 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2455 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2456 Rep = Builder.CreateBitCast(Rep, FTy);
2457 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2458 CI->getArgOperand(2));
2459 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2460 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2461 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2462 CI->getArgOperand(2));
2463 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2464 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2465 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2466 CI->getArgOperand(2));
2467 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2468 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2469 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2470 CI->getArgOperand(2));
2471 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2472 if (Name.endswith(".512")) {
2473 Intrinsic::ID IID;
2474 if (Name[17] == 's')
2475 IID = Intrinsic::x86_avx512_add_ps_512;
2476 else
2477 IID = Intrinsic::x86_avx512_add_pd_512;
2479 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2480 { CI->getArgOperand(0), CI->getArgOperand(1),
2481 CI->getArgOperand(4) });
2482 } else {
2483 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2484 }
2485 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2486 CI->getArgOperand(2));
2487 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2488 if (Name.endswith(".512")) {
2489 Intrinsic::ID IID;
2490 if (Name[17] == 's')
2491 IID = Intrinsic::x86_avx512_div_ps_512;
2492 else
2493 IID = Intrinsic::x86_avx512_div_pd_512;
2495 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2496 { CI->getArgOperand(0), CI->getArgOperand(1),
2497 CI->getArgOperand(4) });
2498 } else {
2499 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2500 }
2501 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2502 CI->getArgOperand(2));
2503 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2504 if (Name.endswith(".512")) {
2505 Intrinsic::ID IID;
2506 if (Name[17] == 's')
2507 IID = Intrinsic::x86_avx512_mul_ps_512;
2508 else
2509 IID = Intrinsic::x86_avx512_mul_pd_512;
2511 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2512 { CI->getArgOperand(0), CI->getArgOperand(1),
2513 CI->getArgOperand(4) });
2514 } else {
2515 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2516 }
2517 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2518 CI->getArgOperand(2));
2519 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2520 if (Name.endswith(".512")) {
2521 Intrinsic::ID IID;
2522 if (Name[17] == 's')
2523 IID = Intrinsic::x86_avx512_sub_ps_512;
2524 else
2525 IID = Intrinsic::x86_avx512_sub_pd_512;
2527 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2528 { CI->getArgOperand(0), CI->getArgOperand(1),
2529 CI->getArgOperand(4) });
2530 } else {
2531 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2532 }
2533 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2534 CI->getArgOperand(2));
2535 } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
2536 Name.drop_front(18) == ".512") {
2537 Intrinsic::ID IID;
2538 if (Name[17] == 's')
2539 IID = Intrinsic::x86_avx512_max_ps_512;
2540 else
2541 IID = Intrinsic::x86_avx512_max_pd_512;
2543 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2544 { CI->getArgOperand(0), CI->getArgOperand(1),
2545 CI->getArgOperand(4) });
2546 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2547 CI->getArgOperand(2));
2548 } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
2549 Name.drop_front(18) == ".512") {
2550 Intrinsic::ID IID;
2551 if (Name[17] == 's')
2552 IID = Intrinsic::x86_avx512_min_ps_512;
2553 else
2554 IID = Intrinsic::x86_avx512_min_pd_512;
2556 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2557 { CI->getArgOperand(0), CI->getArgOperand(1),
2558 CI->getArgOperand(4) });
2559 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2560 CI->getArgOperand(2));
2561 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2562 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2563 Intrinsic::ctlz,
2564 CI->getType()),
2565 { CI->getArgOperand(0), Builder.getInt1(false) });
2566 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2567 CI->getArgOperand(1));
2568 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2569 bool IsImmediate = Name[16] == 'i' ||
2570 (Name.size() > 18 && Name[18] == 'i');
2571 bool IsVariable = Name[16] == 'v';
2572 char Size = Name[16] == '.' ? Name[17] :
2573 Name[17] == '.' ? Name[18] :
2574 Name[18] == '.' ? Name[19] :
2575 Name[20];
2577 Intrinsic::ID IID;
2578 if (IsVariable && Name[17] != '.') {
2579 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2580 IID = Intrinsic::x86_avx2_psllv_q;
2581 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2582 IID = Intrinsic::x86_avx2_psllv_q_256;
2583 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2584 IID = Intrinsic::x86_avx2_psllv_d;
2585 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2586 IID = Intrinsic::x86_avx2_psllv_d_256;
2587 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2588 IID = Intrinsic::x86_avx512_psllv_w_128;
2589 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2590 IID = Intrinsic::x86_avx512_psllv_w_256;
2591 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2592 IID = Intrinsic::x86_avx512_psllv_w_512;
2593 else
2594 llvm_unreachable("Unexpected size");
2595 } else if (Name.endswith(".128")) {
2596 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2597 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2598 : Intrinsic::x86_sse2_psll_d;
2599 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2600 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2601 : Intrinsic::x86_sse2_psll_q;
2602 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2603 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2604 : Intrinsic::x86_sse2_psll_w;
2605 else
2606 llvm_unreachable("Unexpected size");
2607 } else if (Name.endswith(".256")) {
2608 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2609 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2610 : Intrinsic::x86_avx2_psll_d;
2611 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2612 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2613 : Intrinsic::x86_avx2_psll_q;
2614 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2615 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2616 : Intrinsic::x86_avx2_psll_w;
2617 else
2618 llvm_unreachable("Unexpected size");
2619 } else {
2620 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2621 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2622 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2623 Intrinsic::x86_avx512_psll_d_512;
2624 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2625 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2626 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2627 Intrinsic::x86_avx512_psll_q_512;
2628 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2629 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2630 : Intrinsic::x86_avx512_psll_w_512;
2631 else
2632 llvm_unreachable("Unexpected size");
2633 }
2635 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2636 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2637 bool IsImmediate = Name[16] == 'i' ||
2638 (Name.size() > 18 && Name[18] == 'i');
2639 bool IsVariable = Name[16] == 'v';
2640 char Size = Name[16] == '.' ? Name[17] :
2641 Name[17] == '.' ? Name[18] :
2642 Name[18] == '.' ? Name[19] :
2643 Name[20];
2645 Intrinsic::ID IID;
2646 if (IsVariable && Name[17] != '.') {
2647 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2648 IID = Intrinsic::x86_avx2_psrlv_q;
2649 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2650 IID = Intrinsic::x86_avx2_psrlv_q_256;
2651 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2652 IID = Intrinsic::x86_avx2_psrlv_d;
2653 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2654 IID = Intrinsic::x86_avx2_psrlv_d_256;
2655 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2656 IID = Intrinsic::x86_avx512_psrlv_w_128;
2657 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2658 IID = Intrinsic::x86_avx512_psrlv_w_256;
2659 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2660 IID = Intrinsic::x86_avx512_psrlv_w_512;
2661 else
2662 llvm_unreachable("Unexpected size");
2663 } else if (Name.endswith(".128")) {
2664 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2665 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2666 : Intrinsic::x86_sse2_psrl_d;
2667 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2668 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2669 : Intrinsic::x86_sse2_psrl_q;
2670 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2671 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2672 : Intrinsic::x86_sse2_psrl_w;
2673 else
2674 llvm_unreachable("Unexpected size");
2675 } else if (Name.endswith(".256")) {
2676 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2677 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2678 : Intrinsic::x86_avx2_psrl_d;
2679 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2680 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2681 : Intrinsic::x86_avx2_psrl_q;
2682 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2683 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2684 : Intrinsic::x86_avx2_psrl_w;
2685 else
2686 llvm_unreachable("Unexpected size");
2687 } else {
2688 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2689 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2690 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2691 Intrinsic::x86_avx512_psrl_d_512;
2692 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2693 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2694 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
2695 Intrinsic::x86_avx512_psrl_q_512;
2696 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2697 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2698 : Intrinsic::x86_avx512_psrl_w_512;
2699 else
2700 llvm_unreachable("Unexpected size");
2701 }
2703 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2704 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2705 bool IsImmediate = Name[16] == 'i' ||
2706 (Name.size() > 18 && Name[18] == 'i');
2707 bool IsVariable = Name[16] == 'v';
2708 char Size = Name[16] == '.' ? Name[17] :
2709 Name[17] == '.' ? Name[18] :
2710 Name[18] == '.' ? Name[19] :
2711 Name[20];
2713 Intrinsic::ID IID;
2714 if (IsVariable && Name[17] != '.') {
2715 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2716 IID = Intrinsic::x86_avx2_psrav_d;
2717 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2718 IID = Intrinsic::x86_avx2_psrav_d_256;
2719 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2720 IID = Intrinsic::x86_avx512_psrav_w_128;
2721 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2722 IID = Intrinsic::x86_avx512_psrav_w_256;
2723 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2724 IID = Intrinsic::x86_avx512_psrav_w_512;
2725 else
2726 llvm_unreachable("Unexpected size");
2727 } else if (Name.endswith(".128")) {
2728 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2729 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2730 : Intrinsic::x86_sse2_psra_d;
2731 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2732 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2733 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
2734 Intrinsic::x86_avx512_psra_q_128;
2735 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2736 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2737 : Intrinsic::x86_sse2_psra_w;
2738 else
2739 llvm_unreachable("Unexpected size");
2740 } else if (Name.endswith(".256")) {
2741 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2742 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2743 : Intrinsic::x86_avx2_psra_d;
2744 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2745 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2746 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
2747 Intrinsic::x86_avx512_psra_q_256;
2748 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2749 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2750 : Intrinsic::x86_avx2_psra_w;
2751 else
2752 llvm_unreachable("Unexpected size");
2753 } else {
2754 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2755 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2756 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
2757 Intrinsic::x86_avx512_psra_d_512;
2758 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2759 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2760 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
2761 Intrinsic::x86_avx512_psra_q_512;
2762 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2763 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2764 : Intrinsic::x86_avx512_psra_w_512;
2765 else
2766 llvm_unreachable("Unexpected size");
2767 }
2769 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2770 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2771 Rep = upgradeMaskedMove(Builder, *CI);
2772 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2773 Rep = UpgradeMaskToInt(Builder, *CI);
2774 } else if (IsX86 && Name.endswith(".movntdqa")) {
2775 Module *M = F->getParent();
2776 MDNode *Node = MDNode::get(
2777 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2779 Value *Ptr = CI->getArgOperand(0);
2780 VectorType *VTy = cast<VectorType>(CI->getType());
2782 // Convert the type of the pointer to a pointer to the stored type.
2783 Value *BC =
2784 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2785 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2786 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2787 Rep = LI;
2788 } else if (IsX86 &&
2789 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2790 Name.startswith("avx512.mask.pavg"))) {
2791 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2792 // llvm.x86.avx512.mask.pavg.b/w
2793 Value *A = CI->getArgOperand(0);
2794 Value *B = CI->getArgOperand(1);
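// pavg computes (a + b + 1) >> 1; do it in a widened element type so the
// carry from the +1 is not lost, then truncate back to the original type.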
2795 VectorType *ZextType = VectorType::getExtendedElementVectorType(
2796 cast<VectorType>(A->getType()));
2797 Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2798 Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2799 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2800 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2801 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2802 Rep = Builder.CreateTrunc(ShiftR, A->getType());
2803 if (CI->getNumArgOperands() > 2) {
2804 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2805 CI->getArgOperand(2));
2806 }
2807 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2808 Name.startswith("fma.vfmsub.") ||
2809 Name.startswith("fma.vfnmadd.") ||
2810 Name.startswith("fma.vfnmsub."))) {
2811 bool NegMul = Name[6] == 'n';
2812 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2813 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
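// Name layout: "fma.vf[n]m{add,sub}.{s,p}{s,d}" - the optional 'n' negates
// the product, "sub" negates the accumulator, and the letter after the dot
// selects the scalar or packed form.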
2815 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2816 CI->getArgOperand(2) };
2818 if (IsScalar) {
2819 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2820 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2821 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2822 }
2824 if (NegMul && !IsScalar)
2825 Ops[0] = Builder.CreateFNeg(Ops[0]);
2826 if (NegMul && IsScalar)
2827 Ops[1] = Builder.CreateFNeg(Ops[1]);
2828 if (NegAcc)
2829 Ops[2] = Builder.CreateFNeg(Ops[2]);
2831 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2832 Intrinsic::fma,
2833 Ops[0]->getType()),
2834 Ops);
2836 if (IsScalar)
2837 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
2838 (uint64_t)0);
2839 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
2840 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2841 CI->getArgOperand(2) };
2843 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2844 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2845 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2847 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2848 Intrinsic::fma,
2849 Ops[0]->getType()),
2850 Ops);
2852 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
2853 Rep, (uint64_t)0);
2854 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
2855 Name.startswith("avx512.maskz.vfmadd.s") ||
2856 Name.startswith("avx512.mask3.vfmadd.s") ||
2857 Name.startswith("avx512.mask3.vfmsub.s") ||
2858 Name.startswith("avx512.mask3.vfnmsub.s"))) {
2859 bool IsMask3 = Name[11] == '3';
2860 bool IsMaskZ = Name[11] == 'z';
2861 // Drop the "avx512.mask." to make it easier.
2862 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2863 bool NegMul = Name[2] == 'n';
2864 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2866 Value *A = CI->getArgOperand(0);
2867 Value *B = CI->getArgOperand(1);
2868 Value *C = CI->getArgOperand(2);
2870 if (NegMul && (IsMask3 || IsMaskZ))
2871 A = Builder.CreateFNeg(A);
2872 if (NegMul && !(IsMask3 || IsMaskZ))
2873 B = Builder.CreateFNeg(B);
2874 if (NegAcc)
2875 C = Builder.CreateFNeg(C);
2877 A = Builder.CreateExtractElement(A, (uint64_t)0);
2878 B = Builder.CreateExtractElement(B, (uint64_t)0);
2879 C = Builder.CreateExtractElement(C, (uint64_t)0);
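// Operand 4 is the rounding-mode immediate. A value of 4
// (_MM_FROUND_CUR_DIRECTION) means no embedded rounding, so plain llvm.fma
// can be used; any other value keeps the rounding-aware AVX-512 intrinsic.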
2881 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2882 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
2883 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
2885 Intrinsic::ID IID;
2886 if (Name.back() == 'd')
2887 IID = Intrinsic::x86_avx512_vfmadd_f64;
2888 else
2889 IID = Intrinsic::x86_avx512_vfmadd_f32;
2890 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
2891 Rep = Builder.CreateCall(FMA, Ops);
2892 } else {
2893 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
2894 Intrinsic::fma,
2895 A->getType());
2896 Rep = Builder.CreateCall(FMA, { A, B, C });
2897 }
2899 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
2900 IsMask3 ? C : A;
2902 // For Mask3 with NegAcc, we need to create a new extractelement that
2903 // avoids the negation above.
2904 if (NegAcc && IsMask3)
2905 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
2906 (uint64_t)0);
2908 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
2909 Rep, PassThru);
2910 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
2911 Rep, (uint64_t)0);
2912 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
2913 Name.startswith("avx512.mask.vfnmadd.p") ||
2914 Name.startswith("avx512.mask.vfnmsub.p") ||
2915 Name.startswith("avx512.mask3.vfmadd.p") ||
2916 Name.startswith("avx512.mask3.vfmsub.p") ||
2917 Name.startswith("avx512.mask3.vfnmsub.p") ||
2918 Name.startswith("avx512.maskz.vfmadd.p"))) {
2919 bool IsMask3 = Name[11] == '3';
2920 bool IsMaskZ = Name[11] == 'z';
2921 // Drop the "avx512.mask." to make it easier.
2922 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2923 bool NegMul = Name[2] == 'n';
2924 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2926 Value *A = CI->getArgOperand(0);
2927 Value *B = CI->getArgOperand(1);
2928 Value *C = CI->getArgOperand(2);
2930 if (NegMul && (IsMask3 || IsMaskZ))
2931 A = Builder.CreateFNeg(A);
2932 if (NegMul && !(IsMask3 || IsMaskZ))
2933 B = Builder.CreateFNeg(B);
2934 if (NegAcc)
2935 C = Builder.CreateFNeg(C);
2937 if (CI->getNumArgOperands() == 5 &&
2938 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2939 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
2940 Intrinsic::ID IID;
2941 // Check the character before ".512" in the string.
2942 if (Name[Name.size()-5] == 's')
2943 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
2944 else
2945 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
2947 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2948 { A, B, C, CI->getArgOperand(4) });
2949 } else {
2950 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
2951 Intrinsic::fma,
2952 A->getType());
2953 Rep = Builder.CreateCall(FMA, { A, B, C });
2954 }
2956 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
2957 IsMask3 ? CI->getArgOperand(2) :
2958 CI->getArgOperand(0);
2960 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
2961 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
2962 Name.startswith("fma.vfmsubadd.p"))) {
2963 bool IsSubAdd = Name[7] == 's';
2964 int NumElts = CI->getType()->getVectorNumElements();
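// vfmaddsub subtracts the addend in even lanes and adds it in odd lanes
// (vfmsubadd is the inverse). Model it with two llvm.fma calls, one with a
// negated addend, and interleave the results with a shufflevector.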
2966 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2967 CI->getArgOperand(2) };
2969 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
2970 Ops[0]->getType());
2971 Value *Odd = Builder.CreateCall(FMA, Ops);
2972 Ops[2] = Builder.CreateFNeg(Ops[2]);
2973 Value *Even = Builder.CreateCall(FMA, Ops);
2975 if (IsSubAdd)
2976 std::swap(Even, Odd);
2978 SmallVector<uint32_t, 32> Idxs(NumElts);
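// Idxs[i] = i for even i (picked from Even, the first shuffle operand) and
// i + NumElts for odd i (picked from Odd, the second operand).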
2979 for (int i = 0; i != NumElts; ++i)
2980 Idxs[i] = i + (i % 2) * NumElts;
2982 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
2983 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
2984 Name.startswith("avx512.mask3.vfmaddsub.p") ||
2985 Name.startswith("avx512.maskz.vfmaddsub.p") ||
2986 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
2987 bool IsMask3 = Name[11] == '3';
2988 bool IsMaskZ = Name[11] == 'z';
2989 // Drop the "avx512.mask." to make it easier.
2990 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2991 bool IsSubAdd = Name[3] == 's';
2992 if (CI->getNumArgOperands() == 5 &&
2993 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2994 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
2995 Intrinsic::ID IID;
2996 // Check the character before ".512" in the string.
2997 if (Name[Name.size()-5] == 's')
2998 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
2999 else
3000 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3002 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3003 CI->getArgOperand(2), CI->getArgOperand(4) };
3004 if (IsSubAdd)
3005 Ops[2] = Builder.CreateFNeg(Ops[2]);
3007 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3008 Ops);
3010 } else {
3011 int NumElts = CI->getType()->getVectorNumElements();
3013 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3014 CI->getArgOperand(2) };
3016 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3017 Ops[0]->getType());
3018 Value *Odd = Builder.CreateCall(FMA, Ops);
3019 Ops[2] = Builder.CreateFNeg(Ops[2]);
3020 Value *Even = Builder.CreateCall(FMA, Ops);
3022 if (IsSubAdd)
3023 std::swap(Even, Odd);
3025 SmallVector<uint32_t, 32> Idxs(NumElts);
3026 for (int i = 0; i != NumElts; ++i)
3027 Idxs[i] = i + (i % 2) * NumElts;
3029 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3030 }
3032 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3033 IsMask3 ? CI->getArgOperand(2) :
3034 CI->getArgOperand(0);
3036 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3037 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3038 Name.startswith("avx512.maskz.pternlog."))) {
3039 bool ZeroMask = Name[11] == 'z';
3040 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3041 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
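// pternlog is a bitwise ternary logic op: operand 3 is an 8-bit truth table
// indexed by the corresponding bits of the three sources. Only the intrinsic
// ID depends on the vector and element width.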
3042 Intrinsic::ID IID;
3043 if (VecWidth == 128 && EltWidth == 32)
3044 IID = Intrinsic::x86_avx512_pternlog_d_128;
3045 else if (VecWidth == 256 && EltWidth == 32)
3046 IID = Intrinsic::x86_avx512_pternlog_d_256;
3047 else if (VecWidth == 512 && EltWidth == 32)
3048 IID = Intrinsic::x86_avx512_pternlog_d_512;
3049 else if (VecWidth == 128 && EltWidth == 64)
3050 IID = Intrinsic::x86_avx512_pternlog_q_128;
3051 else if (VecWidth == 256 && EltWidth == 64)
3052 IID = Intrinsic::x86_avx512_pternlog_q_256;
3053 else if (VecWidth == 512 && EltWidth == 64)
3054 IID = Intrinsic::x86_avx512_pternlog_q_512;
3055 else
3056 llvm_unreachable("Unexpected intrinsic");
3058 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3059 CI->getArgOperand(2), CI->getArgOperand(3) };
3060 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3061 Args);
3062 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3063 : CI->getArgOperand(0);
3064 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3065 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3066 Name.startswith("avx512.maskz.vpmadd52"))) {
3067 bool ZeroMask = Name[11] == 'z';
3068 bool High = Name[20] == 'h' || Name[21] == 'h';
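// vpmadd52[lh]uq multiplies the low 52 bits of each 64-bit lane of the two
// sources and accumulates either the low ('l') or high ('h') 52 bits of the
// 104-bit product into the 64-bit lanes of operand 0.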
3069 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3070 Intrinsic::ID IID;
3071 if (VecWidth == 128 && !High)
3072 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3073 else if (VecWidth == 256 && !High)
3074 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3075 else if (VecWidth == 512 && !High)
3076 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3077 else if (VecWidth == 128 && High)
3078 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3079 else if (VecWidth == 256 && High)
3080 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3081 else if (VecWidth == 512 && High)
3082 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3083 else
3084 llvm_unreachable("Unexpected intrinsic");
3086 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3087 CI->getArgOperand(2) };
3088 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3089 Args);
3090 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3091 : CI->getArgOperand(0);
3092 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3093 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3094 Name.startswith("avx512.mask.vpermt2var.") ||
3095 Name.startswith("avx512.maskz.vpermt2var."))) {
3096 bool ZeroMask = Name[11] == 'z';
3097 bool IndexForm = Name[17] == 'i';
3098 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3099 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3100 bool IsFloat = CI->getType()->isFPOrFPVectorTy();
3101 Intrinsic::ID IID;
3102 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
3103 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
3104 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
3105 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
3106 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
3107 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
3108 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
3109 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
3110 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
3111 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
3112 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
3113 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
3114 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
3115 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
3116 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
3117 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
3118 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
3119 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
3120 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
3121 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
3122 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
3123 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
3124 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
3125 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
3126 else if (VecWidth == 128 && EltWidth == 16)
3127 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
3128 else if (VecWidth == 256 && EltWidth == 16)
3129 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
3130 else if (VecWidth == 512 && EltWidth == 16)
3131 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
3132 else if (VecWidth == 128 && EltWidth == 8)
3133 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
3134 else if (VecWidth == 256 && EltWidth == 8)
3135 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
3136 else if (VecWidth == 512 && EltWidth == 8)
3137 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
3138 else
3139 llvm_unreachable("Unexpected intrinsic");
3141 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3142 CI->getArgOperand(2) };
3144 // If this isn't index form we need to swap operand 0 and 1.
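// In the old intrinsics operand 1 is the register the instruction
// overwrites (the indices for vpermi2, the first table for vpermt2), so it
// also serves as the merge-masking passthru below, bitcast to the result
// type for the floating-point variants.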
3145 if (!IndexForm)
3146 std::swap(Args[0], Args[1]);
3148 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3149 Args);
3150 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3151 : Builder.CreateBitCast(CI->getArgOperand(1),
3152 CI->getType());
3153 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3154 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3155 Name.startswith("avx512.maskz.vpdpbusd.") ||
3156 Name.startswith("avx512.mask.vpdpbusds.") ||
3157 Name.startswith("avx512.maskz.vpdpbusds."))) {
3158 bool ZeroMask = Name[11] == 'z';
3159 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
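// vpdpbusd multiplies unsigned bytes of one source with the corresponding
// signed bytes of the other, sums each group of four products and
// accumulates into the 32-bit lanes of operand 0; the trailing 's' variants
// saturate the accumulation.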
3160 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3161 Intrinsic::ID IID;
3162 if (VecWidth == 128 && !IsSaturating)
3163 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3164 else if (VecWidth == 256 && !IsSaturating)
3165 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3166 else if (VecWidth == 512 && !IsSaturating)
3167 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3168 else if (VecWidth == 128 && IsSaturating)
3169 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3170 else if (VecWidth == 256 && IsSaturating)
3171 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3172 else if (VecWidth == 512 && IsSaturating)
3173 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3174 else
3175 llvm_unreachable("Unexpected intrinsic");
3177 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3178 CI->getArgOperand(2) };
3179 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3180 Args);
3181 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3182 : CI->getArgOperand(0);
3183 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3184 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3185 Name.startswith("avx512.maskz.vpdpwssd.") ||
3186 Name.startswith("avx512.mask.vpdpwssds.") ||
3187 Name.startswith("avx512.maskz.vpdpwssds."))) {
3188 bool ZeroMask = Name[11] == 'z';
3189 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3190 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3191 Intrinsic::ID IID;
3192 if (VecWidth == 128 && !IsSaturating)
3193 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3194 else if (VecWidth == 256 && !IsSaturating)
3195 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3196 else if (VecWidth == 512 && !IsSaturating)
3197 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3198 else if (VecWidth == 128 && IsSaturating)
3199 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3200 else if (VecWidth == 256 && IsSaturating)
3201 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3202 else if (VecWidth == 512 && IsSaturating)
3203 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3204 else
3205 llvm_unreachable("Unexpected intrinsic");
3207 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3208 CI->getArgOperand(2) };
3209 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3210 Args);
3211 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3212 : CI->getArgOperand(0);
3213 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3214 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3215 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3216 Name == "subborrow.u32" || Name == "subborrow.u64")) {
3217 Intrinsic::ID IID;
3218 if (Name[0] == 'a' && Name.back() == '2')
3219 IID = Intrinsic::x86_addcarry_32;
3220 else if (Name[0] == 'a' && Name.back() == '4')
3221 IID = Intrinsic::x86_addcarry_64;
3222 else if (Name[0] == 's' && Name.back() == '2')
3223 IID = Intrinsic::x86_subborrow_32;
3224 else if (Name[0] == 's' && Name.back() == '4')
3225 IID = Intrinsic::x86_subborrow_64;
3226 else
3227 llvm_unreachable("Unexpected intrinsic");
3229 // Make a call with 3 operands.
3230 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3231 CI->getArgOperand(2)};
3232 Value *NewCall = Builder.CreateCall(
3233 Intrinsic::getDeclaration(CI->getModule(), IID),
3234 Args);
3236 // Extract the second result and store it.
3237 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3238 // Cast the pointer to the right type.
3239 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3240 llvm::PointerType::getUnqual(Data->getType()));
3241 Builder.CreateAlignedStore(Data, Ptr, 1);
3242 // Replace the original call result with the first result of the new call.
3243 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3245 CI->replaceAllUsesWith(CF);
3246 Rep = nullptr;
3247 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3248 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3249 // Rep will be updated by the call in the condition.
3250 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3251 Value *Arg = CI->getArgOperand(0);
3252 Value *Neg = Builder.CreateNeg(Arg, "neg");
3253 Value *Cmp = Builder.CreateICmpSGE(
3254 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3255 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3256 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3257 Name == "max.ui" || Name == "max.ull")) {
3258 Value *Arg0 = CI->getArgOperand(0);
3259 Value *Arg1 = CI->getArgOperand(1);
3260 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3261 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3262 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3263 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3264 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3265 Name == "min.ui" || Name == "min.ull")) {
3266 Value *Arg0 = CI->getArgOperand(0);
3267 Value *Arg1 = CI->getArgOperand(1);
3268 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3269 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3270 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3271 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3272 } else if (IsNVVM && Name == "clz.ll") {
3273 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3274 Value *Arg = CI->getArgOperand(0);
3275 Value *Ctlz = Builder.CreateCall(
3276 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3277 {Arg->getType()}),
3278 {Arg, Builder.getFalse()}, "ctlz");
3279 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3280 } else if (IsNVVM && Name == "popc.ll") {
3281 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
3283 Value *Arg = CI->getArgOperand(0);
3284 Value *Popc = Builder.CreateCall(
3285 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3286 {Arg->getType()}),
3287 Arg, "ctpop");
3288 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3289 } else if (IsNVVM && Name == "h2f") {
3290 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3291 F->getParent(), Intrinsic::convert_from_fp16,
3292 {Builder.getFloatTy()}),
3293 CI->getArgOperand(0), "h2f");
3294 } else {
3295 llvm_unreachable("Unknown function for CallInst upgrade.");
3296 }
3298 if (Rep)
3299 CI->replaceAllUsesWith(Rep);
3300 CI->eraseFromParent();
3301 return;
3302 }
3304 const auto &DefaultCase = [&NewFn, &CI]() -> void {
3305 // Handle generic mangling change, but nothing else
3306 assert(
3307 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3308 "Unknown function for CallInst upgrade and isn't just a name change");
3309 CI->setCalledFunction(NewFn);
3310 };
3311 CallInst *NewCall = nullptr;
3312 switch (NewFn->getIntrinsicID()) {
3313 default: {
3314 DefaultCase();
3315 return;
3316 }
3318 case Intrinsic::arm_neon_vld1:
3319 case Intrinsic::arm_neon_vld2:
3320 case Intrinsic::arm_neon_vld3:
3321 case Intrinsic::arm_neon_vld4:
3322 case Intrinsic::arm_neon_vld2lane:
3323 case Intrinsic::arm_neon_vld3lane:
3324 case Intrinsic::arm_neon_vld4lane:
3325 case Intrinsic::arm_neon_vst1:
3326 case Intrinsic::arm_neon_vst2:
3327 case Intrinsic::arm_neon_vst3:
3328 case Intrinsic::arm_neon_vst4:
3329 case Intrinsic::arm_neon_vst2lane:
3330 case Intrinsic::arm_neon_vst3lane:
3331 case Intrinsic::arm_neon_vst4lane: {
3332 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3333 CI->arg_operands().end());
3334 NewCall = Builder.CreateCall(NewFn, Args);
3335 break;
3336 }
3338 case Intrinsic::bitreverse:
3339 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3340 break;
3342 case Intrinsic::ctlz:
3343 case Intrinsic::cttz:
3344 assert(CI->getNumArgOperands() == 1 &&
3345 "Mismatch between function args and call args");
3346 NewCall =
3347 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3348 break;
3350 case Intrinsic::objectsize: {
3351 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3352 ? Builder.getFalse()
3353 : CI->getArgOperand(2);
3354 NewCall = Builder.CreateCall(
3355 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
3356 break;
3357 }
3359 case Intrinsic::ctpop:
3360 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3361 break;
3363 case Intrinsic::convert_from_fp16:
3364 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3365 break;
3367 case Intrinsic::dbg_value:
3368 // Upgrade from the old version that had an extra offset argument.
3369 assert(CI->getNumArgOperands() == 4);
3370 // Drop nonzero offsets instead of attempting to upgrade them.
3371 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3372 if (Offset->isZeroValue()) {
3373 NewCall = Builder.CreateCall(
3374 NewFn,
3375 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3376 break;
3377 }
3378 CI->eraseFromParent();
3379 return;
3381 case Intrinsic::x86_xop_vfrcz_ss:
3382 case Intrinsic::x86_xop_vfrcz_sd:
3383 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3384 break;
3386 case Intrinsic::x86_xop_vpermil2pd:
3387 case Intrinsic::x86_xop_vpermil2ps:
3388 case Intrinsic::x86_xop_vpermil2pd_256:
3389 case Intrinsic::x86_xop_vpermil2ps_256: {
3390 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3391 CI->arg_operands().end());
3392 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3393 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3394 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3395 NewCall = Builder.CreateCall(NewFn, Args);
3396 break;
3397 }
3399 case Intrinsic::x86_sse41_ptestc:
3400 case Intrinsic::x86_sse41_ptestz:
3401 case Intrinsic::x86_sse41_ptestnzc: {
3402 // The arguments for these intrinsics used to be v4f32, and changed
3403 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3404 // So, the only thing required is a bitcast for both arguments.
3405 // First, check the arguments have the old type.
3406 Value *Arg0 = CI->getArgOperand(0);
3407 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3408 return;
3410 // Old intrinsic, add bitcasts
3411 Value *Arg1 = CI->getArgOperand(1);
3413 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3415 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3416 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3418 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3419 break;
3420 }
3422 case Intrinsic::x86_rdtscp: {
3423 // This used to take 1 argument. If we have no arguments, it is already
3424 // upgraded.
3425 if (CI->getNumOperands() == 0)
3426 return;
3428 NewCall = Builder.CreateCall(NewFn);
3429 // Extract the second result and store it.
3430 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3431 // Cast the pointer to the right type.
3432 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3433 llvm::PointerType::getUnqual(Data->getType()));
3434 Builder.CreateAlignedStore(Data, Ptr, 1);
3435 // Replace the original call result with the first result of the new call.
3436 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3438 std::string Name = CI->getName();
3439 if (!Name.empty()) {
3440 CI->setName(Name + ".old");
3441 NewCall->setName(Name);
3442 }
3443 CI->replaceAllUsesWith(TSC);
3444 CI->eraseFromParent();
3445 return;
3446 }
3448 case Intrinsic::x86_sse41_insertps:
3449 case Intrinsic::x86_sse41_dppd:
3450 case Intrinsic::x86_sse41_dpps:
3451 case Intrinsic::x86_sse41_mpsadbw:
3452 case Intrinsic::x86_avx_dp_ps_256:
3453 case Intrinsic::x86_avx2_mpsadbw: {
3454 // Need to truncate the last argument from i32 to i8 -- this argument models
3455 // an inherently 8-bit immediate operand to these x86 instructions.
3456 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3457 CI->arg_operands().end());
3459 // Replace the last argument with a trunc.
3460 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3461 NewCall = Builder.CreateCall(NewFn, Args);
3462 break;
3463 }
3465 case Intrinsic::thread_pointer: {
3466 NewCall = Builder.CreateCall(NewFn, {});
3467 break;
3468 }
3470 case Intrinsic::invariant_start:
3471 case Intrinsic::invariant_end:
3472 case Intrinsic::masked_load:
3473 case Intrinsic::masked_store:
3474 case Intrinsic::masked_gather:
3475 case Intrinsic::masked_scatter: {
3476 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3477 CI->arg_operands().end());
3478 NewCall = Builder.CreateCall(NewFn, Args);
3479 break;
3480 }
3482 case Intrinsic::memcpy:
3483 case Intrinsic::memmove:
3484 case Intrinsic::memset: {
3485 // We have to make sure that the call signature is what we're expecting.
3486 // We only want to change the old signatures by removing the alignment arg:
3487 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3488 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3489 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3490 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3491 // Note: i8*'s in the above can be any pointer type
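// For example (a sketch):
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 8, i1 false)
// becomes
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %d, i8* align 8 %s, i64 %n, i1 false)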
3492 if (CI->getNumArgOperands() != 5) {
3493 DefaultCase();
3494 return;
3495 }
3496 // Remove alignment argument (3), and add alignment attributes to the
3497 // dest/src pointers.
3498 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3499 CI->getArgOperand(2), CI->getArgOperand(4)};
3500 NewCall = Builder.CreateCall(NewFn, Args);
3501 auto *MemCI = cast<MemIntrinsic>(NewCall);
3502 // All mem intrinsics support dest alignment.
3503 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3504 MemCI->setDestAlignment(Align->getZExtValue());
3505 // Memcpy/Memmove also support source alignment.
3506 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3507 MTI->setSourceAlignment(Align->getZExtValue());
3508 break;
3509 }
3510 }
3511 assert(NewCall && "Should have either set this variable or returned through "
3512 "the default case");
3513 std::string Name = CI->getName();
3514 if (!Name.empty()) {
3515 CI->setName(Name + ".old");
3516 NewCall->setName(Name);
3517 }
3518 CI->replaceAllUsesWith(NewCall);
3519 CI->eraseFromParent();
3520 }
3522 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3523 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3525 // Check if this function should be upgraded and get the replacement function
3526 // if there is one.
3527 Function *NewFn;
3528 if (UpgradeIntrinsicFunction(F, NewFn)) {
3529 // Replace all users of the old function with the new function or new
3530 // instructions. This is not a range loop because the call is deleted.
3531 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3532 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3533 UpgradeIntrinsicCall(CI, NewFn);
3535 // Remove old function, no longer used, from the module.
3536 F->eraseFromParent();
3537 }
3538 }
3540 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3541 // Check if the tag uses struct-path aware TBAA format.
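// Old scalar tags look like !{!"name", !parent} (with an optional i64
// "constant" flag); struct-path tags start with an MDNode, e.g.
// !{!1, !1, i64 0}. The code below wraps the old tag in a scalar type node
// and reuses it as both the base and the access type.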
3542 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3543 return &MD;
3545 auto &Context = MD.getContext();
3546 if (MD.getNumOperands() == 3) {
3547 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3548 MDNode *ScalarType = MDNode::get(Context, Elts);
3549 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3550 Metadata *Elts2[] = {ScalarType, ScalarType,
3551 ConstantAsMetadata::get(
3552 Constant::getNullValue(Type::getInt64Ty(Context))),
3553 MD.getOperand(2)};
3554 return MDNode::get(Context, Elts2);
3555 }
3556 // Create a MDNode <MD, MD, offset 0>
3557 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3558 Type::getInt64Ty(Context)))};
3559 return MDNode::get(Context, Elts);
3560 }
3562 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3563 Instruction *&Temp) {
3564 if (Opc != Instruction::BitCast)
3565 return nullptr;
3567 Temp = nullptr;
3568 Type *SrcTy = V->getType();
3569 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3570 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3571 LLVMContext &Context = V->getContext();
3573 // We have no information about target data layout, so we assume that
3574 // the maximum pointer size is 64bit.
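// A bitcast between pointers in different address spaces is no longer valid
// IR; rebuild it as a ptrtoint/inttoptr pair, e.g. (sketch):
//   %t = ptrtoint i8 addrspace(1)* %p to i64
//   %q = inttoptr i64 %t to i8*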
3575 Type *MidTy = Type::getInt64Ty(Context);
3576 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3578 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3579 }
3581 return nullptr;
3582 }
3584 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3585 if (Opc != Instruction::BitCast)
3586 return nullptr;
3588 Type *SrcTy = C->getType();
3589 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3590 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3591 LLVMContext &Context = C->getContext();
3593 // We have no information about target data layout, so we assume that
3594 // the maximum pointer size is 64bit.
3595 Type *MidTy = Type::getInt64Ty(Context);
3597 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3598 DestTy);
3599 }
3601 return nullptr;
3602 }
3604 /// Check the debug info version number, if it is out-dated, drop the debug
3605 /// info. Return true if module is modified.
3606 bool llvm::UpgradeDebugInfo(Module &M) {
3607 unsigned Version = getDebugMetadataVersionFromModule(M);
3608 if (Version == DEBUG_METADATA_VERSION) {
3609 bool BrokenDebugInfo = false;
3610 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3611 report_fatal_error("Broken module found, compilation aborted!");
3612 if (!BrokenDebugInfo)
3613 // Everything is ok.
3614 return false;
3615 else {
3616 // Diagnose malformed debug info.
3617 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3618 M.getContext().diagnose(Diag);
3619 }
3620 }
3621 bool Modified = StripDebugInfo(M);
3622 if (Modified && Version != DEBUG_METADATA_VERSION) {
3623 // Diagnose a version mismatch.
3624 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3625 M.getContext().diagnose(DiagVersion);
3626 }
3627 return Modified;
3628 }
3630 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3631 bool Changed = false;
3632 NamedMDNode *ModRetainReleaseMarker =
3633 M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3634 if (ModRetainReleaseMarker) {
3635 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3636 if (Op) {
3637 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3638 if (ID) {
3639 SmallVector<StringRef, 4> ValueComp;
3640 ID->getString().split(ValueComp, "#");
3641 if (ValueComp.size() == 2) {
3642 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3643 Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3644 ModRetainReleaseMarker->setOperand(0,
3645 MDNode::get(M.getContext(), Ops));
3646 Changed = true;
3647 }
3648 }
3649 }
3650 }
3652 return Changed;
3653 }
3654 bool llvm::UpgradeModuleFlags(Module &M) {
3655 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3656 if (!ModFlags)
3657 return false;
3659 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3660 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3661 MDNode *Op = ModFlags->getOperand(I);
3662 if (Op->getNumOperands() != 3)
3663 continue;
3664 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3665 if (!ID)
3666 continue;
3667 if (ID->getString() == "Objective-C Image Info Version")
3668 HasObjCFlag = true;
3669 if (ID->getString() == "Objective-C Class Properties")
3670 HasClassProperties = true;
3671 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3672 // flags used to be Error and is now Max.
3673 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3674 if (auto *Behavior =
3675 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3676 if (Behavior->getLimitedValue() == Module::Error) {
3677 Type *Int32Ty = Type::getInt32Ty(M.getContext());
3678 Metadata *Ops[3] = {
3679 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3680 MDString::get(M.getContext(), ID->getString()),
3681 Op->getOperand(2)};
3682 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3687 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
3688 // section name so that llvm-lto will not complain about mismatching
3689 // module flags that are functionally the same.
3690 if (ID->getString() == "Objective-C Image Info Section") {
3691 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3692 SmallVector<StringRef, 4> ValueComp;
3693 Value->getString().split(ValueComp, " ");
3694 if (ValueComp.size() != 1) {
3695 std::string NewValue;
3696 for (auto &S : ValueComp)
3697 NewValue += S.str();
3698 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3699 MDString::get(M.getContext(), NewValue)};
3700 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3701 Changed = true;
3702 }
3703 }
3704 }
3705 }
3707 // "Objective-C Class Properties" is recently added for Objective-C. We
3708 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
3709 // flag of value 0, so we can correclty downgrade this flag when trying to
3710 // link an ObjC bitcode without this module flag with an ObjC bitcode with
3711 // this module flag.
3712 if (HasObjCFlag && !HasClassProperties) {
3713 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3721 void llvm::UpgradeSectionAttributes(Module &M) {
3722 auto TrimSpaces = [](StringRef Section) -> std::string {
3723 SmallVector<StringRef, 5> Components;
3724 Section.split(Components, ',');
3726 SmallString<32> Buffer;
3727 raw_svector_ostream OS(Buffer);
3729 for (auto Component : Components)
3730 OS << ',' << Component.trim();
3732 return OS.str().substr(1);
3733 };
3735 for (auto &GV : M.globals()) {
3736 if (!GV.hasSection())
3737 continue;
3739 StringRef Section = GV.getSection();
3741 if (!Section.startswith("__DATA, __objc_catlist"))
3742 continue;
3744 // __DATA, __objc_catlist, regular, no_dead_strip
3745 // __DATA,__objc_catlist,regular,no_dead_strip
3746 GV.setSection(TrimSpaces(Section));
3747 }
3748 }
3750 static bool isOldLoopArgument(Metadata *MD) {
3751 auto *T = dyn_cast_or_null<MDTuple>(MD);
3752 if (!T)
3753 return false;
3754 if (T->getNumOperands() < 1)
3755 return false;
3756 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3757 if (!S)
3758 return false;
3759 return S->getString().startswith("llvm.vectorizer.");
3760 }
3762 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3763 StringRef OldPrefix = "llvm.vectorizer.";
3764 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
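// "llvm.vectorizer.unroll" is special-cased below; every other
// "llvm.vectorizer.X" tag becomes "llvm.loop.vectorize.X", e.g.
// "llvm.vectorizer.width" -> "llvm.loop.vectorize.width".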
3766 if (OldTag == "llvm.vectorizer.unroll")
3767 return MDString::get(C, "llvm.loop.interleave.count");
3769 return MDString::get(
3770 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3774 static Metadata *upgradeLoopArgument(Metadata *MD) {
3775 auto *T = dyn_cast_or_null<MDTuple>(MD);
3776 if (!T)
3777 return MD;
3778 if (T->getNumOperands() < 1)
3779 return MD;
3780 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3781 if (!OldTag)
3782 return MD;
3783 if (!OldTag->getString().startswith("llvm.vectorizer."))
3784 return MD;
3786 // This has an old tag. Upgrade it.
3787 SmallVector<Metadata *, 8> Ops;
3788 Ops.reserve(T->getNumOperands());
3789 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3790 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3791 Ops.push_back(T->getOperand(I));
3793 return MDTuple::get(T->getContext(), Ops);
3794 }
3796 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3797 auto *T = dyn_cast<MDTuple>(&N);
3798 if (!T)
3799 return &N;
3801 if (none_of(T->operands(), isOldLoopArgument))
3802 return &N;
3804 SmallVector<Metadata *, 8> Ops;
3805 Ops.reserve(T->getNumOperands());
3806 for (Metadata *MD : T->operands())
3807 Ops.push_back(upgradeLoopArgument(MD));
3809 return MDTuple::get(T->getContext(), Ops);