#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
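// These target headers supply the Intrinsic::aarch64_*, Intrinsic::arm_* and
// Intrinsic::x86_* intrinsic IDs that the upgrade code below maps old
// intrinsic names onto.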
Type *Arg0Type = F->getFunctionType()->getParamType(0);

Type *LastArgType = F->getFunctionType()->getParamType(
    F->getFunctionType()->getNumParams() - 1);

if (F->getReturnType()->isVectorTy())
if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
    Name == "addcarry.u32" || Name == "addcarry.u64" ||
    Name == "subborrow.u32" || Name == "subborrow.u64" ||
    Name.startswith("sse2.padds.") || Name.startswith("sse2.psubs.") ||
    Name.startswith("sse2.paddus.") || Name.startswith("sse2.psubus.") ||
    Name.startswith("avx2.padds.") || Name.startswith("avx2.psubs.") ||
    Name.startswith("avx2.paddus.") || Name.startswith("avx2.psubus.") ||
    Name.startswith("avx512.padds.") || Name.startswith("avx512.psubs.") ||
    Name.startswith("avx512.mask.padds.") ||
    Name.startswith("avx512.mask.psubs.") ||
    Name.startswith("avx512.mask.paddus.") ||
    Name.startswith("avx512.mask.psubus.") ||
    Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
    Name == "ssse3.pabs.d.128" ||
    Name.startswith("fma4.vfmadd.s") || Name.startswith("fma.vfmadd.") ||
    Name.startswith("fma.vfmsub.") || Name.startswith("fma.vfmsubadd.") ||
    Name.startswith("fma.vfnmadd.") || Name.startswith("fma.vfnmsub.") ||
    Name.startswith("avx512.mask.vfmadd.") ||
    Name.startswith("avx512.mask.vfnmadd.") ||
    Name.startswith("avx512.mask.vfnmsub.") ||
    Name.startswith("avx512.mask3.vfmadd.") ||
    Name.startswith("avx512.maskz.vfmadd.") ||
    Name.startswith("avx512.mask3.vfmsub.") ||
    Name.startswith("avx512.mask3.vfnmsub.") ||
    Name.startswith("avx512.mask.vfmaddsub.") ||
    Name.startswith("avx512.maskz.vfmaddsub.") ||
    Name.startswith("avx512.mask3.vfmaddsub.") ||
    Name.startswith("avx512.mask3.vfmsubadd.") ||
    Name.startswith("avx512.mask.shuf.i") ||
    Name.startswith("avx512.mask.shuf.f") ||
    Name.startswith("avx512.kunpck") || Name.startswith("avx2.pabs.") ||
    Name.startswith("avx512.mask.pabs.") ||
    Name.startswith("avx512.broadcastm") ||
    Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd" ||
    Name.startswith("avx512.mask.sqrt.p") ||
    Name.startswith("avx.sqrt.p") || Name.startswith("sse2.sqrt.p") ||
    Name.startswith("sse.sqrt.p") ||
    Name.startswith("avx512.mask.pbroadcast") ||
    Name.startswith("sse2.pcmpeq.") || Name.startswith("sse2.pcmpgt.") ||
    Name.startswith("avx2.pcmpeq.") || Name.startswith("avx2.pcmpgt.") ||
    Name.startswith("avx512.mask.pcmpeq.") ||
    Name.startswith("avx512.mask.pcmpgt.") ||
    Name.startswith("avx.vperm2f128.") || Name == "avx2.vperm2i128" ||
    Name == "sse.add.ss" || Name == "sse2.add.sd" ||
    Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
    Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
    Name == "sse.div.ss" || Name == "sse2.div.sd" ||
    Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
    Name == "sse41.pmaxsd" || Name == "sse2.pmaxu.b" ||
    Name == "sse41.pmaxuw" || Name == "sse41.pmaxud" ||
    Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
    Name == "sse41.pminsd" || Name == "sse2.pminu.b" ||
    Name == "sse41.pminuw" || Name == "sse41.pminud" ||
    Name == "avx512.kand.w" || Name == "avx512.kandn.w" ||
    Name == "avx512.knot.w" || Name == "avx512.kor.w" ||
    Name == "avx512.kxor.w" || Name == "avx512.kxnor.w" ||
    Name == "avx512.kortestc.w" || Name == "avx512.kortestz.w" ||
    Name.startswith("avx512.mask.pshuf.b.") ||
    Name.startswith("avx2.pmax") || Name.startswith("avx2.pmin") ||
    Name.startswith("avx512.mask.pmax") ||
    Name.startswith("avx512.mask.pmin") ||
    Name.startswith("avx2.vbroadcast") ||
    Name.startswith("avx2.pbroadcast") ||
    Name.startswith("avx.vpermil.") || Name.startswith("sse2.pshuf") ||
    Name.startswith("avx512.pbroadcast") ||
    Name.startswith("avx512.mask.broadcast.s") ||
    Name.startswith("avx512.mask.movddup") ||
    Name.startswith("avx512.mask.movshdup") ||
    Name.startswith("avx512.mask.movsldup") ||
    Name.startswith("avx512.mask.pshuf.d.") ||
    Name.startswith("avx512.mask.pshufl.w.") ||
    Name.startswith("avx512.mask.pshufh.w.") ||
    Name.startswith("avx512.mask.shuf.p") ||
    Name.startswith("avx512.mask.vpermil.p") ||
    Name.startswith("avx512.mask.perm.df.") ||
    Name.startswith("avx512.mask.perm.di.") ||
    Name.startswith("avx512.mask.punpckl") ||
    Name.startswith("avx512.mask.punpckh") ||
    Name.startswith("avx512.mask.unpckl.") ||
    Name.startswith("avx512.mask.unpckh.") ||
    Name.startswith("avx512.mask.pand.") ||
    Name.startswith("avx512.mask.pandn.") ||
    Name.startswith("avx512.mask.por.") ||
    Name.startswith("avx512.mask.pxor.") ||
    Name.startswith("avx512.mask.and.") ||
    Name.startswith("avx512.mask.andn.") ||
    Name.startswith("avx512.mask.or.") ||
    Name.startswith("avx512.mask.xor.") ||
    Name.startswith("avx512.mask.padd.") ||
    Name.startswith("avx512.mask.psub.") ||
    Name.startswith("avx512.mask.pmull.") ||
    Name.startswith("avx512.mask.cvtdq2pd.") ||
    Name.startswith("avx512.mask.cvtudq2pd.") ||
    Name.startswith("avx512.mask.cvtudq2ps.") ||
    Name.startswith("avx512.mask.cvtqq2pd.") ||
    Name.startswith("avx512.mask.cvtuqq2pd.") ||
    Name.startswith("avx512.mask.cvtdq2ps.") ||
    Name == "avx512.mask.vcvtph2ps.128" ||
    Name == "avx512.mask.vcvtph2ps.256" ||
    Name == "avx512.mask.cvtqq2ps.256" ||
    Name == "avx512.mask.cvtqq2ps.512" ||
    Name == "avx512.mask.cvtuqq2ps.256" ||
    Name == "avx512.mask.cvtuqq2ps.512" ||
    Name == "avx512.mask.cvtpd2dq.256" ||
    Name == "avx512.mask.cvtpd2ps.256" ||
    Name == "avx512.mask.cvttpd2dq.256" ||
    Name == "avx512.mask.cvttps2dq.128" ||
    Name == "avx512.mask.cvttps2dq.256" ||
    Name == "avx512.mask.cvtps2pd.128" ||
    Name == "avx512.mask.cvtps2pd.256" ||
    Name == "avx512.cvtusi2sd" ||
    Name.startswith("avx512.mask.permvar.") ||
    Name == "sse2.pmulu.dq" || Name == "sse41.pmuldq" ||
    Name == "avx2.pmulu.dq" || Name == "avx2.pmul.dq" ||
    Name == "avx512.pmulu.dq.512" || Name == "avx512.pmul.dq.512" ||
    Name.startswith("avx512.mask.pmul.dq.") ||
    Name.startswith("avx512.mask.pmulu.dq.") ||
    Name.startswith("avx512.mask.pmul.hr.sw.") ||
    Name.startswith("avx512.mask.pmulh.w.") ||
    Name.startswith("avx512.mask.pmulhu.w.") ||
    Name.startswith("avx512.mask.pmaddw.d.") ||
    Name.startswith("avx512.mask.pmaddubs.w.") ||
    Name.startswith("avx512.mask.packsswb.") ||
    Name.startswith("avx512.mask.packssdw.") ||
    Name.startswith("avx512.mask.packuswb.") ||
    Name.startswith("avx512.mask.packusdw.") ||
    Name.startswith("avx512.mask.cmp.b") ||
    Name.startswith("avx512.mask.cmp.d") ||
    Name.startswith("avx512.mask.cmp.q") ||
    Name.startswith("avx512.mask.cmp.w") ||
    Name.startswith("avx512.cmp.p") ||
    Name.startswith("avx512.mask.ucmp.") ||
    Name.startswith("avx512.cvtb2mask.") ||
    Name.startswith("avx512.cvtw2mask.") ||
    Name.startswith("avx512.cvtd2mask.") ||
    Name.startswith("avx512.cvtq2mask.") ||
    Name.startswith("avx512.mask.vpermilvar.") ||
    Name.startswith("avx512.mask.psll.d") ||
    Name.startswith("avx512.mask.psll.q") ||
    Name.startswith("avx512.mask.psll.w") ||
    Name.startswith("avx512.mask.psra.d") ||
    Name.startswith("avx512.mask.psra.q") ||
    Name.startswith("avx512.mask.psra.w") ||
    Name.startswith("avx512.mask.psrl.d") ||
    Name.startswith("avx512.mask.psrl.q") ||
    Name.startswith("avx512.mask.psrl.w") ||
    Name.startswith("avx512.mask.pslli") ||
    Name.startswith("avx512.mask.psrai") ||
    Name.startswith("avx512.mask.psrli") ||
    Name.startswith("avx512.mask.psllv") ||
    Name.startswith("avx512.mask.psrav") ||
    Name.startswith("avx512.mask.psrlv") ||
    Name.startswith("sse41.pmovsx") || Name.startswith("sse41.pmovzx") ||
    Name.startswith("avx2.pmovsx") || Name.startswith("avx2.pmovzx") ||
    Name.startswith("avx512.mask.pmovsx") ||
    Name.startswith("avx512.mask.pmovzx") ||
    Name.startswith("avx512.mask.lzcnt.") ||
    Name.startswith("avx512.mask.pternlog.") ||
    Name.startswith("avx512.maskz.pternlog.") ||
    Name.startswith("avx512.mask.vpmadd52") ||
    Name.startswith("avx512.maskz.vpmadd52") ||
    Name.startswith("avx512.mask.vpermi2var.") ||
    Name.startswith("avx512.mask.vpermt2var.") ||
    Name.startswith("avx512.maskz.vpermt2var.") ||
    Name.startswith("avx512.mask.vpdpbusd.") ||
    Name.startswith("avx512.maskz.vpdpbusd.") ||
    Name.startswith("avx512.mask.vpdpbusds.") ||
    Name.startswith("avx512.maskz.vpdpbusds.") ||
    Name.startswith("avx512.mask.vpdpwssd.") ||
    Name.startswith("avx512.maskz.vpdpwssd.") ||
    Name.startswith("avx512.mask.vpdpwssds.") ||
    Name.startswith("avx512.maskz.vpdpwssds.") ||
    Name.startswith("avx512.mask.dbpsadbw.") ||
    Name.startswith("avx512.mask.vpshld.") ||
    Name.startswith("avx512.mask.vpshrd.") ||
    Name.startswith("avx512.mask.vpshldv.") ||
    Name.startswith("avx512.mask.vpshrdv.") ||
    Name.startswith("avx512.maskz.vpshldv.") ||
    Name.startswith("avx512.maskz.vpshrdv.") ||
    Name.startswith("avx512.vpshld.") ||
    Name.startswith("avx512.vpshrd.") ||
    Name.startswith("avx512.mask.add.p") ||
    Name.startswith("avx512.mask.sub.p") ||
    Name.startswith("avx512.mask.mul.p") ||
    Name.startswith("avx512.mask.div.p") ||
    Name.startswith("avx512.mask.max.p") ||
    Name.startswith("avx512.mask.min.p") ||
    Name.startswith("avx512.mask.fpclass.p") ||
    Name.startswith("avx512.mask.vpshufbitqmb.") ||
    Name.startswith("avx512.mask.pmultishift.qb.") ||
    Name.startswith("avx512.mask.conflict.") ||
    Name == "avx512.mask.pmov.qd.256" ||
    Name == "avx512.mask.pmov.qd.512" ||
    Name == "avx512.mask.pmov.wb.256" ||
    Name == "avx512.mask.pmov.wb.512" ||
    Name == "sse.cvtsi2ss" || Name == "sse.cvtsi642ss" ||
    Name == "sse2.cvtsi2sd" || Name == "sse2.cvtsi642sd" ||
    Name == "sse2.cvtss2sd" ||
    Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
    Name == "sse2.cvtps2pd" ||
    Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
    Name == "avx.cvt.ps2.pd.256" ||
    Name.startswith("vcvtph2ps.") ||
    Name.startswith("avx.vinsertf128.") || Name == "avx2.vinserti128" ||
    Name.startswith("avx512.mask.insert") ||
    Name.startswith("avx.vextractf128.") || Name == "avx2.vextracti128" ||
    Name.startswith("avx512.mask.vextract") ||
    Name.startswith("sse4a.movnt.") || Name.startswith("avx.movnt.") ||
    Name.startswith("avx512.storent.") ||
    Name == "sse41.movntdqa" || Name == "avx2.movntdqa" ||
    Name == "avx512.movntdqa" ||
    Name == "sse2.storel.dq" ||
    Name.startswith("sse.storeu.") || Name.startswith("sse2.storeu.") ||
    Name.startswith("avx.storeu.") ||
    Name.startswith("avx512.mask.storeu.") ||
    Name.startswith("avx512.mask.store.p") ||
    Name.startswith("avx512.mask.store.b.") ||
    Name.startswith("avx512.mask.store.w.") ||
    Name.startswith("avx512.mask.store.d.") ||
    Name.startswith("avx512.mask.store.q.") ||
    Name == "avx512.mask.store.ss" ||
    Name.startswith("avx512.mask.loadu.") ||
    Name.startswith("avx512.mask.load.") ||
    Name.startswith("avx512.mask.expand.load.") ||
    Name.startswith("avx512.mask.compress.store.") ||
    Name.startswith("avx512.mask.expand.b") ||
    Name.startswith("avx512.mask.expand.w") ||
    Name.startswith("avx512.mask.expand.d") ||
    Name.startswith("avx512.mask.expand.q") ||
    Name.startswith("avx512.mask.expand.p") ||
    Name.startswith("avx512.mask.compress.b") ||
    Name.startswith("avx512.mask.compress.w") ||
    Name.startswith("avx512.mask.compress.d") ||
    Name.startswith("avx512.mask.compress.q") ||
    Name.startswith("avx512.mask.compress.p") ||
    Name == "sse42.crc32.64.8" ||
    Name.startswith("avx.vbroadcast.s") ||
    Name.startswith("avx512.vbroadcast.s") ||
    Name.startswith("avx512.mask.palignr.") ||
    Name.startswith("avx512.mask.valign.") ||
    Name.startswith("sse2.psll.dq") || Name.startswith("sse2.psrl.dq") ||
    Name.startswith("avx2.psll.dq") || Name.startswith("avx2.psrl.dq") ||
    Name.startswith("avx512.psll.dq") || Name.startswith("avx512.psrl.dq") ||
    Name == "sse41.pblendw" || Name.startswith("sse41.blendp") ||
    Name.startswith("avx.blend.p") || Name == "avx2.pblendw" ||
    Name.startswith("avx2.pblendd.") ||
    Name.startswith("avx.vbroadcastf128") || Name == "avx2.vbroadcasti128" ||
    Name.startswith("avx512.mask.broadcastf32x4.") ||
    Name.startswith("avx512.mask.broadcastf64x2.") ||
    Name.startswith("avx512.mask.broadcastf32x8.") ||
    Name.startswith("avx512.mask.broadcastf64x4.") ||
    Name.startswith("avx512.mask.broadcasti32x4.") ||
    Name.startswith("avx512.mask.broadcasti64x2.") ||
    Name.startswith("avx512.mask.broadcasti32x8.") ||
    Name.startswith("avx512.mask.broadcasti64x4.") ||
    Name == "xop.vpcmov" || Name == "xop.vpcmov.256" ||
    Name.startswith("avx512.mask.move.s") ||
    Name.startswith("avx512.cvtmask2") ||
    Name.startswith("xop.vpcom") || Name.startswith("xop.vprot") ||
    Name.startswith("avx512.prol") || Name.startswith("avx512.pror") ||
    Name.startswith("avx512.mask.prorv.") ||
    Name.startswith("avx512.mask.pror.") ||
    Name.startswith("avx512.mask.prolv.") ||
    Name.startswith("avx512.mask.prol.") ||
    Name.startswith("avx512.ptestm") || Name.startswith("avx512.ptestnm") ||
    Name.startswith("avx512.mask.pavg"))
if (!Name.startswith("x86."))

if (Name == "rdtscp") {
  if (F->getFunctionType()->getNumParams() == 0)
                                      Intrinsic::x86_rdtscp);

if (Name.startswith("sse41.ptest")) {
  if (Name.substr(11) == "c")
  if (Name.substr(11) == "z")
  if (Name.substr(11) == "nzc")

if (Name == "sse41.insertps")
if (Name == "sse41.dppd")
if (Name == "sse41.dpps")
if (Name == "sse41.mpsadbw")
if (Name == "avx.dp.ps.256")
if (Name == "avx2.mpsadbw")
if (Name == "avx512.mask.cmp.pd.128")
if (Name == "avx512.mask.cmp.pd.256")
if (Name == "avx512.mask.cmp.pd.512")
if (Name == "avx512.mask.cmp.ps.128")
if (Name == "avx512.mask.cmp.ps.256")
if (Name == "avx512.mask.cmp.ps.512")

if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
                                      Intrinsic::x86_xop_vfrcz_ss);
if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
                                      Intrinsic::x86_xop_vfrcz_sd);

if (Name.startswith("xop.vpermil2")) {
  auto Idx = F->getFunctionType()->getParamType(2);
  if (Idx->isFPOrFPVectorTy()) {
    unsigned IdxSize = Idx->getPrimitiveSizeInBits();
    unsigned EltSize = Idx->getScalarSizeInBits();
    if (EltSize == 64 && IdxSize == 128)
      Permil2ID = Intrinsic::x86_xop_vpermil2pd;
    else if (EltSize == 32 && IdxSize == 128)
      Permil2ID = Intrinsic::x86_xop_vpermil2ps;
    else if (EltSize == 64 && IdxSize == 256)
      Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
    else
      Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;

if (Name == "seh.recoverfp") {
assert(F && "Illegal to upgrade a non-existent Function.");

if (Name.size() <= 8 || !Name.startswith("llvm."))
if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
                                    F->arg_begin()->getType());
if (Name.startswith("aarch64.neon.frintn")) {
                                    F->arg_begin()->getType());
if (Name.startswith("aarch64.neon.rbit")) {
                                    F->arg_begin()->getType());
if (Name.startswith("arm.neon.vclz")) {
    F->arg_begin()->getType(),
                           "llvm.ctlz." + Name.substr(14), F->getParent());
if (Name.startswith("arm.neon.vcnt")) {
                                    F->arg_begin()->getType());

static const Regex vstRegex(
    "^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");

  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                            Intrinsic::arm_neon_vst2,
                                            Intrinsic::arm_neon_vst3,
                                            Intrinsic::arm_neon_vst4};
      Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
      Intrinsic::arm_neon_vst4lane

  auto fArgs = F->getFunctionType()->params();
  Type *Tys[] = {fArgs[0], fArgs[1]};
  if (!Name.contains("lane"))
                                      StoreInts[fArgs.size() - 3], Tys);
                                      StoreLaneInts[fArgs.size() - 5], Tys);

if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {

if (Name.startswith("arm.neon.vqadds.")) {
                                    F->arg_begin()->getType());
if (Name.startswith("arm.neon.vqaddu.")) {
                                    F->arg_begin()->getType());
if (Name.startswith("arm.neon.vqsubs.")) {
                                    F->arg_begin()->getType());
if (Name.startswith("arm.neon.vqsubu.")) {
                                    F->arg_begin()->getType());
if (Name.startswith("aarch64.neon.addp")) {
  if (F->arg_size() != 2)
  VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
                                    Intrinsic::aarch64_neon_faddp, Ty);

if ((Name.startswith("arm.neon.bfdot.") ||
     Name.startswith("aarch64.neon.bfdot.")) &&
    Name.endswith("i8")) {
      .Cases("arm.neon.bfdot.v2f32.v8i8",
             "arm.neon.bfdot.v4f32.v16i8",
             Intrinsic::arm_neon_bfdot)
      .Cases("aarch64.neon.bfdot.v2f32.v8i8",
             "aarch64.neon.bfdot.v4f32.v16i8",
             Intrinsic::aarch64_neon_bfdot)
  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
  assert((OperandWidth == 64 || OperandWidth == 128) &&
         "Unexpected operand width");
  std::array<Type *, 2> Tys {{
if ((Name.startswith("arm.neon.bfm") ||
     Name.startswith("aarch64.neon.bfm")) &&
    Name.endswith(".v4f32.v16i8")) {
      .Case("arm.neon.bfmmla.v4f32.v16i8",
            Intrinsic::arm_neon_bfmmla)
      .Case("arm.neon.bfmlalb.v4f32.v16i8",
            Intrinsic::arm_neon_bfmlalb)
      .Case("arm.neon.bfmlalt.v4f32.v16i8",
            Intrinsic::arm_neon_bfmlalt)
      .Case("aarch64.neon.bfmmla.v4f32.v16i8",
            Intrinsic::aarch64_neon_bfmmla)
      .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
            Intrinsic::aarch64_neon_bfmlalb)
      .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
            Intrinsic::aarch64_neon_bfmlalt)
  std::array<Type *, 0> Tys;
if (Name == "arm.mve.vctp64" &&
    cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {

if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
    Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
    Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
    Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
    Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
    Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
    Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
    Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
    Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
    Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
    Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
    Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
    Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
    Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")

if (Name == "amdgcn.alignbit") {
                                    {F->getReturnType()});
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
                                    F->arg_begin()->getType());
if (Name.startswith("cttz.") && F->arg_size() == 1) {
                                    F->arg_begin()->getType());

if (Name == "dbg.value" && F->arg_size() == 4) {
if (Name.startswith("experimental.vector.extract.")) {
  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
                                    Intrinsic::vector_extract, Tys);

if (Name.startswith("experimental.vector.insert.")) {
  auto Args = F->getFunctionType()->params();
                                    Intrinsic::vector_insert, Tys);

static const Regex R(
    "^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups)) {
      .Case("add", Intrinsic::vector_reduce_add)
      .Case("mul", Intrinsic::vector_reduce_mul)
      .Case("and", Intrinsic::vector_reduce_and)
      .Case("or", Intrinsic::vector_reduce_or)
      .Case("xor", Intrinsic::vector_reduce_xor)
      .Case("smax", Intrinsic::vector_reduce_smax)
      .Case("smin", Intrinsic::vector_reduce_smin)
      .Case("umax", Intrinsic::vector_reduce_umax)
      .Case("umin", Intrinsic::vector_reduce_umin)
      .Case("fmax", Intrinsic::vector_reduce_fmax)
      .Case("fmin", Intrinsic::vector_reduce_fmin)
  auto Args = F->getFunctionType()->params();

    "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
if (R2.match(Name, &Groups)) {
  if (Groups[1] == "fadd")
    ID = Intrinsic::vector_reduce_fadd;
  if (Groups[1] == "fmul")
    ID = Intrinsic::vector_reduce_fmul;
  auto Args = F->getFunctionType()->params();
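// The regexes above recognise the old "experimental.vector.reduce.*" names
// (including the ".v2." fadd/fmul variants) and map them onto the current,
// non-experimental Intrinsic::vector_reduce_* IDs.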
bool IsLifetimeStart = Name.startswith("lifetime.start");
if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::lifetime_start : Intrinsic::invariant_start;
  auto Args = F->getFunctionType()->params();

bool IsLifetimeEnd = Name.startswith("lifetime.end");
if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::lifetime_end : Intrinsic::invariant_end;
  auto Args = F->getFunctionType()->params();
  Type *ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};

if (Name.startswith("invariant.group.barrier")) {
  auto Args = F->getFunctionType()->params();
      Intrinsic::launder_invariant_group, ObjectPtr);
if (Name.startswith("masked.load.")) {
  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
                                    Intrinsic::masked_load,

if (Name.startswith("masked.store.")) {
  auto Args = F->getFunctionType()->params();
                                    Intrinsic::masked_store,

if (Name.startswith("masked.gather.")) {
  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
                                    Intrinsic::masked_gather, Tys);

if (Name.startswith("masked.scatter.")) {
  auto Args = F->getFunctionType()->params();
                                    Intrinsic::masked_scatter, Tys);
if (Name.startswith("memcpy.") && F->arg_size() == 5) {

if (Name.startswith("memmove.") && F->arg_size() == 5) {

if (Name.startswith("memset.") && F->arg_size() == 5) {
  const auto *FT = F->getFunctionType();
  Type *ParamTypes[2] = {
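// Assumption (not shown in this excerpt): the five-argument
// memcpy/memmove/memset forms are the old signatures that still carried an
// explicit alignment operand; the upgrade rebuilds the declarations against
// the current four-operand intrinsics.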
if (Name.startswith("nvvm.")) {
      .Cases("brev32", "brev64", Intrinsic::bitreverse)
      .Case("clz.i", Intrinsic::ctlz)
      .Case("popc.i", Intrinsic::ctpop)
                                    {F->getReturnType()});

      .Cases("abs.i", "abs.ll", true)
      .Cases("clz.ll", "popc.ll", "h2f", true)
      .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
      .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
if (Name.startswith("objectsize.")) {
  Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
  if (F->arg_size() == 2 || F->arg_size() == 3 ||

if (Name == "prefetch") {
  Type *Tys[] = {F->arg_begin()->getType()};
} else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
                                    Intrinsic::ptr_annotation,
                                    F->arg_begin()->getType());

if (Name == "stackprotectorcheck") {

if (Name == "var.annotation" && F->arg_size() == 4) {
                                    Intrinsic::var_annotation);
auto *ST = dyn_cast<StructType>(F->getReturnType());
if (ST && (!ST->isLiteral() || ST->isPacked())) {
  auto *FT = F->getFunctionType();
  std::string Name = F->getName().str();
                              Name, F->getParent());

if (Result != None) {

assert(F != NewFn && "Intrinsic function upgraded to the same function");

     GV->getName() == "llvm.global_dtors")) ||

unsigned N = Init->getNumOperands();
std::vector<Constant *> NewCtors(N);
for (unsigned i = 0; i != N; ++i) {
  auto Ctor = cast<Constant>(Init->getOperand(i));
      EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;

for (unsigned l = 0; l != NumElts; l += 16)
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = NumElts + i - Shift;
      Idx -= NumElts - 16;
    Idxs[l + i] = Idx + l;

return Builder.CreateBitCast(Res, ResultTy, "cast");
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;

for (unsigned l = 0; l != NumElts; l += 16)
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = i + Shift;
      Idx += NumElts - 16;
    Idxs[l + i] = Idx + l;

return Builder.CreateBitCast(Res, ResultTy, "cast");
    Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());

for (unsigned i = 0; i != NumElts; ++i)

if (const auto *C = dyn_cast<Constant>(Mask))
  if (C->isAllOnesValue())
      cast<FixedVectorType>(Op0->getType())->getNumElements());

if (const auto *C = dyn_cast<Constant>(Mask))
  if (C->isAllOnesValue())

                                    Mask->getType()->getIntegerBitWidth());
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

  ShiftVal &= (NumElts - 1);

if (ShiftVal > 16) {

for (unsigned l = 0; l < NumElts; l += 16) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = ShiftVal + i;
    if (!IsVALIGN && Idx >= 16)
      Idx += NumElts - 16;
    Indices[l + i] = Idx + l;
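// PALIGNR/VALIGN above: the shuffle indices start at ShiftVal and walk
// forward, so the result is a byte-granular (PALIGNR, per 16-byte lane) or
// element-granular (VALIGN, whole vector) window taken out of the
// concatenation of the two source operands.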
                                 bool ZeroMask, bool IndexForm) {
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
else if (VecWidth == 128 && EltWidth == 16)
  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
                             bool IsRotateRight) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
  Amt = Builder.CreateVectorSplat(NumElts, Amt);
Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});

                               bool IsShiftRight, bool ZeroMask) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
  Amt = Builder.CreateVectorSplat(NumElts, Amt);
Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
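// Rotates and the VSHLD/VSHRD "concat shift" intrinsics are both expressed
// as funnel shifts: a rotate is llvm.fshl/llvm.fshr with the same value for
// both data operands ({Src, Src, Amt}), while the concat shifts pass two
// distinct operands ({Op0, Op1, Amt}).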
Ptr = Builder.CreateBitCast(Ptr,
const Align Alignment =
        ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)

if (const auto *C = dyn_cast<Constant>(Mask))
  if (C->isAllOnesValue())
    return Builder.CreateAlignedStore(Data, Ptr, Alignment);

unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();

const Align Alignment =

if (const auto *C = dyn_cast<Constant>(Mask))
  if (C->isAllOnesValue())
    return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();

return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
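// Fast path above: a constant all-ones mask means every lane is active, so
// the upgraded code emits an ordinary aligned store/load instead of
// llvm.masked.store / llvm.masked.load.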
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();

const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())

  for (unsigned i = 0; i != NumElts; ++i)
  for (unsigned i = NumElts; i != 8; ++i)
    Indices[i] = NumElts + i % NumElts;
  Vec = Builder.CreateShuffleVector(Vec,

                                    unsigned CC, bool Signed) {
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

} else if (CC == 7) {

unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();

return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
if (Name.startswith("max.p")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_sse_max_ps;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_sse2_max_pd;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx_max_ps_256;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx_max_pd_256;
} else if (Name.startswith("min.p")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_sse_min_ps;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_sse2_min_pd;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx_min_ps_256;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx_min_pd_256;
} else if (Name.startswith("pshuf.b.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_ssse3_pshuf_b_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pshuf_b;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pshuf_b_512;
} else if (Name.startswith("pmul.hr.sw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmul_hr_sw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
} else if (Name.startswith("pmulh.w.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_pmulh_w;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmulh_w;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmulh_w_512;
} else if (Name.startswith("pmulhu.w.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_pmulhu_w;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmulhu_w;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmulhu_w_512;
} else if (Name.startswith("pmaddw.d.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_pmadd_wd;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmadd_wd;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmaddw_d_512;
} else if (Name.startswith("pmaddubs.w.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmadd_ub_sw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmaddubs_w_512;
} else if (Name.startswith("packsswb.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_packsswb_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packsswb;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packsswb_512;
} else if (Name.startswith("packssdw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_packssdw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packssdw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packssdw_512;
} else if (Name.startswith("packuswb.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_packuswb_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packuswb;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packuswb_512;
} else if (Name.startswith("packusdw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse41_packusdw;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packusdw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packusdw_512;
} else if (Name.startswith("vpermilvar.")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx_vpermilvar_ps;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx_vpermilvar_pd;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx_vpermilvar_ps_256;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx_vpermilvar_pd_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
} else if (Name == "cvtpd2dq.256") {
  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
} else if (Name == "cvtpd2ps.256") {
  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
} else if (Name == "cvttpd2dq.256") {
  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
} else if (Name == "cvttps2dq.128") {
  IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
} else if (Name.startswith("permvar.")) {
  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx2_permps;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx2_permd;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_permvar_df_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_permvar_di_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_permvar_sf_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_permvar_si_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_permvar_df_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_permvar_di_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_permvar_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_permvar_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_permvar_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_permvar_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_permvar_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_permvar_qi_512;
} else if (Name.startswith("dbpsadbw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_avx512_dbpsadbw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx512_dbpsadbw_256;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_dbpsadbw_512;
} else if (Name.startswith("pmultishift.qb.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_avx512_pmultishift_qb_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx512_pmultishift_qb_256;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmultishift_qb_512;
} else if (Name.startswith("conflict.")) {
  if (Name[9] == 'd' && VecWidth == 128)
    IID = Intrinsic::x86_avx512_conflict_d_128;
  else if (Name[9] == 'd' && VecWidth == 256)
    IID = Intrinsic::x86_avx512_conflict_d_256;
  else if (Name[9] == 'd' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_conflict_d_512;
  else if (Name[9] == 'q' && VecWidth == 128)
    IID = Intrinsic::x86_avx512_conflict_q_128;
  else if (Name[9] == 'q' && VecWidth == 256)
    IID = Intrinsic::x86_avx512_conflict_q_256;
  else if (Name[9] == 'q' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_conflict_q_512;
} else if (Name.startswith("pavg.")) {
  if (Name[5] == 'b' && VecWidth == 128)
    IID = Intrinsic::x86_sse2_pavg_b;
  else if (Name[5] == 'b' && VecWidth == 256)
    IID = Intrinsic::x86_avx2_pavg_b;
  else if (Name[5] == 'b' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_pavg_b_512;
  else if (Name[5] == 'w' && VecWidth == 128)
    IID = Intrinsic::x86_sse2_pavg_w;
  else if (Name[5] == 'w' && VecWidth == 256)
    IID = Intrinsic::x86_avx2_pavg_w;
  else if (Name[5] == 'w' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_pavg_w_512;
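// These width/element-keyed tables translate a masked "avx512.mask.*" name
// into the matching unmasked intrinsic ID; the masking itself appears to be
// applied afterwards by selecting between the intrinsic result and the
// passthru operand under the mask, which is how masked operations are
// represented once upgraded.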
if (AsmStr->find("mov\tfp") == 0 &&
    AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
    (Pos = AsmStr->find("# marker")) != std::string::npos) {
  AsmStr->replace(Pos, 1, ";");
if (Name == "mve.vctp64.old") {
      F->getParent(), Intrinsic::arm_mve_pred_v2i,
      {VectorType::get(Builder.getInt1Ty(), 2, false)}),
      F->getParent(), Intrinsic::arm_mve_pred_i2v,
      {VectorType::get(Builder.getInt1Ty(), 4, false)}),
} else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
           Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
           Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
           Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
           Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
           Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
           Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
           Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
           Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
           Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
           Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
           Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
           Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
           Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
  std::vector<Type *> Tys;
  case Intrinsic::arm_mve_mull_int_predicated:
  case Intrinsic::arm_mve_vqdmull_predicated:
  case Intrinsic::arm_mve_vldr_gather_base_predicated:
  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
  case Intrinsic::arm_mve_vstr_scatter_base_predicated:
  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
  case Intrinsic::arm_mve_vldr_gather_offset_predicated:
  case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
  case Intrinsic::arm_cde_vcx1q_predicated:
  case Intrinsic::arm_cde_vcx1qa_predicated:
  case Intrinsic::arm_cde_vcx2q_predicated:
  case Intrinsic::arm_cde_vcx2qa_predicated:
  case Intrinsic::arm_cde_vcx3q_predicated:
  case Intrinsic::arm_cde_vcx3qa_predicated:
  std::vector<Value *> Ops;
    Type *Ty = Op->getType();
        F->getParent(), Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}),
                              Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
assert(F && "Intrinsic call is not direct?");

assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");

bool IsX86 = Name.startswith("x86.");
bool IsNVVM = Name.startswith("nvvm.");
bool IsARM = Name.startswith("arm.");
if (IsX86 && Name.startswith("sse4a.movnt.")) {
  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
  SI->setMetadata(M->getMDKindID("nontemporal"), Node);

if (IsX86 && (Name.startswith("avx.movnt.") ||
              Name.startswith("avx512.storent."))) {
  SI->setMetadata(M->getMDKindID("nontemporal"), Node);

if (IsX86 && Name == "sse2.storel.dq") {
  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

if (IsX86 && (Name.startswith("sse.storeu.") ||
              Name.startswith("sse2.storeu.") ||
              Name.startswith("avx.storeu."))) {
  Arg0 = Builder.CreateBitCast(Arg0,

if (IsX86 && Name == "avx512.mask.store.ss") {

if (IsX86 && (Name.startswith("avx512.mask.store"))) {
if (IsX86 && (Name.startswith("sse2.pcmp") ||
              Name.startswith("avx2.pcmp"))) {
  bool CmpEq = Name[9] == 'e';
} else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
  Rep = Builder.CreateVectorSplat(NumElts, Rep);
} else if (IsX86 && (Name == "sse.sqrt.ss" ||
                     Name == "sse2.sqrt.sd")) {
                                Intrinsic::sqrt, Elt0->getType());
} else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
                     Name.startswith("sse2.sqrt.p") ||
                     Name.startswith("sse.sqrt.p"))) {
                           {CI->getArgOperand(0)});
} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
       cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
                           : Intrinsic::x86_avx512_sqrt_pd_512;
                           {CI->getArgOperand(0)});
} else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                     Name.startswith("avx512.ptestnm"))) {
  Rep = Builder.CreateAnd(Op0, Op1);
  Rep = Builder.CreateICmp(Pred, Rep, Zero);
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
} else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
  for (unsigned i = 0; i != NumElts; ++i)
} else if (IsX86 && Name == "avx512.kand.w") {
} else if (IsX86 && Name == "avx512.kandn.w") {
} else if (IsX86 && Name == "avx512.kor.w") {
} else if (IsX86 && Name == "avx512.kxor.w") {
} else if (IsX86 && Name == "avx512.kxnor.w") {
} else if (IsX86 && Name == "avx512.knot.w") {
} else if (IsX86 &&
           (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
  if (Name[14] == 'c')
  Rep = Builder.CreateICmpEQ(Rep, C);
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
                     Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
                     Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
                     Name == "sse.div.ss" || Name == "sse2.div.sd")) {
  if (Name.contains(".add."))
    EltOp = Builder.CreateFAdd(Elt0, Elt1);
  else if (Name.contains(".sub."))
    EltOp = Builder.CreateFSub(Elt0, Elt1);
  else if (Name.contains(".mul."))
    EltOp = Builder.CreateFMul(Elt0, Elt1);
  else
    EltOp = Builder.CreateFDiv(Elt0, Elt1);
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
  bool CmpEq = Name[16] == 'e';
} else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
                           { CI->getOperand(0), CI->getArgOperand(1) });
} else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_fpclass_ps_128;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_fpclass_ps_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_fpclass_ps_512;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_fpclass_pd_128;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_fpclass_pd_256;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_fpclass_pd_512;
                           { CI->getOperand(0), CI->getArgOperand(1) });
} else if (IsX86 && Name.startswith("avx512.cmp.p")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
  if (VecWidth == 512)
} else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
} else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
} else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                     Name.startswith("avx512.cvtw2mask.") ||
                     Name.startswith("avx512.cvtd2mask.") ||
                     Name.startswith("avx512.cvtq2mask."))) {
} else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                     Name == "ssse3.pabs.w.128" ||
                     Name == "ssse3.pabs.d.128" ||
                     Name.startswith("avx2.pabs") ||
                     Name.startswith("avx512.mask.pabs"))) {
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
                     Name == "sse2.pmaxs.w" ||
                     Name == "sse41.pmaxsd" ||
                     Name.startswith("avx2.pmaxs") ||
                     Name.startswith("avx512.mask.pmaxs"))) {
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                     Name == "sse41.pmaxuw" ||
                     Name == "sse41.pmaxud" ||
                     Name.startswith("avx2.pmaxu") ||
                     Name.startswith("avx512.mask.pmaxu"))) {
} else if (IsX86 && (Name == "sse41.pminsb" ||
                     Name == "sse2.pmins.w" ||
                     Name == "sse41.pminsd" ||
                     Name.startswith("avx2.pmins") ||
                     Name.startswith("avx512.mask.pmins"))) {
} else if (IsX86 && (Name == "sse2.pminu.b" ||
                     Name == "sse41.pminuw" ||
                     Name == "sse41.pminud" ||
                     Name.startswith("avx2.pminu") ||
                     Name.startswith("avx512.mask.pminu"))) {
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                     Name == "avx2.pmulu.dq" ||
                     Name == "avx512.pmulu.dq.512" ||
                     Name.startswith("avx512.mask.pmulu.dq."))) {
} else if (IsX86 && (Name == "sse41.pmuldq" ||
                     Name == "avx2.pmul.dq" ||
                     Name == "avx512.pmul.dq.512" ||
                     Name.startswith("avx512.mask.pmul.dq."))) {
} else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                     Name == "sse2.cvtsi2sd" ||
                     Name == "sse.cvtsi642ss" ||
                     Name == "sse2.cvtsi642sd")) {
                     cast<VectorType>(CI->getType())->getElementType());
} else if (IsX86 && Name == "avx512.cvtusi2sd") {
                     cast<VectorType>(CI->getType())->getElementType());
} else if (IsX86 && Name == "sse2.cvtss2sd") {
      Rep, cast<VectorType>(CI->getType())->getElementType());
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                     Name == "sse2.cvtdq2ps" ||
                     Name == "avx.cvtdq2.pd.256" ||
                     Name == "avx.cvtdq2.ps.256" ||
                     Name.startswith("avx512.mask.cvtdq2pd.") ||
                     Name.startswith("avx512.mask.cvtudq2pd.") ||
                     Name.startswith("avx512.mask.cvtdq2ps.") ||
                     Name.startswith("avx512.mask.cvtudq2ps.") ||
                     Name.startswith("avx512.mask.cvtqq2pd.") ||
                     Name.startswith("avx512.mask.cvtuqq2pd.") ||
                     Name == "avx512.mask.cvtqq2ps.256" ||
                     Name == "avx512.mask.cvtqq2ps.512" ||
                     Name == "avx512.mask.cvtuqq2ps.256" ||
                     Name == "avx512.mask.cvtuqq2ps.512" ||
                     Name == "sse2.cvtps2pd" ||
                     Name == "avx.cvt.ps2.pd.256" ||
                     Name == "avx512.mask.cvtps2pd.128" ||
                     Name == "avx512.mask.cvtps2pd.256")) {
  auto *DstTy = cast<FixedVectorType>(CI->getType());
  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
  unsigned NumDstElts = DstTy->getNumElements();
  if (NumDstElts < SrcTy->getNumElements()) {
    assert(NumDstElts == 2 && "Unexpected vector size");
  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
             cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
    Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                   : Intrinsic::x86_avx512_sitofp_round;
    Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                     : Builder.CreateSIToFP(Rep, DstTy, "cvt");
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
                     Name.startswith("vcvtph2ps."))) {
  auto *DstTy = cast<FixedVectorType>(CI->getType());
  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
  unsigned NumDstElts = DstTy->getNumElements();
  if (NumDstElts != SrcTy->getNumElements()) {
    assert(NumDstElts == 4 && "Unexpected vector size");
  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
} else if (IsX86 && Name.startswith("avx512.mask.load")) {
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
  auto *ResultTy = cast<FixedVectorType>(CI->getType());
  Type *PtrTy = ResultTy->getElementType();
                                     ResultTy->getNumElements());
                                     Intrinsic::masked_expandload,
} else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
  Type *PtrTy = ResultTy->getElementType();
      cast<FixedVectorType>(ResultTy)->getNumElements());
                                     Intrinsic::masked_compressstore,
} else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                     Name.startswith("avx512.mask.expand."))) {
  auto *ResultTy = cast<FixedVectorType>(CI->getType());
                                     ResultTy->getNumElements());
  bool IsCompress = Name[12] == 'c';
  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                 : Intrinsic::x86_avx512_mask_expand;
} else if (IsX86 && Name.startswith("xop.vpcom")) {
  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
      Name.endswith("uq"))
  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
  if (Name.startswith("lt"))
  else if (Name.startswith("le"))
  else if (Name.startswith("gt"))
  else if (Name.startswith("ge"))
  else if (Name.startswith("eq"))
  else if (Name.startswith("ne"))
  else if (Name.startswith("false"))
  else if (Name.startswith("true"))
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
  Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && (Name.startswith("xop.vprot") ||
                     Name.startswith("avx512.prol") ||
                     Name.startswith("avx512.mask.prol"))) {
} else if (IsX86 && (Name.startswith("avx512.pror") ||
                     Name.startswith("avx512.mask.pror"))) {
} else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                     Name.startswith("avx512.mask.vpshld") ||
                     Name.startswith("avx512.maskz.vpshld"))) {
  bool ZeroMask = Name[11] == 'z';
} else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                     Name.startswith("avx512.mask.vpshrd") ||
                     Name.startswith("avx512.maskz.vpshrd"))) {
  bool ZeroMask = Name[11] == 'z';
} else if (IsX86 && Name == "sse42.crc32.64.8") {
                                     Intrinsic::x86_sse42_crc32_32_8);
} else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                     Name.startswith("avx512.vbroadcast.s"))) {
  auto *VecTy = cast<FixedVectorType>(CI->getType());
  Type *EltTy = VecTy->getElementType();
  unsigned EltNum = VecTy->getNumElements();
  for (unsigned I = 0; I < EltNum; ++I)
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                     Name.startswith("sse41.pmovzx") ||
                     Name.startswith("avx2.pmovsx") ||
                     Name.startswith("avx2.pmovzx") ||
                     Name.startswith("avx512.mask.pmovsx") ||
                     Name.startswith("avx512.mask.pmovzx"))) {
  auto *DstTy = cast<FixedVectorType>(CI->getType());
  unsigned NumDstElts = DstTy->getNumElements();
  for (unsigned i = 0; i != NumDstElts; ++i)
  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
               : Builder.CreateZExt(SV, DstTy);
} else if (Name == "avx512.mask.pmov.qd.256" ||
           Name == "avx512.mask.pmov.qd.512" ||
           Name == "avx512.mask.pmov.wb.256" ||
           Name == "avx512.mask.pmov.wb.512") {
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                     Name == "avx2.vbroadcasti128")) {
  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
  if (NumSrcElts == 2)
    Rep = Builder.CreateShuffleVector(
} else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                     Name.startswith("avx512.mask.shuf.f"))) {
  unsigned ControlBitsMask = NumLanes - 1;
  unsigned NumControlBits = NumLanes / 2;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
    if (l >= NumLanes / 2)
      LaneMask += NumLanes;
    for (unsigned i = 0; i != NumElementsInLane; ++i)
      ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
} else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                     Name.startswith("avx512.mask.broadcasti"))) {
  unsigned NumSrcElts =
  unsigned NumDstElts =
      cast<FixedVectorType>(CI->getType())->getNumElements();
  for (unsigned i = 0; i != NumDstElts; ++i)
    ShuffleMask[i] = i % NumSrcElts;
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                     Name.startswith("avx2.vbroadcast") ||
                     Name.startswith("avx512.pbroadcast") ||
                     Name.startswith("avx512.mask.broadcast.s"))) {
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
                     Name.startswith("avx2.padds.") ||
                     Name.startswith("avx512.padds.") ||
                     Name.startswith("avx512.mask.padds."))) {
} else if (IsX86 && (Name.startswith("sse2.psubs.") ||
                     Name.startswith("avx2.psubs.") ||
                     Name.startswith("avx512.psubs.") ||
                     Name.startswith("avx512.mask.psubs."))) {
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                     Name.startswith("avx2.paddus.") ||
                     Name.startswith("avx512.mask.paddus."))) {
} else if (IsX86 && (Name.startswith("sse2.psubus.") ||
                     Name.startswith("avx2.psubus.") ||
                     Name.startswith("avx512.mask.psubus."))) {
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
} else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
} else if (IsX86 && (Name == "sse2.psll.dq" ||
                     Name == "avx2.psll.dq")) {
} else if (IsX86 && (Name == "sse2.psrl.dq" ||
                     Name == "avx2.psrl.dq")) {
} else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                     Name == "avx2.psll.dq.bs" ||
                     Name == "avx512.psll.dq.512")) {
} else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                     Name == "avx2.psrl.dq.bs" ||
                     Name == "avx512.psrl.dq.512")) {
} else if (IsX86 && (Name == "sse41.pblendw" ||
                     Name.startswith("sse41.blendp") ||
                     Name.startswith("avx.blend.p") ||
                     Name == "avx2.pblendw" ||
                     Name.startswith("avx2.pblendd."))) {
  auto *VecTy = cast<FixedVectorType>(CI->getType());
  unsigned NumElts = VecTy->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i)
    Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
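// Blend-with-immediate above: bit (i % 8) of Imm picks lane i of the second
// operand (shuffle index i + NumElts) when set, lane i of the first operand
// otherwise; e.g. Imm = 0b0011 on a 4-element blend produces the mask
// <4, 5, 2, 3>.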
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                     Name == "avx2.vinserti128" ||
                     Name.startswith("avx512.mask.insert"))) {
  unsigned DstNumElts =
      cast<FixedVectorType>(CI->getType())->getNumElements();
  unsigned SrcNumElts =
      cast<FixedVectorType>(Op1->getType())->getNumElements();
  unsigned Scale = DstNumElts / SrcNumElts;
  for (unsigned i = 0; i != SrcNumElts; ++i)
  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
    Idxs[i] = SrcNumElts;
  Rep = Builder.CreateShuffleVector(Op1, Idxs);
  for (unsigned i = 0; i != DstNumElts; ++i)
  for (unsigned i = 0; i != SrcNumElts; ++i)
    Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                     Name == "avx2.vextracti128" ||
                     Name.startswith("avx512.mask.vextract"))) {
  unsigned DstNumElts =
      cast<FixedVectorType>(CI->getType())->getNumElements();
  unsigned SrcNumElts =
      cast<FixedVectorType>(Op0->getType())->getNumElements();
  unsigned Scale = SrcNumElts / DstNumElts;
  for (unsigned i = 0; i != DstNumElts; ++i) {
    Idxs[i] = i + (Imm * DstNumElts);
  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
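// vinsertf128/vinserti128 upgrade above: the narrow source is first widened
// to the destination width (the tail lanes are filler taken from index
// SrcNumElts), then a second shuffle splices those lanes into the
// destination at position Imm * SrcNumElts. vextract is the inverse: a
// single shuffle selecting DstNumElts consecutive lanes starting at
// Imm * DstNumElts.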
2819 }
else if (!IsX86 &&
Name ==
"stackprotectorcheck") {
2821 }
else if (IsX86 && (
Name.startswith(
"avx512.mask.perm.df.") ||
2822 Name.startswith(
"avx512.mask.perm.di."))) {
2825 auto *VecTy = cast<FixedVectorType>(CI->
getType());
2826 unsigned NumElts = VecTy->getNumElements();
2829 for (
unsigned i = 0;
i != NumElts; ++
i)
2830 Idxs[
i] = (
i & ~0
x3) + ((
Imm >> (2 * (
i & 0x3))) & 3);
2832 Rep =
Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                       Name == "avx2.vperm2i128")) {
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
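    // Presumably this mirrors the vperm2f128/vperm2i128 immediate encoding
    // for the cases handled here: the Imm & 0x01 test picks which half of V0
    // feeds the low half of the result, and Imm & 0x10 picks which half of V1
    // feeds the high half.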
  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                       Name == "sse2.pshuf.d" ||
                       Name.startswith("avx512.mask.vpermil.p") ||
                       Name.startswith("avx512.mask.pshuf.d."))) {
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                       Name.startswith("avx512.mask.pshufl.w."))) {
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                       Name.startswith("avx512.mask.pshufh.w."))) {
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfLaneElts = NumLaneElts / 2;
    for (unsigned i = 0; i != NumElts; ++i) {
      Idxs[i] = i - (i % NumLaneElts);
      if ((i % NumLaneElts) >= HalfLaneElts)
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
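    // Index math above: each element is first rebased to the start of its
    // lane (i - (i % NumLaneElts)) and then offset by a HalfLaneElts-wide
    // field extracted from the immediate.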
  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                       Name.startswith("avx512.mask.movshdup") ||
                       Name.startswith("avx512.mask.movsldup"))) {
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned Offset = 0;
    if (Name.startswith("avx512.mask.movshdup."))
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                       Name.startswith("avx512.mask.unpckl."))) {
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                       Name.startswith("avx512.mask.unpckh."))) {
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
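    // Unpack index math: within each lane, result element i takes source
    // element l + i/2, alternating between Op0 and Op1 through the
    // "+ NumElts * (i % 2)" term; the punpckh form additionally starts from
    // the high half of the lane via the NumLaneElts / 2 offset.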
  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                       Name.startswith("avx512.mask.pand."))) {
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                       Name.startswith("avx512.mask.pandn."))) {
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                       Name.startswith("avx512.mask.por."))) {
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                       Name.startswith("avx512.mask.pxor."))) {
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
    if (Name.endswith(".512")) {
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
        IID = Intrinsic::x86_avx512_add_pd_512;
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
    if (Name.endswith(".512")) {
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
        IID = Intrinsic::x86_avx512_div_pd_512;
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
    if (Name.endswith(".512")) {
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
        IID = Intrinsic::x86_avx512_mul_pd_512;
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
    if (Name.endswith(".512")) {
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
        IID = Intrinsic::x86_avx512_sub_pd_512;
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                       Name.startswith("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
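    // For the .512 forms handled above, Name[17] distinguishes single
    // precision ('s' selects the ps_512 intrinsic) from double precision
    // (pd_512), and CI->getArgOperand(4) is forwarded as the trailing operand
    // of the new intrinsics, presumably the rounding control.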
  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
                             { CI->getArgOperand(0), Builder.getInt1(false) });
  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
    bool IsImmediate = Name[16] == 'i' ||
                       (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.' ? Name[17] :
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2')
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psllv_w_512;
    } else if (Name.endswith(".128")) {
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
    } else if (Name.endswith(".256")) {
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
              IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                            Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
              IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                            Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
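    // Name decoding used here (and by the psrl/psra branches below):
    // Name[16] is 'i' for immediate-count forms, 'v' for variable-count
    // forms, or '.' when a size letter follows in Name[17]; that size letter
    // together with the .128/.256 suffix checks selects among the d/q/w
    // intrinsic variants of each vector width.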
  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
    bool IsImmediate = Name[16] == 'i' ||
                       (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.' ? Name[17] :
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2')
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psrlv_w_512;
    } else if (Name.endswith(".128")) {
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
    } else if (Name.endswith(".256")) {
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
              IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                            Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
              IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                            Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' ||
                       (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.' ? Name[17] :
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psrav_w_512;
    } else if (Name.endswith(".128")) {
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
              IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                            Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
    } else if (Name.endswith(".256")) {
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
              IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                            Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
              IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                            Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
              IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                            Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
  } else if (IsX86 && Name.endswith(".movntdqa")) {
  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                       Name.startswith("fma.vfmsub.") ||
                       Name.startswith("fma.vfnmadd.") ||
                       Name.startswith("fma.vfnmsub."))) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
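    // The character tests above decode the old fma names: Name[6] == 'n'
    // marks the negated-multiply (vfnm*) forms, and the subsequent 's' checks
    // distinguish *sub from *add and scalar from packed variants, which then
    // drive the FNeg insertions on the multiplicand and accumulator operands.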
  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                       Name.startswith("avx512.maskz.vfmadd.s") ||
                       Name.startswith("avx512.mask3.vfmadd.s") ||
                       Name.startswith("avx512.mask3.vfmsub.s") ||
                       Name.startswith("avx512.mask3.vfnmsub.s"))) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    if (NegMul && (IsMask3 || IsMaskZ))
    if (NegMul && !(IsMask3 || IsMaskZ))
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
        IID = Intrinsic::x86_avx512_vfmadd_f32;
    if (NegAcc && IsMask3)
  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                       Name.startswith("avx512.mask.vfnmadd.p") ||
                       Name.startswith("avx512.mask.vfnmsub.p") ||
                       Name.startswith("avx512.mask3.vfmadd.p") ||
                       Name.startswith("avx512.mask3.vfmsub.p") ||
                       Name.startswith("avx512.mask3.vfnmsub.p") ||
                       Name.startswith("avx512.maskz.vfmadd.p"))) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    if (NegMul && (IsMask3 || IsMaskZ))
    if (NegMul && !(IsMask3 || IsMaskZ))
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;
                               { A, B, C, CI->getArgOperand(4) });
  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    Ops[2] = Builder.CreateFNeg(Ops[2]);
  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
                       Name.startswith("avx512.mask3.vfmaddsub.p") ||
                       Name.startswith("avx512.maskz.vfmaddsub.p") ||
                       Name.startswith("avx512.mask3.vfmsubadd.p"))) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
      IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;
      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
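      // The final shuffle interleaves the two partial results: even
      // destination lanes come from Even and odd lanes from Odd, since the
      // index i + (i % 2) * NumElts points into the second operand exactly
      // when i is odd.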
  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
                       Name.startswith("avx512.maskz.pternlog."))) {
    bool ZeroMask = Name[11] == 'z';
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
                       Name.startswith("avx512.maskz.vpmadd52"))) {
    bool ZeroMask = Name[11] == 'z';
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
                       Name.startswith("avx512.mask.vpermt2var.") ||
                       Name.startswith("avx512.maskz.vpermt2var."))) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
                       Name.startswith("avx512.maskz.vpdpbusd.") ||
                       Name.startswith("avx512.mask.vpdpbusds.") ||
                       Name.startswith("avx512.maskz.vpdpbusds."))) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
                       Name.startswith("avx512.maskz.vpdpwssd.") ||
                       Name.startswith("avx512.mask.vpdpwssds.") ||
                       Name.startswith("avx512.maskz.vpdpwssds."))) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
                       Name == "addcarry.u32" || Name == "addcarry.u64" ||
                       Name == "subborrow.u32" || Name == "subborrow.u64")) {
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
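    // Name[0] picks addcarry ('a') versus subborrow ('s'), and the last
    // character of the old name ("u32" vs "u64") picks the width: '2' maps to
    // the *_32 intrinsic IDs and '4' to the *_64 IDs.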
  } else if (IsX86 && Name.startswith("avx512.mask.") &&
  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
                        Name.startswith("atomic.load.add.f64.p"))) {
  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                        Name == "max.ui" || Name == "max.ull")) {
              ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
              : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                        Name == "min.ui" || Name == "min.ull")) {
              ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
              : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (IsNVVM && Name == "clz.ll") {
                                      {Arg, Builder.getFalse()}, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (IsNVVM && Name == "popc.ll") {
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (IsNVVM && Name == "h2f") {
        F->getParent(), Intrinsic::convert_from_fp16,
        {Builder.getFloatTy()}),
  const auto &DefaultCase = [&]() -> void {
           "Unknown function for CallBase upgrade and isn't just a name change");
    auto *OldST = cast<StructType>(CI->getType());
    assert(OldST->getNumElements() ==
               cast<StructType>(NewFn->getReturnType())->getNumElements() &&
           "Must have same number of elements");
    for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
      Res = Builder.CreateInsertValue(Res, Elem, Idx);
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
           "Mismatch between function args and call args");
    size_t OperandWidth =
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
  case Intrinsic::bitreverse:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
           "Mismatch between function args and call args");
  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize =
  case Intrinsic::ctpop:
  case Intrinsic::convert_from_fp16:
  case Intrinsic::dbg_value:
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
  case Intrinsic::ptr_annotation:
  case Intrinsic::var_annotation:
           "Before LLVM 12.0 this intrinsic took four arguments");
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
  case Intrinsic::x86_rdtscp: {
    NewCall = Builder.CreateCall(NewFn);
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end: {
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
  case Intrinsic::memmove:
  case Intrinsic::memset: {
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  if (CallBase *CB = dyn_cast<CallBase>(U))
  F->eraseFromParent();
  Metadata *Elts2[] = {ScalarType, ScalarType,
  if (Opc != Instruction::BitCast)
  if (Opc != Instruction::BitCast)
  Type *SrcTy = C->getType();
  bool BrokenDebugInfo = false;